Made to work with troff

This commit is contained in:
ceriel 1990-06-20 10:05:22 +00:00
parent 97e131bc81
commit 39730420a2
25 changed files with 583 additions and 456 deletions

View file

@ -16,37 +16,40 @@ CA=ca/ca?
EGO=$(INTRO) $(OV) $(IC) $(CF) $(IL) $(SR) $(CS) $(SP) $(CJ) $(BO) \
$(UD) $(LV) $(RA) $(CA)
REFER=refer
TROFF=troff
TBL=tbl
TARGET=-Tlp
../ego.doc: $(EGO)
$(REFER) -sA+T -l4,2 $(REFS) intro/head $(EGO) intro/tail > ../ego.doc
../ego.doc: refs.opt refs.stat refs.gen intro/head intro/tail $(EGO)
$(REFER) -sA+T -l4,2 $(REFS) intro/head $(EGO) intro/tail | $(TBL) > ../ego.doc
ego.f: $(EGO)
$(REFER) -sA+T -l4,2 $(REFS) intro/head $(EGO) intro/tail | nroff -ms > ego.f
intro.f: $(INTRO)
$(REFER) -sA+T -l4,2 $(REFS) ov/head $(INTRO) intro/tail | nroff -ms > intro.f
ov.f: $(OV)
$(REFER) -sA+T -l4,2 $(REFS) ov/head $(OV) intro/tail | nroff -ms > ov.f
ic.f: $(IC)
$(REFER) -sA+T -l4,2 $(REFS) ic/head $(IC) intro/tail | nroff -ms > ic.f
cf.f: $(CF)
$(REFER) -sA+T -l4,2 $(REFS) cf/head $(CF) intro/tail | nroff -ms > cf.f
il.f: $(IL)
$(REFER) -sA+T -l4,2 $(REFS) il/head $(IL) intro/tail | nroff -ms > il.f
sr.f: $(SR)
$(REFER) -sA+T -l4,2 $(REFS) sr/head $(SR) intro/tail | nroff -ms > sr.f
cs.f: $(CS)
$(REFER) -sA+T -l4,2 $(REFS) cs/head $(CS) intro/tail | nroff -ms > cs.f
sp.f: $(SP)
$(REFER) -sA+T -l4,2 $(REFS) sp/head $(SP) intro/tail | nroff -ms > sp.f
cj.f: $(CJ)
$(REFER) -sA+T -l4,2 $(REFS) cj/head $(CJ) intro/tail | nroff -ms > cj.f
bo.f: $(BO)
$(REFER) -sA+T -l4,2 $(REFS) bo/head $(BO) intro/tail | nroff -ms > bo.f
ud.f: $(UD)
$(REFER) -sA+T -l4,2 $(REFS) ud/head $(UD) intro/tail | nroff -ms > ud.f
lv.f: $(LV)
$(REFER) -sA+T -l4,2 $(REFS) lv/head $(LV) intro/tail | nroff -ms > lv.f
ra.f: $(RA)
$(REFER) -sA+T -l4,2 $(REFS) ra/head $(RA) intro/tail | nroff -ms > ra.f
ca.f: $(CA)
$(REFER) -sA+T -l4,2 $(REFS) ca/head $(CA) intro/tail | nroff -ms > ca.f
ego.f: refs.opt refs.stat refs.gen intro/head intro/tail $(EGO)
$(REFER) -sA+T -l4,2 $(REFS) intro/head $(EGO) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > ego.f
intro.f: refs.opt refs.stat refs.gen intro/head intro/tail $(INTRO)
$(REFER) -sA+T -l4,2 $(REFS) intro/head $(INTRO) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > intro.f
ov.f: refs.opt refs.stat refs.gen intro/head intro/tail $(OV)
$(REFER) -sA+T -l4,2 $(REFS) intro/head $(OV) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > ov.f
ic.f: refs.opt refs.stat refs.gen intro/head intro/tail $(IC)
$(REFER) -sA+T -l4,2 $(REFS) intro/head $(IC) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > ic.f
cf.f: refs.opt refs.stat refs.gen intro/head intro/tail $(CF)
$(REFER) -sA+T -l4,2 $(REFS) intro/head $(CF) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > cf.f
il.f: refs.opt refs.stat refs.gen intro/head intro/tail $(IL)
$(REFER) -sA+T -l4,2 $(REFS) intro/head $(IL) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > il.f
sr.f: refs.opt refs.stat refs.gen intro/head intro/tail $(SR)
$(REFER) -sA+T -l4,2 $(REFS) intro/head $(SR) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > sr.f
cs.f: refs.opt refs.stat refs.gen intro/head intro/tail $(CS)
$(REFER) -sA+T -l4,2 $(REFS) intro/head $(CS) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > cs.f
sp.f: refs.opt refs.stat refs.gen intro/head intro/tail $(SP)
$(REFER) -sA+T -l4,2 $(REFS) intro/head $(SP) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > sp.f
cj.f: refs.opt refs.stat refs.gen intro/head intro/tail $(CJ)
$(REFER) -sA+T -l4,2 $(REFS) intro/head $(CJ) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > cj.f
bo.f: refs.opt refs.stat refs.gen intro/head intro/tail $(BO)
$(REFER) -sA+T -l4,2 $(REFS) intro/head $(BO) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > bo.f
ud.f: refs.opt refs.stat refs.gen intro/head intro/tail $(UD)
$(REFER) -sA+T -l4,2 $(REFS) intro/head $(UD) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > ud.f
lv.f: refs.opt refs.stat refs.gen intro/head intro/tail $(LV)
$(REFER) -sA+T -l4,2 $(REFS) intro/head $(LV) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > lv.f
ra.f: refs.opt refs.stat refs.gen intro/head intro/tail $(RA)
$(REFER) -sA+T -l4,2 $(REFS) intro/head $(RA) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > ra.f
ca.f: refs.opt refs.stat refs.gen intro/head intro/tail $(CA)
$(REFER) -sA+T -l4,2 $(REFS) intro/head $(CA) intro/tail | $(TBL) | $(TROFF) $(TARGET) -ms > ca.f

View file

@ -27,20 +27,23 @@ While-loop optimization
The straightforward way to translate a while loop is to
put the test for loop termination at the beginning of the loop.
.DS
while cond loop LAB1: Test cond
body of the loop ---> Branch On False To LAB2
end loop code for body of loop
Branch To LAB1
LAB2:
while cond loop \kyLAB1: \kxTest cond
body of the loop --->\h'|\nxu'Branch On False To LAB2
end loop\h'|\nxu'code for body of loop
\h'|\nxu'Branch To LAB1
\h'|\nyu'LAB2:
Fig. 10.1 Example of Branch Optimization
.DE
If the condition fails at the Nth iteration, the following code
gets executed (dynamically):
.DS
N * conditional branch (which fails N-1 times)
N-1 * unconditional branch
N-1 * body of the loop
.TS
l l l.
N * conditional branch (which fails N-1 times)
N-1 * unconditional branch
N-1 * body of the loop
.TE
.DE
An alternative translation is:
.DS
@ -53,9 +56,12 @@ LAB2:
.DE
This translation results in the following profile:
.DS
N * conditional branch (which succeeds N-1 times)
1 * unconditional branch
N-1 * body of the loop
.TS
l l l.
N * conditional branch (which succeeds N-1 times)
1 * unconditional branch
N-1 * body of the loop
.TE
.DE
So the second translation will be significantly faster if N >> 2.
If N=2, execution time will be slightly increased.
@ -79,12 +85,15 @@ the basic block that comes textually next to S must stay
in that position.
So the transformation in Fig. 10.2 is illegal.
.DS
LAB1: S1 LAB1: S1
BRA LAB2 S2
... --> BEQ LAB3
LAB2: S2 ...
BEQ LAB3 S3
S3
.TS
l l l l l.
LAB1: S1 LAB1: S1
BRA LAB2 S2
... --> BEQ LAB3
LAB2: S2 ...
BEQ LAB3 S3
S3
.TE
Fig. 10.2 An illegal transformation of Branch Optimization
.DE
@ -118,34 +127,36 @@ the last instruction of S is a conditional branch
If such a block B is found, the control flow graph is changed
as depicted in Fig. 10.3.
.DS
| |
| v
v |
|-----<------| ----->-----|
____|____ | |
| | | |-------| |
| S1 | | | v |
| Bcc | | | .... |
|--| | | | |
| --------- | | ----|---- |
| | | | | |
| .... ^ | | S2 | |
| | | | | |
| --------- | | | | |
v | | | ^ --------- |
| | S2 | | | | |
| | BRA | | | |-----<-----
| | | | | v
| --------- | | ____|____
| | | | | |
| ------>------ | | S1 |
| | | Bnn |
|-------| | | |
| | ----|----
v | |
|----<--|
|
v
.ft 5
| |
| v
v |
|-----<------| ----->-----|
____|____ | |
| | | |-------| |
| S1 | | | v |
| Bcc | | | .... |
|--| | | | |
| --------- | | ----|---- |
| | | | | |
| .... ^ | | S2 | |
| | | | | |
| --------- | | | | |
v | | | ^ --------- |
| | S2 | | | | |
| | BRA | | | |-----<-----
| | | | | v
| --------- | | ____|____
| | | | | |
| ------>------ | | S1 |
| | | Bnn |
|-------| | | |
| | ----|----
v | |
|----<--|
|
v
.ft R
Fig. 10.3 Transformation of the CFG by Branch Optimization
.DE

View file

@ -63,13 +63,3 @@ present in the input program).
On the other hand, an identifier may be only internally visible.
If such an identifier is referenced before being defined,
an INA or INP pseudo must be emitted prior to its first reference.
.UH
Acknowledgements
.PP
The author would like to thank Andy Tanenbaum for his guidance,
Duk Bekema for implementing the Common Subexpression Elimination phase
and writing the initial documentation of that phase,
Dick Grune for reading the manuscript of this report
and Ceriel Jacobs, Ed Keizer, Martin Kersten, Hans van Staveren
and the members of the S.T.W. user's group for their
interest and assistance.

View file

@ -7,9 +7,12 @@ The optimization below is only possible if
we know for sure that the call to P cannot
change A.
.DS
A := 10; A:= 10;
P; -- procedure call --> P;
B := A + 2; B := 12;
.TS
l l.
A := 10; A:= 10;
P; -- procedure call --> P;
B := A + 2; B := 12;
.TE
.DE
Although it is not possible to predict exactly
all the effects a procedure call has, we may

View file

@ -27,16 +27,18 @@ else
S3
(pseudo) EM:
TEST COND TEST COND
BNE *1 BNE *1
S1 S1
S3 ---> BRA *2
BRA *2 1:
1: S2
S2 2:
S3 S3
.TS
l l l.
TEST COND TEST COND
BNE *1 BNE *1
S1 S1
S3 ---> BRA *2
BRA *2 1:
1: S2
S2 2:
S3 S3
2:
.TE
Fig. 9.1 An example of Cross Jumping
.DE
@ -54,20 +56,23 @@ as demonstrated by the Fig. 8.2.
.DS
Pascal:
if cond then
x := f(4)
else
x := g(5)
if cond then
x := f(4)
else
x := g(5)
EM:
EM:
... ...
LOC 4 LOC 5
CAL F CAL G
ASP 2 ASP 2
LFR 2 LFR 2
STL X STL X
.TS
l l.
... ...
LOC 4 LOC 5
CAL F CAL G
ASP 2 ASP 2
LFR 2 LFR 2
STL X STL X
.TE
Fig. 9.2 Effectiveness of Cross Jumping
.DE
@ -92,37 +97,40 @@ blocks must be split into two.
The control flow graphs before and after the optimization are shown
in Fig. 9.3 and Fig. 9.4.
.DS
.ft 5
-------- --------
| | | |
| S1 | | S2 |
| S3 | | S3 |
| | | |
-------- --------
| |
|------------------|--------------------|
|
v
-------- --------
| | | |
| S1 | | S2 |
| S3 | | S3 |
| | | |
-------- --------
| |
|------------------|--------------------|
|
v
.ft R
Fig. 9.3 CFG before optimization
.DE
.DS
-------- --------
| | | |
| S1 | | S2 |
| | | |
-------- --------
| |
|--------------------<------------------|
v
--------
| |
| S3 |
| |
--------
|
v
.ft 5
-------- --------
| | | |
| S1 | | S2 |
| | | |
-------- --------
| |
|--------------------<------------------|
v
--------
| |
| S3 |
| |
--------
|
v
.ft R
Fig. 9.4 CFG after optimization
.DE

View file

@ -18,12 +18,15 @@ but in general it will save space too.
As an example of the application of Common Subexpression Elimination,
consider the piece of program in Fig. 7.1(a).
.DS
x := a * b; TMP := a * b; x := a * b;
CODE; x := TMP; CODE
y := c + a * b; CODE y := x;
y := c + TMP;
.TS
l l l.
x := a * b; TMP := a * b; x := a * b;
CODE; x := TMP; CODE
y := c + a * b; CODE y := x;
y := c + TMP;
(a) (b) (c)
(a) (b) (c)
.TE
Fig. 7.1 Examples of Common Subexpression Elimination
.DE

View file

@ -70,10 +70,13 @@ a common subexpression,
references to the element itself are replaced by
indirect references through TMP (see Fig. 7.4).
.DS
x := A[i]; TMP := &A[i];
. . . --> x := *TMP;
A[i] := y; . . .
*TMP := y;
.TS
l l l.
x := A[i]; TMP := &A[i];
. . . --> x := *TMP;
A[i] := y; . . .
*TMP := y;
.TE
Fig. 7.4 Elimination of an array address computation
.DE

View file

@ -27,10 +27,13 @@ The value number of the result of an expression depends only
on the kind of operator and the value number(s) of the operand(s).
The expressions need not be textually equal, as shown in Fig. 7.5.
.DS
a := c; (1)
.TS
l l.
a := c; (1)
use(a * b); (2)
d := b; (3)
d := b; (3)
use(c * d); (4)
.TE
Fig. 7.5 Different expressions with the same value number
.DE
@ -43,9 +46,12 @@ and the operator (*) is the same.
.PP
As another example of the value number method, consider Fig. 7.6.
.DS
.TS
l l.
use(a * b); (1)
a := 123; (2)
use(a * b); (3)
.TE
Fig. 7.6 Identical expressions with different value numbers
.DE
@ -64,7 +70,7 @@ of its operands.
A table of "available expressions" is used to do this mapping.
.PP
CS recognizes the following kinds of EM operands, called \fIentities\fR:
.IP
.DS
- constant
- local variable
- external variable
@ -81,6 +87,7 @@ CS recognizes the following kinds of EM operands, called \fIentities\fR:
- local base
- heap pointer
- ignore mask
.DE
.LP
Whenever a new entity is encountered in the working window,
it is entered in the symbol table and given a brand new value number.

View file

@ -26,26 +26,26 @@ There are groups for all sorts of operators:
unary, binary, and ternary.
The groups of operators are further partitioned according to the size
of their operand(s) and result.
\" .PP
\" The distinction between operators and expensive loads is not always clear.
\" The ADP instruction for example,
\" might seem a unary operator because it pops one item
\" (a pointer) from the stack.
\" However, two ADP-instructions which pop an item with the same value number
\" need not have the same result,
\" because the attributes (an offset, to be added to the pointer)
\" can be different.
\" Is it then a binary operator?
\" That would give rise to the strange, and undesirable,
\" situation that some binary operators pop two operands
\" and others pop one.
\" The conclusion is inevitable:
\" we have been fooled by the name (ADd Pointer).
\" The ADP-instruction is an expensive load.
\" In this context LAF, meaning Load Address of oFfsetted,
\" would have been a better name,
\" corresponding to LOF, like LAL,
\" Load Address of Local, corresponds to LOL.
.\" .PP
.\" The distinction between operators and expensive loads is not always clear.
.\" The ADP instruction for example,
.\" might seem a unary operator because it pops one item
.\" (a pointer) from the stack.
.\" However, two ADP-instructions which pop an item with the same value number
.\" need not have the same result,
.\" because the attributes (an offset, to be added to the pointer)
.\" can be different.
.\" Is it then a binary operator?
.\" That would give rise to the strange, and undesirable,
.\" situation that some binary operators pop two operands
.\" and others pop one.
.\" The conclusion is inevitable:
.\" we have been fooled by the name (ADd Pointer).
.\" The ADP-instruction is an expensive load.
.\" In this context LAF, meaning Load Address of oFfsetted,
.\" would have been a better name,
.\" corresponding to LOF, like LAL,
.\" Load Address of Local, corresponds to LOL.
.PP
There are groups for all sorts of stores:
direct, indirect, array element.
@ -91,10 +91,13 @@ because EM expressions are postfix.
When we find an instruction to load an operand,
we load on the fake-stack a struct with the following information:
.DS
(1) the value number of the operand
(2) the size of the operand
(3) a pointer to the first line of EM-code
that constitutes the operand
.TS
l l.
(1) the value number of the operand
(2) the size of the operand
(3) a pointer to the first line of EM-code
that constitutes the operand
.TE
.DE
In most cases, (3) will point to the line
that loaded the operand (e.g. LOL, LOC),
@ -121,9 +124,12 @@ a recurrence of this expression.
Not only will the operand(s) be popped from the fake-stack,
but the following will be pushed:
.DS
(1) the value number of the result
(2) the size of the result
(3) a pointer to the first line of the expression
.TS
l l.
(1) the value number of the result
(2) the size of the result
(3) a pointer to the first line of the expression
.TE
.DE
In this way an item on the fake-stack always contains
the necessary information.

View file

@ -90,18 +90,22 @@ and let the null item be 0.
Then the tree of Fig. 3.1(a)
can be represented as in Fig. 3.1(b).
.DS
.ft 5
4
/ \e
9 12
/ \e / \e
12 3 4 6
/ \e \e /
8 1 5 1
.ft R
Fig. 3.1(a) A binary tree
.ft 5
4 9 12 0 0 3 8 0 0 1 0 0 12 4 0 5 0 0 6 1 0 0 0
.ft R
Fig. 3.1(b) Its sequential representation
.DE

View file

@ -21,12 +21,15 @@ The syntactic structure of every component
is described by a set of context free syntax rules,
with the following conventions:
.DS
x a non-terminal symbol
A a terminal symbol (in capitals)
x: a b c; a grammar rule
a | b a or b
(a)+ 1 or more occurrences of a
{a} 0 or more occurrences of a
.TS
l l.
x a non-terminal symbol
A a terminal symbol (in capitals)
x: a b c; a grammar rule
a | b a or b
(a)+ 1 or more occurrences of a
{a} 0 or more occurrences of a
.TE
.DE
.NH 3
The object table
@ -70,21 +73,24 @@ identifying number
(see previous section for their use).
.DS
.UL syntax
object_table:
{datablock} ;
datablock:
D_ID -- unique identifying number
PSEUDO -- one of ROM,CON,BSS,HOL,UNKNOWN
SIZE -- # bytes declared
FLAGS
{value} -- contents of rom
{object} ; -- objects of the datablock
object:
O_ID -- unique identifying number
OFFSET -- offset within the datablock
SIZE ; -- size of the object in bytes
value:
argument ;
.TS
lw(1i) l l.
object_table:
{datablock} ;
datablock:
D_ID -- unique identifying number
PSEUDO -- one of ROM,CON,BSS,HOL,UNKNOWN
SIZE -- # bytes declared
FLAGS
{value} -- contents of rom
{object} ; -- objects of the datablock
object:
O_ID -- unique identifying number
OFFSET -- offset within the datablock
SIZE ; -- size of the object in bytes
value:
argument ;
.TE
.DE
A data block has only one flag: "external", indicating
whether the data label is externally visible.
@ -102,26 +108,29 @@ The table has one entry for
every procedure.
.DS
.UL syntax
procedure_table:
{procedure}
procedure:
P_ID -- unique identifying number
#LABELS -- number of instruction labels
#LOCALS -- number of bytes for locals
#FORMALS -- number of bytes for formals
FLAGS -- flag bits
calling -- procedures called by this one
change -- info about global variables changed
use ; -- info about global variables used
calling:
{P_ID} ; -- procedures called
change:
ext -- external variables changed
FLAGS ;
use:
FLAGS ;
ext:
{O_ID} ; -- a set of objects
.TS
lw(1i) l l.
procedure_table:
{procedure}
procedure:
P_ID -- unique identifying number
#LABELS -- number of instruction labels
#LOCALS -- number of bytes for locals
#FORMALS -- number of bytes for formals
FLAGS -- flag bits
calling -- procedures called by this one
change -- info about global variables changed
use ; -- info about global variables used
calling:
{P_ID} ; -- procedures called
change:
ext -- external variables changed
FLAGS ;
use:
FLAGS ;
ext:
{O_ID} ; -- a set of objects
.TE
.DE
.PP
The number of bytes of formal parameters accessed by
@ -231,38 +240,41 @@ of arguments), then the list is terminated by a special
argument of type CEND.
.DS
.UL syntax
em_text:
{line} ;
line:
INSTR -- opcode
OPTYPE -- operand type
operand ;
operand:
empty | -- OPTYPE = NO
SHORT | -- OPTYPE = SHORT
OFFSET | -- OPTYPE = OFFSET
LAB_ID | -- OPTYPE = INSTRLAB
O_ID | -- OPTYPE = OBJECT
P_ID | -- OPTYPE = PROCEDURE
{argument} ; -- OPTYPE = LIST
argument:
ARGTYPE
arg ;
arg:
empty | -- ARGTYPE = CEND
OFFSET |
LAB_ID |
O_ID |
P_ID |
string | -- ARGTYPE = STRING
const ; -- ARGTYPE = ICON,UCON or FCON
string:
LENGTH -- number of characters
{CHARACTER} ;
const:
SIZE -- number of bytes
string ; -- string representation of (un)signed
-- or floating point constant
.TS
lw(1i) l l.
em_text:
{line} ;
line:
INSTR -- opcode
OPTYPE -- operand type
operand ;
operand:
empty | -- OPTYPE = NO
SHORT | -- OPTYPE = SHORT
OFFSET | -- OPTYPE = OFFSET
LAB_ID | -- OPTYPE = INSTRLAB
O_ID | -- OPTYPE = OBJECT
P_ID | -- OPTYPE = PROCEDURE
{argument} ; -- OPTYPE = LIST
argument:
ARGTYPE
arg ;
arg:
empty | -- ARGTYPE = CEND
OFFSET |
LAB_ID |
O_ID |
P_ID |
string | -- ARGTYPE = STRING
const ; -- ARGTYPE = ICON,UCON or FCON
string:
LENGTH -- number of characters
{CHARACTER} ;
const:
SIZE -- number of bytes
string ; -- string representation of (un)signed
-- or floating point constant
.TE
.DE
.NH 3
The control flow graphs
@ -306,24 +318,27 @@ the identifiers of every
that the block belongs to (see next section for loops).
.DS
.UL syntax
control_flow_graph:
{basic_block} ;
basic_block:
B_ID -- unique identifying number
#INSTR -- number of EM instructions
succ
pred
idom -- immediate dominator
loops -- set of loops
FLAGS ; -- flag bits
succ:
{B_ID} ;
pred:
{B_ID} ;
idom:
B_ID ;
loops:
{LP_ID} ;
.TS
lw(1i) l l.
control_flow_graph:
{basic_block} ;
basic_block:
B_ID -- unique identifying number
#INSTR -- number of EM instructions
succ
pred
idom -- immediate dominator
loops -- set of loops
FLAGS ; -- flag bits
succ:
{B_ID} ;
pred:
{B_ID} ;
idom:
B_ID ;
loops:
{LP_ID} ;
.TE
.DE
The flag bits can have the values 'firm' and 'strong',
which are explained below.
@ -387,28 +402,30 @@ strong nor firm, as it may be skipped during some iterations
.DS
loop
if cond1 then
... -- this code will not
-- result in a firm or strong block
... \kx-- this code will not
\h'|\nxu'-- result in a firm or strong block
end if;
... -- strong (always executed)
exit when cond2;
... -- firm (not executed on
-- last iteration).
... \kx-- firm (not executed on last iteration).
end loop;
Fig. 3.2 Example of firm and strong blocks
.DE
.DS
.UL syntax
looptable:
{loop} ;
loop:
LP_ID -- unique identifying number
LEVEL -- loop nesting level
entry -- loop entry block
end ;
entry:
B_ID ;
end:
B_ID ;
.TS
lw(1i) l l.
looptable:
{loop} ;
loop:
LP_ID -- unique identifying number
LEVEL -- loop nesting level
entry -- loop entry block
end ;
entry:
B_ID ;
end:
B_ID ;
.TE
.DE

View file

@ -57,24 +57,27 @@ indicating which part of the EM text belongs
to that block.
.DS
.UL syntax
intermediate_code:
object_table_file
proctable_file
em_text_file
cfg_file ;
object_table_file:
LENGTH -- number of objects
object_table ;
proctable_file:
LENGTH -- number of procedures
procedure_table ;
em_text_file:
em_text ;
cfg_file:
{per_proc} ; -- one for every procedure
per_proc:
BLENGTH -- number of basic blocks
LLENGTH -- number of loops
control_flow_graph
looptable ;
.TS
lw(1i) l l.
intermediate_code:
object_table_file
proctable_file
em_text_file
cfg_file ;
object_table_file:
LENGTH -- number of objects
object_table ;
proctable_file:
LENGTH -- number of procedures
procedure_table ;
em_text_file:
em_text ;
cfg_file:
{per_proc} ; -- one for every procedure
per_proc:
BLENGTH -- number of basic blocks
LLENGTH -- number of loops
control_flow_graph
looptable ;
.TE
.DE

View file

@ -93,8 +93,11 @@ Objects are recognized by looking at the operands
of instructions that reference global data.
If we come across the instructions:
.DS
LDE X+6 -- Load Double External
LAE X+20 -- Load Address External
.TS
l l.
LDE X+6 -- Load Double External
LAE X+20 -- Load Address External
.TE
.DE
we conclude that the data block
preceded by the data label X contains an object

View file

@ -139,10 +139,10 @@ procedure p (x:integer);
begin
x := 20;
end;
...
a := 10; a := 10;
p(a); ---> a := 20;
write(a); write(a);
\&...
a := 10; \kxa := 10;
p(a); ---> \h'|\nxu'a := 20;
write(a); \h'|\nxu'write(a);
.DE
.IP 2.
P changes any of the operands of the

View file

@ -104,18 +104,21 @@ The following model was developed empirically.
Assume procedure P calls procedure Q.
The call takes place in basic block B.
.DS
ZP = # zero parameters
CP = # constant parameters - ZP
LN = Loop Nesting level (0 if outside any loop)
F = \fIif\fR # formal parameters of Q > 0 \fIthen\fR 1 \fIelse\fR 0
FT = \fIif\fR Q falls through \fIthen\fR 1 \fIelse\fR 0
S = size(Q) - 1 - # inline_parameters - F
L = \fIif\fR # local variables of P > 0 \fIthen\fR 0 \fIelse\fR -1
A = CP + 2 * ZP
N = \fIif\fR LN=0 and P is never called from a loop \fIthen\fR 0 \fIelse\fR (LN+1)**2
FM = \fIif\fR B is a firm block \fIthen\fR 2 \fIelse\fR 1
.TS
l l l.
ZP \&= # zero parameters
CP \&= # constant parameters - ZP
LN \&= Loop Nesting level (0 if outside any loop)
F \&= \fIif\fR # formal parameters of Q > 0 \fIthen\fR 1 \fIelse\fR 0
FT \&= \fIif\fR Q falls through \fIthen\fR 1 \fIelse\fR 0
S \&= size(Q) - 1 - # inline_parameters - F
L \&= \fIif\fR # local variables of P > 0 \fIthen\fR 0 \fIelse\fR -1
A \&= CP + 2 * ZP
N \&= \fIif\fR LN=0 and P is never called from a loop \fIthen\fR 0 \fIelse\fR (LN+1)**2
FM \&= \fIif\fR B is a firm block \fIthen\fR 2 \fIelse\fR 1
pay_off = (100/S + FT + F + L + A) * N * FM
pay_off \&= (100/S + FT + F + L + A) * N * FM
.TE
.DE
S stands for the size increase of the program,
which is slightly less than the size of Q.

View file

@ -165,19 +165,25 @@ These calls are inherited from the called procedure.
We will refer to these invocations as \fInested calls\fR
(see Fig. 5.1).
.DS
.TS
lw(2.5i) l.
procedure p is
begin .
a(); .
b(); .
begin .
a(); .
b(); .
end;
.TE
procedure r is procedure r is
begin begin
x(); x();
p(); -- in line a(); -- nested call
y(); b(); -- nested call
end; y();
end;
.TS
lw(2.5i) l.
procedure r is procedure r is
begin begin
x(); x();
p(); -- in line a(); -- nested call
y(); b(); -- nested call
end; y();
end;
.TE
Fig. 5.1 Example of nested procedure calls
.DE
@ -224,11 +230,11 @@ All list traversals look like:
traverse(list)
{
for (c = first(list); c != 0; c = CDR(c)) {
if (c is marked) {
traverse(CAR(c));
} else {
do something with c
}
if (c is marked) {
traverse(CAR(c));
} else {
do something with c
}
}
}
.DE

View file

@ -22,6 +22,6 @@ the driving routine for doing the substitution
lower level routines that do certain modifications
.IP 3_aux:
implements auxiliary procedures used by subphase 3
.IP aux
.IP aux:
implements auxiliary procedures used by several subphases.
.LP

View file

@ -1,7 +1,10 @@
.ND
.ll 80m
.nr LL 80m
.nr tl 78m
.\".ll 80m
.\".nr LL 80m
.\".nr tl 78m
.tr ~
.ds >. .
.ds [. " \[
.ds >, ,
.ds [. " [
.ds .] ]
.cs 5 22

View file

@ -1,3 +1,17 @@
.SH
Acknowledgements
.PP
The author would like to thank Andy Tanenbaum for his guidance,
Duk Bekema for implementing the Common Subexpression Elimination phase
and writing the initial documentation of that phase,
Dick Grune for reading the manuscript of this report
and Ceriel Jacobs, Ed Keizer, Martin Kersten, Hans van Staveren
and the members of the S.T.W. user's group for their
interest and assistance.
.bp
.SH
References
.LP
.[
$LIST$
.]

View file

@ -76,11 +76,14 @@ EM is the assembly code of a virtual \fIstack machine\fR.
All operations are performed on the top of the stack.
For example, the statement "A := B + 3" may be expressed in EM as:
.DS
LOL -4 -- push local variable B
LOC 3 -- push constant 3
ADI 2 -- add two 2-byte items on top of
-- the stack and push the result
STL -2 -- pop A
.TS
l l.
LOL -4 -- push local variable B
LOC 3 -- push constant 3
ADI 2 -- add two 2-byte items on top of
-- the stack and push the result
STL -2 -- pop A
.TE
.DE
So EM is essentially a \fIpostfix\fR code.
.PP

View file

@ -225,9 +225,12 @@ allocation.
To summarize, the number of bytes a certain allocation would
save is computed as follows:
.DS
net_bytes_saved = bytes_saved - init_cost
bytes_saved = #occurrences * gains_per_occ
init_cost = #initializations * costs_per_init
.TS
l l.
net_bytes_saved = bytes_saved - init_cost
bytes_saved = #occurrences * gains_per_occ
init_cost = #initializations * costs_per_init
.TE
.DE
.PP
It is inherently more difficult to estimate the execution

View file

@ -11,12 +11,15 @@ the stack by the \fIcalling\fR procedure.
The ASP (Adjust Stack Pointer) instruction is used for this purpose.
A call in EM is shown in Fig. 8.1.
.DS
Pascal: EM:
.TS
l l.
Pascal: EM:
f(a,2) LOC 2
LOE A
CAL F
ASP 4 -- pop 4 bytes
f(a,2) LOC 2
LOE A
CAL F
ASP 4 -- pop 4 bytes
.TE
Fig. 8.1 An example procedure call in Pascal and EM
.DE
@ -35,17 +38,20 @@ A stack adjustment may be delayed if there is some other stack adjustment
later on in the same basic block.
The two ASPs can be combined into one.
.DS
Pascal: EM: optimized EM:
.TS
l l l.
Pascal: EM: optimized EM:
f(a,2) LOC 2 LOC 2
g(3,b,c) LOE A LOE A
CAL F CAL F
ASP 4 LOE C
LOE C LOE B
LOE B LOC 3
LOC 3 CAL G
CAL G ASP 10
ASP 6
f(a,2) LOC 2 LOC 2
g(3,b,c) LOE A LOE A
CAL F CAL F
ASP 4 LOE C
LOE C LOE B
LOE B LOC 3
LOC 3 CAL G
CAL G ASP 10
ASP 6
.TE
Fig. 8.2 An example of local Stack Pollution
.DE
@ -85,19 +91,23 @@ the number of bytes pushed since the first ASP.
.LP
Condition 1. is not satisfied in Fig. 8.3.
.DS
Pascal: EM:
.TS
l l.
Pascal: EM:
5 + f(10) + g(30) LOC 5
LOC 10
CAL F
ASP 2 -- cannot be removed
LFR 2 -- push function result
ADI 2
LOC 30
CAL G
ASP 2
LFR 2
ADI 2
.TE
5 + f(10) + g(30) LOC 5
LOC 10
CAL F
ASP 2 -- cannot be removed
LFR 2 -- push function result
ADI 2
LOC 30
CAL G
ASP 2
LFR 2
ADI 2
Fig. 8.3 An illegal transformation
.DE
If the first ASP were removed (delayed), the first ADI would add
@ -105,19 +115,22 @@ If the first ASP were removed (delayed), the first ADI would add
.sp
Condition 2. is not satisfied in Fig. 8.4.
.DS
Pascal: EM:
.TS
l l.
Pascal: EM:
f(10) + 5 * g(30) LOC 10
CAL F
ASP 2
LFR 2
LOC 5
LOC 30
CAL G
ASP 2
LFR 2
MLI 2 -- 5 * g(30)
ADI 2
f(10) + 5 * g(30) LOC 10
CAL F
ASP 2
LFR 2
LOC 5
LOC 30
CAL G
ASP 2
LFR 2
MLI 2 -- 5 * g(30)
ADI 2
.TE
Fig. 8.4 A second illegal transformation
.DE

View file

@ -16,13 +16,16 @@ done by the EM Peephole Optimizer.
Strength reduction can also be applied
more generally to operators used in a loop.
.DS
i := 1; i := 1;
while i < 100 loop --> TMP := i * 118;
put(i * 118); while i < 100 loop
i := i + 1; put(TMP);
end loop; i := i + 1;
TMP := TMP + 118;
end loop;
.TS
l l.
i := 1; i := 1;
while i < 100 loop\ \ \ \ \ \ \ --> TMP := i * 118;
put(i * 118); while i < 100 loop
i := i + 1; put(TMP);
end loop; i := i + 1;
TMP := TMP + 118;
end loop;
.TE
Fig. 6.1 An example of Strength Reduction
.DE

View file

@ -105,11 +105,11 @@ iv_expression * constant
.IP (2)
constant * iv_expression
.IP (3)
A[iv-expression] := (assign to array element)
A[iv-expression] := \kx(assign to array element)
.IP (4)
A[iv-expression] (use array element)
A[iv-expression] \h'|\nxu'(use array element)
.IP (5)
& A[iv-expression] (take address of array element)
& A[iv-expression] \h'|\nxu'(take address of array element)
.LP
(Note that EM has different instructions to use an array element,
store into one, or take the address of one, resp. LAR, SAR, and AAR).
@ -171,10 +171,13 @@ replaced by TMP.
For array optimizations, the replacement
depends on the form:
.DS
\fIform\fR \fIreplacement\fR
(3) A[iv-expr] := *TMP := (assign indirect)
(4) A[iv-expr] *TMP (use indirect)
(5) &A[iv-expr] TMP
.TS
l l l.
\fIform\fR \fIreplacement\fR
(3) A[iv-expr] := *TMP := (assign indirect)
(4) A[iv-expr] *TMP (use indirect)
(5) &A[iv-expr] TMP
.TE
.DE
The '*' denotes the indirect operator. (Note that
EM has different instructions to do
@ -199,14 +202,17 @@ must be negated.
.PP
The transformations are demonstrated by an example.
.DS
i := 100; i := 100;
while i > 1 loop TMP := (6-i) * 5;
X := (6-i) * 5 + 2; while i > 1 loop
Y := (6-i) * 5 - 8; --> X := TMP + 2;
i := i - 3; Y := TMP - 8;
end loop; i := i - 3;
TMP := TMP + 15;
end loop;
.TS
l l.
i := 100; i := 100;
while i > 1 loop TMP := (6-i) * 5;
X := (6-i) * 5 + 2; while i > 1 loop
Y := (6-i) * 5 - 8;\ \ \ \ \ \ \ --> X := TMP + 2;
i := i - 3; Y := TMP - 8;
end loop; i := i - 3;
TMP := TMP + 15;
end loop;
.TE
Fig. 6.2 Example of complex Strength Reduction transformations
.DE

View file

@ -64,12 +64,15 @@ The assignment must match one of the EM patterns below.
('x' is the candidate. 'ws' is the word size of the target machine.
'n' is any number.)
.DS
\fIpattern\fR \fIstep size\fR
INL x | +1
DEL x | -1
LOL x ; (INC | DEC) ; STL x | +1 | -1
LOL x ; LOC n ; (ADI ws | SBI ws) ; STL x | +n | -n
LOC n ; LOL x ; ADI ws ; STL x. +n
.TS
l l.
\fIpattern\fR \fIstep size\fR
INL x | +1
DEL x | -1
LOL x ; (INC | DEC) ; STL x | +1 | -1
LOL x ; LOC n ; (ADI ws | SBI ws) ; STL x | +n | -n
LOC n ; LOL x ; ADI ws ; STL x +n
.TE
.DE
From the patterns the step size of the induction variable
can also be determined.
@ -95,19 +98,22 @@ code in front of it.
If an expression is to be optimized, it must
be generated by the following syntax rules.
.DS
optimizable_expr:
iv_expr const mult |
const iv_expr mult |
address iv_expr address array_instr;
mult:
MLI ws |
MLU ws ;
array_instr:
LAR ws |
SAR ws |
AAR ws ;
const:
LOC n ;
.TS
l l.
optimizable_expr:
iv_expr const mult |
const iv_expr mult |
address iv_expr address array_instr;
mult:
MLI ws |
MLU ws ;
array_instr:
LAR ws |
SAR ws |
AAR ws ;
const:
LOC n ;
.TE
.DE
An 'address' is an EM instruction that loads an
address on the stack.
@ -120,36 +126,42 @@ instructions like LDL are an 'address'.
denote resp. the array address and the
array descriptor address).
.DS
address:
LAE |
LAL |
LOL if ps=ws |
LOE ,, |
LIL ,, |
LDL if ps=2*ws |
LDE ,, ;
.TS
l l.
address:
LAE |
LAL |
LOL if ps=ws |
LOE ,, |
LIL ,, |
LDL if ps=2*ws |
LDE ,, ;
.TE
.DE
The notion of an iv-expression was introduced earlier.
.DS
iv_expr:
iv_expr unair_op |
iv_expr iv_expr binary_op |
loopconst |
iv ;
unair_op:
NGI ws |
INC |
DEC ;
binary_op:
ADI ws |
ADU ws |
SBI ws |
SBU ws ;
loopconst:
const |
LOL x if x is not changed in loop ;
iv:
LOL x if x is an induction variable ;
.TS
l l.
iv_expr:
iv_expr unair_op |
iv_expr iv_expr binary_op |
loopconst |
iv ;
unair_op:
NGI ws |
INC |
DEC ;
binary_op:
ADI ws |
ADU ws |
SBI ws |
SBU ws ;
loopconst:
const |
LOL x if x is not changed in loop ;
iv:
LOL x if x is an induction variable ;
.TE
.DE
An iv_expression must satisfy one additional constraint:
it must use exactly one operand that is an induction