414 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
			
		
		
	
	
			414 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
.NH 2
 | 
						|
Definition of the intermediate code
 | 
						|
.PP
 | 
						|
The intermediate code of the optimizer consists
 | 
						|
of several components:
 | 
						|
.IP -
 | 
						|
the object table
 | 
						|
.IP -
 | 
						|
the procedure table
 | 
						|
.IP -
 | 
						|
the EM code
 | 
						|
.IP -
 | 
						|
the control flow graphs
 | 
						|
.IP -
 | 
						|
the loop table
 | 
						|
.LP
 | 
						|
.PP
 | 
						|
These components are described in
 | 
						|
the next sections.
 | 
						|
The syntactic structure of every component
 | 
						|
is described by a set of context free syntax rules,
 | 
						|
with the following conventions:
 | 
						|
.DS
 | 
						|
x               a non-terminal symbol
 | 
						|
A               a terminal symbol (in capitals)
 | 
						|
x: a b c;       a grammar rule
 | 
						|
a | b           a or b
 | 
						|
(a)+            1 or more occurrences of a
 | 
						|
{a}             0 or more occurrences of a
 | 
						|
.DE
 | 
						|
.NH 3
 | 
						|
The object table
 | 
						|
.PP
 | 
						|
EM programs declare blocks of bytes rather than (global) variables.
 | 
						|
A typical program may declare 'HOL 7780'
 | 
						|
to allocate space for 8 I/O buffers,
 | 
						|
2 large arrays and 10 scalar variables.
 | 
						|
The optimizer wants to deal with
 | 
						|
.UL objects
 | 
						|
like variables, buffers and arrays
 | 
						|
and certainly not with huge numbers of bytes.
 | 
						|
Therefore the intermediate code contains information
 | 
						|
about which global objects are used.
 | 
						|
This information can be obtained from an EM program
 | 
						|
by just looking at the operands of instructions
 | 
						|
such as LOE, LAE, LDE, STE, SDE, INE, DEE and ZRE.
 | 
						|
.PP
 | 
						|
The object table consists of a list of
 | 
						|
.UL datablock
 | 
						|
entries.
 | 
						|
Each such entry represents a declaration like HOL, BSS,
 | 
						|
CON or ROM.
 | 
						|
There are five kinds of datablock entries.
 | 
						|
The fifth kind,
 | 
						|
UNKNOWN, denotes a declaration in a
 | 
						|
separately compiled file that is not made
 | 
						|
available to the optimizer.
 | 
						|
Each datablock entry contains the type of the block,
 | 
						|
its size, and a description of the objects that
 | 
						|
belong to it.
 | 
						|
If it is a rom,
 | 
						|
it also contains a list of values given
 | 
						|
as arguments to the rom instruction,
 | 
						|
provided that this list contains only integer numbers.
 | 
						|
An object has an offset (within its datablock)
 | 
						|
and a size.
 | 
						|
The size need not always be determinable.
 | 
						|
Both datablock and object contain a unique
 | 
						|
identifying number
 | 
						|
(see previous section for their use).
 | 
						|
.DS
 | 
						|
.UL syntax
 | 
						|
  object_table:
 | 
						|
                {datablock} ;
 | 
						|
  datablock:
 | 
						|
                D_ID            -- unique identifying number
 | 
						|
                PSEUDO          -- one of ROM,CON,BSS,HOL,UNKNOWN
 | 
						|
                SIZE            -- # bytes declared
 | 
						|
                FLAGS
 | 
						|
                {value}         -- contents of rom
 | 
						|
                {object} ;      -- objects of the datablock
 | 
						|
  object:
 | 
						|
                O_ID            -- unique identifying number
 | 
						|
                OFFSET          -- offset within the datablock
 | 
						|
                SIZE ;          -- size of the object in bytes
 | 
						|
  value:
 | 
						|
                argument ;
 | 
						|
.DE
 | 
						|
A data block has only one flag: "external", indicating
 | 
						|
whether the data label is externally visible.
 | 
						|
The syntax for "argument" will be given later on
 | 
						|
(see em_text).
 | 
						|
.NH 3
 | 
						|
The procedure table
 | 
						|
.PP
 | 
						|
The procedure table contains global information
 | 
						|
about all procedures that are made available
 | 
						|
to the optimizer
 | 
						|
and that are needed by the EM program.
 | 
						|
(Library units may not be needed, see section 3.5).
 | 
						|
The table has one entry for
 | 
						|
every procedure.
 | 
						|
.DS
 | 
						|
.UL syntax
 | 
						|
  procedure_table:
 | 
						|
                {procedure} ;
 | 
						|
  procedure:
 | 
						|
                P_ID            -- unique identifying number
 | 
						|
                #LABELS         -- number of instruction labels
 | 
						|
                #LOCALS         -- number of bytes for locals 
 | 
						|
                #FORMALS        -- number of bytes for formals
 | 
						|
                FLAGS           -- flag bits
 | 
						|
                calling         -- procedures called by this one
 | 
						|
                change          -- info about global variables changed
 | 
						|
                use ;           -- info about global variables used
 | 
						|
  calling:
 | 
						|
                {P_ID} ;        -- procedures called
 | 
						|
  change:
 | 
						|
                ext             -- external variables changed
 | 
						|
                FLAGS ;
 | 
						|
  use:
 | 
						|
                FLAGS ;
 | 
						|
  ext:
 | 
						|
                {O_ID} ;        -- a set of objects
 | 
						|
.DE
 | 
						|
.PP
 | 
						|
The number of bytes of formal parameters accessed by
 | 
						|
a procedure is determined by the front ends and
 | 
						|
passed via a message (parameter message) to the optimizer.
 | 
						|
If the front end is not able to determine this number
 | 
						|
(e.g. the parameter may be an array of dynamic size or
 | 
						|
the procedure may have a variable number of arguments) the attribute
 | 
						|
contains the value 'UNKNOWN_SIZE'.
 | 
						|
.sp 0
 | 
						|
A procedure has the following flags:
 | 
						|
.IP -
 | 
						|
external: true if the procedure is externally visible
 | 
						|
.IP -
 | 
						|
bodyseen: true if its code is available as EM text
 | 
						|
.IP -
 | 
						|
calunknown: true if it calls a procedure that has its bodyseen
 | 
						|
flag not set
 | 
						|
.IP -
 | 
						|
environ: true if it uses or changes a (non-global) variable in
 | 
						|
a lexically enclosing procedure
 | 
						|
.IP -
 | 
						|
lpi: true if it is used as operand of an lpi instruction, so
 | 
						|
it may be called indirectly
 | 
						|
.LP
 | 
						|
The change and use attributes both have one flag: "indirect",
 | 
						|
indicating whether the procedure does a 'use indirect'
 | 
						|
or a 'store indirect' (indirect means through a pointer).
 | 
						|
.NH 3
 | 
						|
The EM text
 | 
						|
.PP
 | 
						|
The EM text contains the EM instructions.
 | 
						|
Every EM instruction has an operation code (opcode)
 | 
						|
and 0 or 1 operands.
 | 
						|
EM pseudo instructions can have more than
 | 
						|
1 operand.
 | 
						|
The opcode is just a small (8 bit) integer.
 | 
						|
.sp
 | 
						|
There are several kinds of operands, which we will
 | 
						|
refer to as
 | 
						|
.UL types.
 | 
						|
Many EM instructions can have more than one type of operand.
 | 
						|
The types and their encodings in Compact Assembly Language
 | 
						|
are discussed extensively in.
 | 
						|
.[~[
 | 
						|
keizer architecture 
 | 
						|
.], section 11.2]
 | 
						|
Of special interest is the way numeric values
 | 
						|
are represented.
 | 
						|
Of prime importance is the machine independence of
 | 
						|
the representation.
 | 
						|
Ultimately, one could store every integer
 | 
						|
just as a string of the characters '0' to '9'.
 | 
						|
As doing arithmetic on strings is awkward,
 | 
						|
Compact Assembly Language allows several alternatives.
 | 
						|
The main idea is to look at the value of the integer.
 | 
						|
Integers that fit in 16, 32 or 64 bits are
 | 
						|
represented as a row of resp. 2, 4 and 8 bytes,
 | 
						|
preceded by an indication of how many bytes are used.
 | 
						|
Longer integers are represented as strings;
 | 
						|
this is only allowed within pseudo instructions, however.
 | 
						|
This concept works very well for target machines
 | 
						|
with reasonable word sizes.
 | 
						|
At present, most ACK software cannot be used for word sizes
 | 
						|
higher than 32 bits,
 | 
						|
although the handles for using larger word sizes are
 | 
						|
present in the design of the EM code.
 | 
						|
In the intermediate code we essentially use the
 | 
						|
same ideas.
 | 
						|
We allow three representations of integers.
 | 
						|
.IP -
 | 
						|
integers that fit in a short are represented as a short
 | 
						|
.IP -
 | 
						|
integers that fit in a long but not in a short are represented
 | 
						|
as longs
 | 
						|
.IP -
 | 
						|
all remaining integers are represented as strings
 | 
						|
(only allowed in pseudos).
 | 
						|
.LP
 | 
						|
The terms short and long are defined in
 | 
						|
.[~[
 | 
						|
ritchie reference manual programming language
 | 
						|
.], section 4]
 | 
						|
and depend only on the source machine
 | 
						|
(i.e. the machine on which ACK runs),
 | 
						|
not on the target machines.
 | 
						|
For historical reasons a long will often be called an
 | 
						|
.UL offset.
 | 
						|
.PP
 | 
						|
Operands can also be instruction labels,
 | 
						|
objects or procedures.
 | 
						|
Instruction labels are denoted by a
 | 
						|
.UL label
 | 
						|
.UL identifier,
 | 
						|
which can be distinguished from a normal identifier.
 | 
						|
.sp
 | 
						|
The operand of a pseudo instruction can be a list of
 | 
						|
.UL arguments.
 | 
						|
Arguments can have the same type as operands, except
 | 
						|
for the type short, which is not used for arguments.
 | 
						|
Furthermore, an argument can be a string or
 | 
						|
a string representation of a signed integer, unsigned integer
 | 
						|
or floating point number.
 | 
						|
If the number of arguments is not fully determined by
 | 
						|
the pseudo instruction (e.g. a ROM pseudo can have any number
 | 
						|
of arguments), then the list is terminated by a special
 | 
						|
argument of type CEND.
 | 
						|
.DS
 | 
						|
.UL syntax
 | 
						|
  em_text:
 | 
						|
                {line} ;
 | 
						|
  line:
 | 
						|
                INSTR           -- opcode
 | 
						|
                OPTYPE          -- operand type
 | 
						|
                operand ;
 | 
						|
  operand:
 | 
						|
                empty |         -- OPTYPE = NO
 | 
						|
                SHORT |         -- OPTYPE = SHORT
 | 
						|
                OFFSET |        -- OPTYPE = OFFSET
 | 
						|
                LAB_ID |        -- OPTYPE = INSTRLAB
 | 
						|
                O_ID |          -- OPTYPE = OBJECT
 | 
						|
                P_ID |          -- OPTYPE = PROCEDURE
 | 
						|
                {argument} ;    -- OPTYPE = LIST
 | 
						|
  argument:
 | 
						|
                ARGTYPE
 | 
						|
                arg ;
 | 
						|
  arg:
 | 
						|
                empty |         -- ARGTYPE = CEND
 | 
						|
                OFFSET |
 | 
						|
                LAB_ID |
 | 
						|
                O_ID |
 | 
						|
                P_ID |
 | 
						|
                string |        -- ARGTYPE = STRING
 | 
						|
                const ;         -- ARGTYPE = ICON,UCON or FCON
 | 
						|
  string:
 | 
						|
                LENGTH          -- number of characters
 | 
						|
                {CHARACTER} ;
 | 
						|
  const:
 | 
						|
                SIZE            -- number of bytes
 | 
						|
                string ;        -- string representation of (un)signed
 | 
						|
                                -- or floating point constant
 | 
						|
.DE
 | 
						|
.NH 3
 | 
						|
The control flow graphs
 | 
						|
.PP
 | 
						|
Each procedure can be divided
 | 
						|
into a number of basic blocks.
 | 
						|
A basic block is a piece of code with
 | 
						|
no jumps in, except at the beginning,
 | 
						|
and no jumps out, except at the end.
 | 
						|
.PP
 | 
						|
Every basic block has a set of
 | 
						|
.UL successors,
 | 
						|
which are basic blocks that can follow it immediately in
 | 
						|
the dynamic execution sequence.
 | 
						|
The
 | 
						|
.UL predecessors
 | 
						|
are the basic blocks of which this one
 | 
						|
is a successor.
 | 
						|
The successor and predecessor attributes
 | 
						|
of all basic blocks of a single procedure
 | 
						|
are said to form the
 | 
						|
.UL control
 | 
						|
.UL flow
 | 
						|
.UL graph
 | 
						|
of that procedure.
 | 
						|
.PP
 | 
						|
Another important attribute is the
 | 
						|
.UL immediate
 | 
						|
.UL dominator.
 | 
						|
A basic block B dominates a block C if
 | 
						|
every path in the graph from the procedure entry block
 | 
						|
to C goes through B.
 | 
						|
The immediate dominator of C is the closest dominator
 | 
						|
of C on any path from the entry block.
 | 
						|
(Note that the dominator relation is transitive,
 | 
						|
so the immediate dominator is well defined.)
 | 
						|
.PP
 | 
						|
A basic block also has an attribute containing
 | 
						|
the identifiers of every
 | 
						|
.UL loop
 | 
						|
that the block belongs to (see next section for loops).
 | 
						|
.DS
 | 
						|
.UL syntax
 | 
						|
  control_flow_graph:
 | 
						|
                {basic_block} ;
 | 
						|
  basic_block:
 | 
						|
                B_ID            -- unique identifying number
 | 
						|
                #INSTR          -- number of EM instructions
 | 
						|
                succ
 | 
						|
                pred
 | 
						|
                idom            -- immediate dominator
 | 
						|
                loops           -- set of loops
 | 
						|
                FLAGS ;         -- flag bits
 | 
						|
  succ:
 | 
						|
                {B_ID} ;
 | 
						|
  pred:
 | 
						|
                {B_ID} ;
 | 
						|
  idom:
 | 
						|
                B_ID ;
 | 
						|
  loops:
 | 
						|
                {LP_ID} ;
 | 
						|
.DE
 | 
						|
The flag bits can have the values 'firm' and 'strong',
 | 
						|
which are explained below.
 | 
						|
.NH 3
 | 
						|
The loop tables
 | 
						|
.PP
 | 
						|
Every procedure has an associated
 | 
						|
.UL loop
 | 
						|
.UL table
 | 
						|
containing information about all the loops
 | 
						|
in the procedure.
 | 
						|
Loops can be detected by a close inspection of
 | 
						|
the control flow graph.
 | 
						|
The main idea is to look for two basic blocks,
 | 
						|
B and C, for which the following holds:
 | 
						|
.IP -
 | 
						|
B is a successor of C
 | 
						|
.IP -
 | 
						|
B is a dominator of C
 | 
						|
.LP
 | 
						|
B is called the loop
 | 
						|
.UL entry
 | 
						|
and C is called the loop
 | 
						|
.UL end.
 | 
						|
Intuitively, C contains a jump backwards to
 | 
						|
the beginning of the loop (B).
 | 
						|
.PP
 | 
						|
A loop L1 is said to be
 | 
						|
.UL nested
 | 
						|
within loop L2 if all basic blocks of L1
 | 
						|
are also part of L2.
 | 
						|
It is important to note that loops could
 | 
						|
originally be written as a well structured for- or
 | 
						|
while loop or as a messy goto loop.
 | 
						|
Hence loops may partly overlap without one
 | 
						|
being nested inside the other.
 | 
						|
The
 | 
						|
.UL nesting
 | 
						|
.UL level
 | 
						|
of a loop is the number of loops in
 | 
						|
which it is nested (so it is 0 for
 | 
						|
an outermost loop).
 | 
						|
The details of loop detection will be discussed later.
 | 
						|
.PP
 | 
						|
It is often desirable to know whether a
 | 
						|
basic block gets executed during every iteration
 | 
						|
of a loop.
 | 
						|
This leads to the following definitions:
 | 
						|
.IP -
 | 
						|
A basic block B of a loop L is said to be a \fIfirm\fR block
 | 
						|
of L if B is executed on all successive iterations of L,
 | 
						|
with the only possible exception of the last iteration.
 | 
						|
.IP -
 | 
						|
A basic block B of a loop L is said to be a \fIstrong\fR block
 | 
						|
of L if B is executed on all successive iterations of L.
 | 
						|
.LP
 | 
						|
Note that a strong block is also a firm block.
 | 
						|
If a block is part of a conditional statement, it is neither
 | 
						|
strong nor firm, as it may be skipped during some iterations
 | 
						|
(see Fig. 3.2).
 | 
						|
.DS
 | 
						|
loop
 | 
						|
       if cond1 then
 | 
						|
	      ... -- this code will not
 | 
						|
		  -- result in a firm or strong block
 | 
						|
       end if;
 | 
						|
       ...  -- strong (always executed)
 | 
						|
       exit when cond2;
 | 
						|
       ...  -- firm (not executed on
 | 
						|
            -- last iteration).
 | 
						|
end loop;
 | 
						|
 | 
						|
Fig. 3.2 Example of firm and strong blocks
 | 
						|
.DE
 | 
						|
.DS
 | 
						|
.UL syntax
 | 
						|
  looptable:
 | 
						|
                {loop} ;
 | 
						|
  loop:
 | 
						|
                LP_ID           -- unique identifying number
 | 
						|
                LEVEL           -- loop nesting level
 | 
						|
                entry           -- loop entry block
 | 
						|
                end ;
 | 
						|
  entry:
 | 
						|
                B_ID ;
 | 
						|
  end:
 | 
						|
                B_ID ;
 | 
						|
.DE
 |