ack/util/ego/sr/sr_reduce.c
George Koehler e841adf970 Fix END pseudo in util/ego/sr; closes #203
The strength reduction (SR) phase mishandled the END pseudo when SR
found at least 2 different expressions in the same loop, and SR made a
new header for the loop.

Since 1987 (commit 159b84e), SR appended the new header to the end of
the procedure.  Later, SR fixed the header by adding a BRA branch to
the loop's entry, and moving the END pseudo from the previous basic
block to the header.  If SR found multiple expressions, it called
fix_header() multiple times, and tried to move the END again.  The
extra move failed assert(INSTR(e) == ps_end), or moved another line
after the END, so opt2 (the peephole optimizer) would crash.

Fix by removing fix_header() and moving the code to make_header(), so
SR adds the BRA and moves the END when it makes the header, and does
so only once for each header.

Adjust init_code(), which inserts code into a header.  Stop checking
for an empty header, because make_header() now adds BRA.  After
inserting code before BRA, don't move LP_INSTR, because later
insertions should go before BRA, not after END.
2019-11-15 11:50:05 -05:00

729 lines
17 KiB
C

/* $Id$ */
/*
* (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
* See the copyright notice in the ACK home directory, in the file "Copyright".
*/
/* S T R E N G T H R E D U C T I O N
*
* S R _ R E D U C E . C
*
*/
#include <stdlib.h>
#include <em_pseu.h>
#include <em_reg.h>
#include <em_mes.h>
#include <em_mnem.h>
#include <em_spec.h>
#include "../share/types.h"
#include "sr.h"
#include "../share/debug.h"
#include "../share/alloc.h"
#include "../share/def.h"
#include "../share/global.h"
#include "../share/utils.h"
#include "sr_aux.h"
#include "../share/lset.h"
#include "sr_xform.h"
#include "sr_reduce.h"
#include "sr_expr.h"
/* Set of code_info records for the expressions that have already
 * been reduced in the loop currently being processed; it is created
 * empty by strength_reduction() and released again by clean_avail().
 *
 * If an expression such as "iv * const" or "A[iv]" is
 * used more than once in a loop, we only use one temporary
 * local for it and reuse this local each time.
 * After the first occurrence, the expression is said to
 * be available.
 */
STATIC lset avail;
STATIC int regtyp(code)
	code_p code;
{
	/* Select the register type passed to reg_mes() for the
	 * temporary that replaces 'code': reg_any for a multiply
	 * or left shift, reg_pointer for every other instruction.
	 */
	int instr = code->co_instr;

	if (instr == op_mli || instr == op_mlu ||
	    instr == op_sli || instr == op_slu) {
		return reg_any;
	}
	return reg_pointer;
}
STATIC void gen_regmes(tmp,score,code,p)
offset tmp;
int score;
code_p code;
proc_p p;
{
/* generate a register message for the temporary variable and
* insert it at the start of the procedure.
*/
line_p l,pro;
l = reg_mes(tmp,code->co_tmpsize,regtyp(code),score);
pro = p->p_start->b_start; /* every proc. begins with a PRO pseudo */
l->l_next = pro->l_next;
PREV(l->l_next) = l;
pro->l_next = l;
PREV(l) = pro;
}
STATIC line_p newcode(code,tmp)
	code_p code;
	offset tmp;
{
	/* Construct the EM code that will replace the reducible code,
	 * e.g.	iv * c	->	tmp
	 *	a[iv]	->	*tmp
	 * Returns the new line; the caller links it into the block.
	 */
	line_p l = (line_p) 0;	/* defined result even on the default path */

	switch(code->co_instr) {
		case op_mli:
		case op_mlu:
		case op_sli:
		case op_slu:
			/* new code is just a LOL tmp */
			l = int_line(tmp);
			l->l_instr = op_lol;
			break;
		case op_aar:
			/* New code is a LOAD tmp, where tmp is a
			 * pointer variable, so the actual EM code
			 * depends on the pointer size.
			 */
			l = move_pointer(tmp,LOAD);
			break;
		case op_lar:
			/* New code is a load-indirect */
			l = int_line(tmp);
			l->l_instr = op_lil;
			break;
		case op_sar:
			/* New code is a store-indirect */
			l = int_line(tmp);
			l->l_instr = op_sil;
			break;
		default:
			assert(FALSE); /* non-reducible instruction */
	}
	return l;
}
STATIC void replcode(code,text)
	code_p code;
	line_p text;
{
	/* Splice the list headed by 'text' into the place of the old
	 * code, which runs from code->co_lfirst to code->co_llast.
	 * The old code is cut loose (its last line gets a null l_next)
	 * but stays reachable through code->co_lfirst.
	 */
	line_p before = PREV(code->co_lfirst);	/* line just before old code */
	line_p after = code->co_llast->l_next;	/* line just behind old code */
	line_p tail = text;

	/* find the last instruction of the new text */
	while (tail->l_next != (line_p) 0) {
		tail = tail->l_next;
	}

	/* hook up the front of the new text */
	PREV(text) = before;
	if (before != (line_p) 0) {
		before->l_next = text;
	} else {
		/* old code started the block, so the new text does now */
		code->co_block->b_start = text;
	}

	/* hook up the back of the new text */
	tail->l_next = after;
	if (after != (line_p) 0) {
		PREV(after) = tail;
	}

	/* terminate the detached old code */
	code->co_llast->l_next = (line_p) 0;
}
STATIC line_p add_code(pl, l)
	line_p pl, l;
{
	/* Insert the list headed by 'l' directly after line 'pl'.
	 * If 'pl' is null, 'l' merely gets a null predecessor and is
	 * returned as the head; otherwise 'pl' is returned.
	 */
	line_p follower, tail;

	if (pl == (line_p) 0) {
		PREV(l) = 0;
		return l;
	}

	follower = pl->l_next;	/* remember what used to come after pl */
	DLINK(pl, l);
	if (follower != (line_p) 0) {
		/* re-attach the old successor behind the inserted list */
		for (tail = l; tail->l_next != (line_p) 0; tail = tail->l_next)
			;
		DLINK(tail, follower);
	}
	return pl;
}
STATIC void init_code(code_p code, offset tmp)
{
	/* Generate code to set up the temporary local.
	 * For multiplication, its initial value is const*iv_expr,
	 * for array operations it is &a[iv_expr] (where iv_expr is
	 * an expression that is a linear function of the induc. var.)
	 * This code is inserted immediately before the loop entry.
	 * As the initializing code looks very much like the
	 * reduced code, we reuse that (old) code, which is still
	 * reachable via code->co_lfirst .. code->co_llast.
	 */
	line_p l, bra, before_bra, *p;

	l = code->co_llast; /* the mli, lar etc. instruction */
	switch(INSTR(l)) {
		case op_mli:
		case op_mlu:
		case op_sli:
		case op_slu:
			/* reduced code is:	iv_expr * lc, lc * iv_expr
			 *			or iv_expr << lc
			 * init_code is:	tmp = that same expression
			 * So we just append a 'STL tmp'.
			 */
			l->l_next = int_line(tmp);
			l->l_next->l_instr = op_stl;
			break;
		case op_lar:
		case op_sar:
			/* reduced code is: ...= A[iv_expr] resp.
			 *		    A[iv_expr] = ..
			 * init_code is: tmp = &A[iv_expr].
			 * So just change the lar or sar into a aar ...
			 */
			l->l_instr = (byte) op_aar;
			/* ... and fall through !! */
		case op_aar:
			/* append code to store a pointer in temp. local */
			l->l_next = move_pointer(tmp,STORE);
			break;
		default:
			assert(FALSE); /* non-reducible instruction */
			return;	/* nothing was appended, so nothing to insert */
	}
	PREV(l->l_next) = l;

	/* Now insert the code at the end of the header block */
	p = &code->co_loop->LP_INSTR;
	bra = *p;
	if (INSTR(bra) == op_bra) {
		/* Add code before branching to the loop.  LP_INSTR is
		 * left at the BRA, so later insertions also go before it.
		 */
		before_bra = PREV(bra);
		if (before_bra != (line_p) 0) {
			add_code(before_bra, code->co_lfirst);
		} else {
			/* The BRA has no predecessor, so it is taken to
			 * be the first line of the header block (assumes
			 * LP_INSTR lies in LP_HEADER).  add_code() cannot
			 * insert in front of a line, so link the code in
			 * here and make it the new start of the header.
			 */
			while (l->l_next) {
				l = l->l_next;
			}
			DLINK(l, bra);
			PREV(code->co_lfirst) = (line_p) 0;
			code->co_loop->LP_HEADER->b_start = code->co_lfirst;
		}
	} else {
		/* Add code before falling into the loop. */
		add_code(bra, code->co_lfirst);
		while (l->l_next) {
			l = l->l_next;
		}
		*p = l; /* new last instruction */
	}
}
STATIC void incr_code(code,tmp)
code_p code;
offset tmp;
{
/* Generate code to increment the temporary local variable.
* The variable is incremented by
* 1) multiply --> step value of iv * loop constant
* 2) array --> step value of iv * element size
* This value can be determined statically.
* If the induction variable is used in a linear
* expression in which its sign is negative
* (such as in: "5-(6-(-iv))" ), this value is negated.
* The generated code looks like:
* LOL tmp ; LOC incr ; ADI ws ; STL tmp
* For pointer-increments we generate a "ADP c", rather than
* a "LOC c; ADS ws".
* This code is put just after the code that increments
* the induction variable.
*/
line_p load_tmp, loc, add, store_tmp, l;
add = newline(OPSHORT);
SHORT(add) = ws; /* the add instruction, can be ADI,ADU or ADS */
switch(code->co_instr) {
case op_mli:
case op_mlu:
loc = int_line(
code->co_sign *
off_set(code->c_o.co_loadlc) *
code->co_iv->iv_step);
loc->l_instr = op_loc;
add->l_instr = op_adi;
load_tmp = int_line(tmp);
load_tmp->l_instr = op_lol;
store_tmp = int_line(tmp);
store_tmp->l_instr = op_stl;
break;
case op_sli:
case op_slu:
loc = int_line(
code->co_sign *
code->co_iv->iv_step *
(1 << off_set(code->c_o.co_loadlc)));
loc->l_instr = op_loc;
add->l_instr = op_adi;
load_tmp = int_line(tmp);
load_tmp->l_instr = op_lol;
store_tmp = int_line(tmp);
store_tmp->l_instr = op_stl;
break;
case op_lar:
case op_sar:
case op_aar:
loc = (line_p) 0;
add = int_line(
code->co_sign *
code->co_iv->iv_step *
elemsize(code->c_o.co_desc));
add->l_instr = op_adp;
load_tmp = move_pointer(tmp,LOAD);
store_tmp = move_pointer(tmp,STORE);
break;
default:
assert(FALSE);
}
/* Now we've got pieces of code to load the temp. local,
* load the constant, add the two and store the result in
* the local. This code will be put just after the code that
* increments the induction variable.
*/
if (loc != (line_p) 0) concatenate(load_tmp,loc);
concatenate(load_tmp,add);
concatenate(load_tmp,store_tmp);
/* Now load_tmp points to a list of EM instructions */
l = code->co_iv->iv_incr;
if (l->l_next != (line_p) 0) {
DLINK(store_tmp,l->l_next);
}
DLINK(l,load_tmp); /* doubly link them */
}
STATIC void remcode(c)
code_p c;
{
line_p l, next;
for (l = c->co_lfirst; l != (line_p) 0; l = next) {
next = l->l_next;
oldline(l);
}
oldcinfo(c);
}
STATIC bool same_address(l1,l2,vars)
	line_p l1,l2;
	lset vars;
{
	/* See if l1 and l2 load the same address.
	 * Both must be the same instruction; a LOL or LDL only
	 * counts when it has pointer size and is a loop constant.
	 */
	int instr = INSTR(l1);

	if (instr != INSTR(l2)) {
		return FALSE;
	}
	if (instr == op_lae) {
		return OBJ(l1) == OBJ(l2);
	}
	if (instr == op_lal) {
		return off_set(l1) == off_set(l2);
	}
	if (instr == op_lol) {
		return ps == ws &&
		       off_set(l1) == off_set(l2) &&
		       is_loopconst(l1,vars);
	}
	if (instr == op_ldl) {
		return ps == 2*ws &&
		       off_set(l1) == off_set(l2) &&
		       is_loopconst(l1,vars);
	}
	return FALSE;
}
STATIC bool same_expr(lb1,le1,lb2,le2)
	line_p lb1,le1,lb2,le2;
{
	/* See if the code from lb1 to le1 is the same
	 * expression as the code from lb2 to le2.
	 * The two ranges are walked in step; each pair of lines must
	 * have the same instruction and the same operand, and only
	 * lines without operand or with a short/offset operand are
	 * accepted.
	 */
	register line_p a = lb1;
	register line_p b = lb2;

	while (INSTR(a) == INSTR(b)) {
		if (TYPE(a) == OPSHORT) {
			if (TYPE(b) != OPSHORT ||
			    SHORT(a) != SHORT(b)) return FALSE;
		} else if (TYPE(a) == OPOFFSET) {
			if (TYPE(b) != OPOFFSET ||
			    OFFSET(a) != OFFSET(b)) return FALSE;
		} else if (TYPE(a) != OPNO) {
			return FALSE;
		}
		/* both ranges have to end at the same time */
		if (a == le1) return b == le2;
		if (b == le2) return FALSE;
		a = a->l_next;
		b = b->l_next;
	}
	return FALSE;	/* instructions differ */
}
STATIC bool same_code(c1,c2,vars)
	code_p c1,c2;
	lset vars;
{
	/* See if c1 and c2 compute the same expression. Two array
	 * references can be the same even if one is e.g a fetch
	 * and the other a store.
	 * Multiplies and shifts must use the same instruction, the
	 * same loop constant and the same iv-expression; array
	 * references must use the same iv-expression, descriptor
	 * address and array address.
	 */
	switch(c1->co_instr) {
		case op_mli:
		case op_mlu:
		case op_sli:
		case op_slu:
			return c1->co_instr == c2->co_instr &&
			off_set(c1->c_o.co_loadlc) ==
			off_set(c2->c_o.co_loadlc) &&
			same_expr(c1->co_ivexpr,c1->co_endexpr,
				  c2->co_ivexpr,c2->co_endexpr);
		case op_aar:
		case op_lar:
		case op_sar:
			return ( c2->co_instr == op_aar ||
				 c2->co_instr == op_lar ||
				 c2->co_instr == op_sar) &&
			same_expr(c1->co_ivexpr,c1->co_endexpr,
				  c2->co_ivexpr,c2->co_endexpr) &&
			same_address(c1->c_o.co_desc,c2->c_o.co_desc,vars) &&
			same_address(c1->co_lfirst,c2->co_lfirst,vars);
		default:
			assert(FALSE); /* non-reducible instruction */
	}
	/* Only reached if the assertion above does not stop us;
	 * never fall off the end of a non-void function.
	 */
	return FALSE;
}
STATIC code_p available(c,vars)
	code_p c;
	lset vars;
{
	/* Look through the set 'avail' for code that computes the
	 * same expression as 'c' (according to same_code).
	 * Return the first such entry, or a null pointer if the
	 * expression is not available yet.
	 */
	Lindex idx = Lfirst(avail);

	while (idx != (Lindex) 0) {
		code_p candidate = (code_p) Lelem(idx);

		if (same_code(c,candidate,vars)) {
			return candidate;
		}
		idx = Lnext(idx,avail);
	}
	return (code_p) 0;
}
STATIC void reduce(code_p code, lset vars)
{
	/* Carry out the transformation for one reducible piece of
	 * code. The code on the left becomes the code on the right;
	 * the names in parentheses are the routines that emit each
	 * part.
	 *
	 *				t = iv * 118;	(init_code)
	 *	do		--->	do
	 *	  .. iv * 118 ..	  .. t ..	(newcode)
	 *	  iv++;			  iv++;
	 *				  t += 118;	(incr_code)
	 *	od			od
	 */
	code_p earlier;
	offset tmp;

	OUTTRACE("succeeded!!",0);

	earlier = available(code,vars);
	if (earlier != (code_p) 0) {
		/* Same expression was reduced before in this loop:
		 * reuse its temporary, no new local is needed, and
		 * the old code plus its record can be thrown away.
		 */
		OUTTRACE("expression was already available",0);
		replcode(code,newcode(code,earlier->co_temp));
		remcode(code);
		return;
	}

	/* First occurrence of this expression. */
	make_header(code->co_loop);	/* the loop must have a header block */

	/* new local variable in the stack frame of the current proc. */
	tmp = tmplocal(curproc,(offset) code->co_tmpsize);
	code->co_temp = tmp;

	/* register message; score 3, as TMP is used at least 3 times */
	gen_regmes(tmp,3,code,curproc);

	/* build the replacement and put it where the old code was */
	replcode(code,newcode(code,tmp));
	OUTTRACE("replaced old code by new code",0);

	/* initialize the temporary in the loop header block */
	init_code(code,tmp);
	OUTTRACE("emitted initializing code",0);

	/* keep the temporary in step with the induction variable */
	incr_code(code,tmp);
	OUTTRACE("emitted increment code",0);

	/* from now on the expression is available */
	Ladd(code,&avail);
}
STATIC void try_multiply(lp,ivs,vars,b,mul)
	loop_p   lp;
	lset     ivs,vars;
	bblock_p b;
	line_p   mul;
{
	/* See if we can reduce the strength of the multiply
	 * instruction 'mul' in block 'b' of loop 'lp'. If so, fill
	 * in a code_info record 'c' (describing the code to be
	 * reduced) and call 'reduce'.
	 * Two patterns are accepted:
	 *	const ; iv_expr ; mul
	 *	iv_expr ; const ; mul
	 */
	line_p l2,lbegin;
	iv_p   iv;
	code_p c;
	int    sign;

	VL(mul);
	OUTTRACE("trying multiply instruction on line %d",linecount);
	if (ovfl_harmful && !IS_STRONG(b)) return;
	/* If b is not a strong block, optimization may
	 * introduce an overflow error in the initializing code.
	 */

	l2 = PREV(mul); /* Instruction before the multiply */
	if ( (is_ivexpr(l2,ivs,vars,&lbegin,&iv,&sign)) &&
		is_const(PREV(lbegin)) ) {
			/* recognized expression "const * iv_expr" */
			c = newcinfo();
			/* The constant is the line in front of the
			 * whole iv-expression, i.e. the line checked by
			 * is_const above.  PREV(l2) is only that line
			 * when iv_expr is a single instruction.
			 */
			c->c_o.co_loadlc = PREV(lbegin);
			c->co_endexpr = l2;
			c->co_lfirst = PREV(lbegin);
	} else {
		if (is_const(l2) &&
			(is_ivexpr(PREV(l2),ivs,vars,&lbegin,&iv,&sign))) {
				/* recognized "iv * const " */
				c = newcinfo();
				c->c_o.co_loadlc = l2;
				c->co_endexpr = PREV(l2);
				c->co_lfirst = lbegin;
		} else {
			OUTTRACE("failed",0);
			return;
		}
	}
	/* common part for both patterns */
	c->co_iv = iv;
	c->co_loop = lp;
	c->co_block = b;
	c->co_llast = mul;
	c->co_ivexpr = lbegin;
	c->co_sign = sign;
	c->co_tmpsize = ws; /* temp. local is a word */
	c->co_instr = INSTR(mul);
	OUTVERBOSE("sr: multiply in proc %d loop %d",
		curproc->p_id, lp->lp_id);
	Ssr++;
	reduce(c,vars);
}
STATIC void try_leftshift(lp,ivs,vars,b,shft)
	loop_p   lp;
	lset     ivs,vars;
	bblock_p b;
	line_p   shft;
{
	/* See if we can reduce the strength of the leftshift
	 * instruction 'shft' in block 'b' of loop 'lp'. The only
	 * pattern accepted is "iv_expr ; const ; shift" with a
	 * constant above sli_threshold. On success a code_info
	 * record is filled in and handed to 'reduce'.
	 */
	line_p shift_count,lbegin;
	iv_p   iv;
	code_p c;
	int    sign;

	VL(shft);
	OUTTRACE("trying leftshift instruction on line %d",linecount);
	if (ovfl_harmful && !IS_STRONG(b)) return;
	/* If b is not a strong block, optimization may
	 * introduce an overflow error in the initializing code.
	 */

	shift_count = PREV(shft); /* Instruction before the shift */
	if (!is_const(shift_count) ||
	    off_set(shift_count) <= sli_threshold ||
	    !is_ivexpr(PREV(shift_count),ivs,vars,&lbegin,&iv,&sign)) {
		OUTTRACE("failed",0);
		return;
	}

	/* recognized "iv << const " */
	c = newcinfo();
	c->c_o.co_loadlc = shift_count;
	c->co_endexpr = PREV(shift_count);
	c->co_lfirst = lbegin;
	c->co_iv = iv;
	c->co_loop = lp;
	c->co_block = b;
	c->co_llast = shft;
	c->co_ivexpr = lbegin;
	c->co_sign = sign;
	c->co_tmpsize = ws; /* temp. local is a word */
	c->co_instr = INSTR(shft);
	OUTVERBOSE("sr: leftshift in proc %d loop %d",
		curproc->p_id, lp->lp_id);
	Ssr++;
	reduce(c,vars);
}
STATIC void try_array(lp,ivs,vars,b,arr)
	loop_p   lp;
	lset     ivs,vars;
	bblock_p b;
	line_p   arr;
{
	/* See if we can reduce the strength of the array reference
	 * instruction 'arr' in block 'b' of loop 'lp'.
	 * The pattern we try to recognize in front of 'arr' is:
	 *	LOAD ADDRESS OF A
	 *	LOAD IV (an iv-expression)
	 *	LOAD ADDRESS OF DESCRIPTOR
	 * On success a code_info record is filled in and handed
	 * to 'reduce'.
	 */
	line_p desc,base,lbegin;
	iv_p   iv;
	code_p c;
	int    sign;

	VL(arr);
	OUTTRACE("trying array instruction on line %d",linecount);
	if (arrbound_harmful && !IS_STRONG(b)) return;
	/* If b is not a strong block, optimization may
	 * introduce an array bound error in the initializing code.
	 */

	desc = PREV(arr);	/* should load the descriptor address */
	if (!is_caddress(desc,vars)) return;
	/* a load or store is only handled for word-sized elements */
	if (INSTR(arr) != op_aar && elemsize(desc) != ws) return;
	if (!is_ivexpr(PREV(desc),ivs,vars,&lbegin,&iv,&sign)) return;

	base = PREV(lbegin);	/* should load the array address */
	if (!is_caddress(base,vars)) return;

	c = newcinfo();
	c->co_iv = iv;
	c->co_loop = lp;
	c->co_block = b;
	c->co_lfirst = base;
	c->co_llast = arr;
	c->co_ivexpr = lbegin;
	c->co_endexpr = PREV(desc);
	c->co_sign = sign;
	c->co_tmpsize = ps; /* temp. local is pointer */
	c->co_instr = INSTR(arr);
	c->c_o.co_desc = desc;
	OUTVERBOSE("sr: array in proc %d loop %d",
		curproc->p_id,lp->lp_id);
	Ssr++;
	reduce(c,vars);
}
STATIC void clean_avail()
{
Lindex i;
for (i = Lfirst(avail); i != (Lindex) 0; i = Lnext(i,avail)) {
oldcinfo(Lelem(i));
}
Ldeleteset(avail);
}
void strength_reduction(lp,ivs,vars)
loop_p lp; /* description of the loop */
lset ivs; /* set of induction variables of the loop */
lset vars; /* set of local variables changed in loop */
{
/* Find all expensive instructions (leftshift, multiply, array) and see
* if they can be reduced. We branch to several instruction-specific
* routines (try_...) that check if reduction is possible,
* and that set up a common data structure (code_info).
* The actual transformations are done by 'reduce', that is
* essentially instruction-independend.
*/
bblock_p b;
line_p l, next;
Lindex i;
avail = Lempty_set();
for (i = Lfirst(lp->LP_BLOCKS); i != (Lindex) 0;
i = Lnext(i,lp->LP_BLOCKS)) {
b = (bblock_p) Lelem(i);
for (l = b->b_start; l != (line_p) 0; l = next) {
next = l->l_next;
if (TYPE(l) == OPSHORT && SHORT(l) == ws) {
switch(INSTR(l)) {
case op_sli:
case op_slu:
try_leftshift(lp,ivs,vars,b,l);
break;
case op_mlu:
case op_mli:
try_multiply(lp,ivs,vars,b,l);
break;
case op_lar:
case op_sar:
case op_aar:
try_array(lp,ivs,vars,b,l);
break;
}
}
}
}
clean_avail();
}