Optimize procedures that do both a / b and a % b.

Enable this in CS for PowerPC; disable it for all other machines.
PowerPC has no remainder instruction; the back end uses division to
compute remainder.  If CS finds both a / b and a % b, then CS now
rewrites a % b as a - b * (a / b) and computes a / b only once.  This
removes an extra division in the PowerPC code, so it saves both time
and space.

I have not considered whether to enable this optimization for other
machines.  It might be less useful in machines with a remainder
instruction.  Also, if a % b occurs before a / b, the EM code gets a
DUP.  PowerPC ncg handles this DUP well; other back ends might not.
This commit is contained in:
George Koehler 2018-03-05 13:32:06 -05:00
parent f26259caac
commit b1b737ed6c
19 changed files with 156 additions and 23 deletions

View file

@ -88,12 +88,13 @@ struct occur {
#define UNAIR_OP 6
#define BINAIR_OP 7
#define TERNAIR_OP 8
#define KILL_ENTITY 9
#define SIDE_EFFECTS 10
#define FIDDLE_STACK 11
#define IGNORE 12
#define HOPELESS 13
#define BBLOCK_END 14
#define REMAINDER 9
#define KILL_ENTITY 10
#define SIDE_EFFECTS 11
#define FIDDLE_STACK 12
#define IGNORE 13
#define HOPELESS 14
#define BBLOCK_END 15
struct avail {
avail_p av_before; /* Ptr to earlier discovered expressions. */

View file

@ -54,6 +54,7 @@ STATIC bool same_avail(byte kind, avail_p avp1, avail_p avp2)
case UNAIR_OP:
return avp1->av_operand == avp2->av_operand;
case BINAIR_OP:
case REMAINDER:
if (commutative(avp1->av_instr & BMASK))
return avp1->av_oleft == avp2->av_oleft &&
avp1->av_oright == avp2->av_oright
@ -124,6 +125,7 @@ STATIC void copy_avail(int kind, avail_p src, avail_p dst)
dst->av_operand = src->av_operand;
break;
case BINAIR_OP:
case REMAINDER:
dst->av_oleft = src->av_oleft;
dst->av_oright = src->av_oright;
break;
@ -160,7 +162,8 @@ avail_p av_enter(avail_p avp, occur_p ocp, int kind)
/* Remember local, if any, that holds result. */
if (avp->av_instr != (byte) INSTR(last)) {
/* Only possible when instr is the implicit AAR in
* a LAR or SAR.
* a LAR or SAR, or the implicit DVI in an RMI, or
* DVU in RMU.
*/
ravp->av_saveloc = (entity_p) 0;
} else {

View file

@ -101,12 +101,49 @@ STATIC void complete_aar(line_p lnp, int instr, valnum descr_vn)
dlink(lnp, lindir);
}
STATIC void complete_dv_as_rm(line_p lnp, avail_p avp, bool first)
{
	/* Rebuild a remainder out of the shared quotient q = a / b,
	 * using the identity a % b = a - b * (a / b).
	 *
	 * If first is set, this is the first occurrence and lnp must
	 * stack q.  A DUP of both operands is placed in front of the
	 * division, so that postfix a b / turns into a b a b /, and
	 * the MLI/MLU and SBI/SBU placed after lnp then yield
	 * a b a b / * -.
	 *
	 * Otherwise this is a later occurrence and lnp must stack
	 * a b q; only the MLI/MLU and SBI/SBU are placed after lnp.
	 *
	 * avp describes the division: av_instr must be DVI or DVU,
	 * av_size is the operand size and av_found is the line of the
	 * division itself.
	 */
	offset width = avp->av_size;
	bool is_signed = (avp->av_instr == (byte) op_dvi);
	line_p division, dup_line, mul_line, sub_line, follower;

	assert(is_signed || avp->av_instr == (byte) op_dvu);

	if (first) {
		/* The DUP goes directly before avp->av_found rather
		 * than before lnp, because lnp may point to the LOL
		 * in DVI STL LOL and the DUP must precede the DVI.
		 */
		dup_line = int_line(2 * width);
		dup_line->l_instr = op_dup;
		division = avp->av_found;
		dlink(division->l_prev, dup_line);
		dlink(dup_line, division);
	}

	/* Create the multiply and the subtract of the right signedness. */
	mul_line = int_line(width);
	sub_line = int_line(width);
	if (is_signed) {
		mul_line->l_instr = op_mli;
		sub_line->l_instr = op_sbi;
	} else {
		mul_line->l_instr = op_mlu;
		sub_line->l_instr = op_sbu;
	}

	/* Splice lnp -> multiply -> subtract -> old successor of lnp.
	 * The old successor is saved first, as linking lnp overwrites it.
	 */
	follower = lnp->l_next;
	dlink(lnp, mul_line);
	dlink(mul_line, sub_line);
	dlink(sub_line, follower);
}
STATIC void replace(occur_p ocp, offset tmp, avail_p avp)
{
/* Replace the lines in the occurrence in ocp by a load of the
* temporary with offset tmp.
*/
register line_p lol, first, last;
register int instr;
assert(avp->av_size == ws || avp->av_size == 2*ws);
@ -119,13 +156,24 @@ STATIC void replace(occur_p ocp, offset tmp, avail_p avp)
if (first->l_prev == (line_p) 0) ocp->oc_belongs->b_start = lol;
dlink(first->l_prev, lol);
if (avp->av_instr == (byte) op_aar) {
/* There may actually be a LAR or a SAR instruction; in that
* case we have to complete the array-instruction.
*/
register int instr = INSTR(last);
if (instr != op_aar) complete_aar(lol, instr, avp->av_othird);
instr = INSTR(last);
switch (avp->av_instr & 0377) {
case op_aar:
/* There may actually be a LAR or a SAR
* instruction; in that case we have to
* complete the array-instruction.
*/
if (instr != op_aar)
complete_aar(lol, instr, avp->av_othird);
break;
case op_dvi:
if (instr == op_rmi)
complete_dv_as_rm(lol, avp, FALSE);
break;
case op_dvu:
if (instr == op_rmu)
complete_dv_as_rm(lol, avp, FALSE);
break;
}
/* Throw away the by now useless lines. */
@ -142,6 +190,7 @@ STATIC void append(avail_p avp, offset tmp)
* within a lar or sar, we must first generate the aar.
*/
register line_p stl, lol;
register int instr;
assert(avp->av_size == ws || avp->av_size == 2*ws);
@ -154,13 +203,26 @@ STATIC void append(avail_p avp, offset tmp)
dlink(stl, lol);
dlink(avp->av_found, stl);
if (avp->av_instr == (byte) op_aar) {
register int instr = INSTR(avp->av_found);
if (instr != op_aar) {
complete_aar(lol, instr, avp->av_othird);
avp->av_found->l_instr = op_aar;
}
instr = INSTR(avp->av_found);
switch (avp->av_instr & 0377) {
case op_aar:
if (instr != op_aar) {
complete_aar(lol, instr, avp->av_othird);
avp->av_found->l_instr = op_aar;
}
break;
case op_dvi:
if (instr == op_rmi) {
complete_dv_as_rm(lol, avp, TRUE);
avp->av_found->l_instr = op_dvi;
}
break;
case op_dvu:
if (instr == op_rmu) {
complete_dv_as_rm(lol, avp, TRUE);
avp->av_found->l_instr = op_dvu;
}
break;
}
}

View file

@ -125,8 +125,8 @@ STATIC struct {
/* nop */ HOPELESS, XXX, XXX, XXX, XXX,
/* rck */ BBLOCK_END, XXX, XXX, XXX, XXX,
/* ret */ BBLOCK_END, XXX, XXX, XXX, XXX,
/* rmi */ BINAIR_OP, ARGW, ARGW, ARGW, ANY,
/* rmu */ BINAIR_OP, ARGW, ARGW, ARGW, ANY,
/* rmi */ REMAINDER, ARGW, ARGW, ARGW, ANY,
/* rmu */ REMAINDER, ARGW, ARGW, ARGW, ANY,
/* rol */ BINAIR_OP, ARGW, WS, ARGW, ANY,
/* ror */ BINAIR_OP, ARGW, WS, ARGW, ANY,
/* rtt */ BBLOCK_END, XXX, XXX, XXX, XXX,
@ -203,6 +203,7 @@ bool stack_group(int instr)
case UNAIR_OP:
case BINAIR_OP:
case TERNAIR_OP:
case REMAINDER:
return TRUE;
default:
return FALSE;

View file

@ -25,6 +25,7 @@ STATIC cset forbidden;
STATIC cset sli_counts;
STATIC short LX_threshold;
STATIC short AR_limit;
STATIC bool RM_to_DV;
STATIC void get_instrs(FILE *f, cset *s_p)
{
@ -97,6 +98,12 @@ void cs_machinit(void *vp)
fscanf(f, "%d", &space);
AR_limit = space;
/* Read whether to convert a remainder RMI/RMU to a division
* DVI/DVU using the formula a % b = a - b * (a / b).
*/
fscanf(f, "%d %d", &time, &space);
RM_to_DV = time_space_ratio >= 50 ? time : space;
/* Read for what counts we must not eliminate an SLI instruction
* when it is part of an array-index computation.
*/
@ -126,6 +133,11 @@ bool may_become_aar(avail_p avp)
return TRUE;
}
bool may_become_dv(void)
{
/* Return whether an RMI/RMU may become a DVI/DVU, i.e. the
 * value of RM_to_DV.
 */
return RM_to_DV;
}
STATIC bool sli_no_eliminate(line_p lnp)
{
/* Return whether the SLI-instruction in lnp is part of

View file

@ -13,6 +13,11 @@ bool may_become_aar(avail_p avp);
* an AAR LOI/STI.
*/
bool may_become_dv(void); /*
* Return whether an RMI/RMU may become
* a DVI/DVU: a % b = a - (a / b * b).
*/
bool desirable(avail_p avp); /*
* Return whether it is desirable to eliminate
* the recurrences of the expression in avp.

View file

@ -20,6 +20,7 @@
#include "cs_kill.h"
#include "cs_partit.h"
#include "cs_getent.h"
#include "cs_profit.h"
STATIC void push_entity(entity_p enp, line_p lfirst)
{
@ -130,6 +131,37 @@ STATIC void push_ternair_op(bblock_p bp, line_p lnp, token_p tkp1,
push_avail(av_enter(&av, ocp, TERNAIR_OP), tkp1->tk_lfirst);
}
STATIC void push_remainder(bblock_p bp, line_p lnp, token_p tkp1, token_p tkp2)
{
	/* lnp is an RMI or RMU computing tkp1 % tkp2 in block bp.
	 * If conversion is allowed, first enter the division
	 * tkp1 / tkp2 that is implicit in the remainder; then, in
	 * all cases, enter the remainder itself and push it.
	 */
	struct avail expr;
	occur_p occ;
	int rm_instr;

	assert(INSTR(lnp) == op_rmi || INSTR(lnp) == op_rmu);
	rm_instr = INSTR(lnp);

	/* Size and operands are shared by division and remainder. */
	expr.av_size = avsize(lnp);
	expr.av_oleft = tkp1->tk_vn;
	expr.av_oright = tkp2->tk_vn;

	if (may_become_dv()) {
		/* RMI implies a DVI, RMU implies a DVU. */
		if (rm_instr == op_rmi)
			expr.av_instr = op_dvi;
		else
			expr.av_instr = op_dvu;

		/* Postfix a b % is to become a b a b / * -, so a and
		 * b have to stay on the stack.  The occurrence of the
		 * division therefore starts at lnp itself and not at
		 * tkp1->tk_lfirst.
		 */
		occ = newoccur(lnp, lnp, bp);
		av_enter(&expr, occ, BINAIR_OP);
	}

	/* The remainder proper spans from the first operand up to lnp. */
	expr.av_instr = rm_instr;
	occ = newoccur(tkp1->tk_lfirst, lnp, bp);
	push_avail(av_enter(&expr, occ, REMAINDER), tkp1->tk_lfirst);
}
STATIC void fiddle_stack(line_p lnp)
{
/* The instruction in lnp does something to the valuenumber-stack. */
@ -317,6 +349,11 @@ void vnm(bblock_p bp)
Pop(&tk1, op13size(lnp));
push_ternair_op(bp, lnp, &tk1, &tk2, &tk3);
break;
case REMAINDER:
Pop(&tk2, op22size(lnp));
Pop(&tk1, op12size(lnp));
push_remainder(bp, lnp, &tk1, &tk2);
break;
case KILL_ENTITY:
kill_direct(rep);
break;

View file

@ -78,6 +78,7 @@ cheap operations: -1
-1
lexical tresholds: 1 1
indirection limit: 8
convert remainder to division?: no no
do not eliminate sli if index on shiftcounts: -1
-1
forbidden operators: -1 -1

View file

@ -78,6 +78,7 @@ cheap operations: -1
-1
lexical tresholds: 1 1
indirection limit: 8
convert remainder to division?: no no
do not eliminate sli if index on shiftcounts: -1
-1
forbidden operators: -1 -1

View file

@ -78,6 +78,7 @@ cheap operations: -1
-1
lexical tresholds: 1 1
indirection limit: 8
convert remainder to division?: no no
do not eliminate sli if index on shiftcounts: -1
-1
forbidden operators: -1 -1

View file

@ -93,6 +93,7 @@ cheap operations: op_cii op_cui op_ciu op_cuu -1
op_cii op_cui op_ciu op_cuu -1
lexical tresholds: 1 1
indirection limit: 8
convert remainder to division?: no no
do not eliminate sli if index on shiftcounts: -1
-1
forbidden operators: -1 -1

View file

@ -93,6 +93,7 @@ cheap operations: op_cii op_cui op_ciu op_cuu -1
op_cii op_cui op_ciu op_cuu -1
lexical tresholds: 1 1
indirection limit: 8
convert remainder to division?: no no
do not eliminate sli if index on shiftcounts: -1
-1
forbidden operators: -1 -1

View file

@ -102,6 +102,7 @@ cheap operations: op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1
op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1
lexical tresholds: 1 1
indirection limit: 8
convert remainder to division?: no no
do not eliminate sli if index on shiftcounts: 1 2 3 -1
1 2 3 -1
forbidden operators: -1 -1

View file

@ -99,6 +99,7 @@ addressing modes: op_adp op_lof op_ldf op_loi op_dch op_lpb -1
cheap operations: -1 -1
lexical tresholds: 1 1
indirection limit: 8
convert remainder to division?: no no
do not eliminate sli if index on shiftcounts: -1
-1
forbidden operators: -1 -1

View file

@ -102,6 +102,7 @@ cheap operations: op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1
op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1
lexical tresholds: 1 1
indirection limit: 8
convert remainder to division?: no no
do not eliminate sli if index on shiftcounts: -1
-1
forbidden operators: -1 -1

View file

@ -92,6 +92,7 @@ cheap operations: op_cii op_cui op_cfi op_ciu op_cff op_cuu op_cif -1
op_cii op_cui op_cfi op_ciu op_cff op_cuu op_cif -1
lexical tresholds: 1 1
indirection limit: 8
convert remainder to division?: no no
do not eliminate sli if index on shiftcounts: -1
-1
forbidden operators: -1 -1

View file

@ -141,6 +141,7 @@ cheap operations: op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1
op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1
lexical tresholds: 1 1
indirection limit: 8
convert remainder to division?: yes yes
do not eliminate sli if index on shiftcounts: -1
-1
forbidden operators: -1 -1

View file

@ -100,6 +100,7 @@ cheap operations: op_cuu op_ciu op_cui op_cii -1
op_cuu op_ciu op_cui op_cii -1
lexical tresholds: 1 1
indirection limit: 8
convert remainder to division?: no no
do not eliminate sli if index on shiftcounts: -1
-1
forbidden operators: -1 -1

View file

@ -113,6 +113,7 @@ cheap operations: op_cii op_cui op_cfi op_ciu op_cff op_cuu op_cif
op_cmi op_cmu op_cmf op_cms op_cmp -1
lexical thresholds: 1 1
indirection limit: 8
convert remainder to division?: no no
do not eliminate sli if index on shiftcounts: 1 2 3 -1
1 2 3 -1
forbidden operators: -1 -1