From b1b737ed6cac47b82f267a2c3e7d4a36d40a02cf Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 5 Mar 2018 13:32:06 -0500 Subject: [PATCH] Optimize procedures that do both a / b and a % b. Enable this in CS for PowerPC; disable it for all other machines. PowerPC has no remainder instruction; the back end uses division to compute remainder. If CS finds both a / b and a % b, then CS now rewrites a % b as a - b * (a / b) and computes a / b only once. This removes an extra division in the PowerPC code, so it saves both time and space. I have not considered whether to enable this optimization for other machines. It might be less useful in machines with a remainder instruction. Also, if a % b occurs before a / b, the EM code gets a DUP. PowerPC ncg handles this DUP well; other back ends might not. --- util/ego/cs/cs.h | 13 +++--- util/ego/cs/cs_avail.c | 5 +- util/ego/cs/cs_elim.c | 90 ++++++++++++++++++++++++++++++------ util/ego/cs/cs_partit.c | 5 +- util/ego/cs/cs_profit.c | 12 +++++ util/ego/cs/cs_profit.h | 5 ++ util/ego/cs/cs_vnm.c | 37 +++++++++++++++ util/ego/descr/em22.descr | 1 + util/ego/descr/em24.descr | 1 + util/ego/descr/em44.descr | 1 + util/ego/descr/i386.descr | 1 + util/ego/descr/i86.descr | 1 + util/ego/descr/m68020.descr | 1 + util/ego/descr/m68k2.descr | 1 + util/ego/descr/m68k4.descr | 1 + util/ego/descr/pdp.descr | 1 + util/ego/descr/powerpc.descr | 1 + util/ego/descr/sparc.descr | 1 + util/ego/descr/vax4.descr | 1 + 19 files changed, 156 insertions(+), 23 deletions(-) diff --git a/util/ego/cs/cs.h b/util/ego/cs/cs.h index c749427a5..7a2ebde7b 100644 --- a/util/ego/cs/cs.h +++ b/util/ego/cs/cs.h @@ -88,12 +88,13 @@ struct occur { #define UNAIR_OP 6 #define BINAIR_OP 7 #define TERNAIR_OP 8 -#define KILL_ENTITY 9 -#define SIDE_EFFECTS 10 -#define FIDDLE_STACK 11 -#define IGNORE 12 -#define HOPELESS 13 -#define BBLOCK_END 14 +#define REMAINDER 9 +#define KILL_ENTITY 10 +#define SIDE_EFFECTS 11 +#define FIDDLE_STACK 12 +#define IGNORE 13 
+#define HOPELESS 14 +#define BBLOCK_END 15 struct avail { avail_p av_before; /* Ptr to earlier discovered expressions. */ diff --git a/util/ego/cs/cs_avail.c b/util/ego/cs/cs_avail.c index 025132a2e..b28cc496a 100644 --- a/util/ego/cs/cs_avail.c +++ b/util/ego/cs/cs_avail.c @@ -54,6 +54,7 @@ STATIC bool same_avail(byte kind, avail_p avp1, avail_p avp2) case UNAIR_OP: return avp1->av_operand == avp2->av_operand; case BINAIR_OP: + case REMAINDER: if (commutative(avp1->av_instr & BMASK)) return avp1->av_oleft == avp2->av_oleft && avp1->av_oright == avp2->av_oright @@ -124,6 +125,7 @@ STATIC void copy_avail(int kind, avail_p src, avail_p dst) dst->av_operand = src->av_operand; break; case BINAIR_OP: + case REMAINDER: dst->av_oleft = src->av_oleft; dst->av_oright = src->av_oright; break; @@ -160,7 +162,8 @@ avail_p av_enter(avail_p avp, occur_p ocp, int kind) /* Remember local, if any, that holds result. */ if (avp->av_instr != (byte) INSTR(last)) { /* Only possible when instr is the implicit AAR in - * a LAR or SAR. + * a LAR or SAR, or the implicit DVI in an RMI, or + * DVU in RMU. */ ravp->av_saveloc = (entity_p) 0; } else { diff --git a/util/ego/cs/cs_elim.c b/util/ego/cs/cs_elim.c index 767517bd8..7dce0df09 100644 --- a/util/ego/cs/cs_elim.c +++ b/util/ego/cs/cs_elim.c @@ -101,12 +101,49 @@ STATIC void complete_aar(line_p lnp, int instr, valnum descr_vn) dlink(lnp, lindir); } +STATIC void complete_dv_as_rm(line_p lnp, avail_p avp, bool first) +{ + /* Complete a / b as a % b = a - b * (a / b). For the first + * occurrence, lnp must stack q, where q = a / b. We prepend a + * DUP to change postfix a b / into a b a b /, then append a + * MLI/MLU and SBI/SBU to make a b a b / * -. + * + * For later occurrences, lnp must stack a b q. We append the + * MLI/MLU and SBI/SBU. 
+ */ + line_p dv, dup, ml, sb; + offset size; + bool s; + + size = avp->av_size; + s = (avp->av_instr == (byte) op_dvi); + assert(s || avp->av_instr == (byte) op_dvu); + if (first) { + /* Prepend our DUP to avp->av_found, to get before the + * DVI if lnp points to the LOL in DVI STL LOL. + */ + dup = int_line(2 * size); + dup->l_instr = op_dup; + dv = avp->av_found; + dlink(dv->l_prev, dup); + dlink(dup, dv); + } + ml = int_line(size); + sb = int_line(size); + ml->l_instr = (s ? op_mli : op_mlu); + sb->l_instr = (s ? op_sbi : op_sbu); + dlink(sb, lnp->l_next); + dlink(ml, sb); + dlink(lnp, ml); +} + STATIC void replace(occur_p ocp, offset tmp, avail_p avp) { /* Replace the lines in the occurrence in ocp by a load of the * temporary with offset tmp. */ register line_p lol, first, last; + register int instr; assert(avp->av_size == ws || avp->av_size == 2*ws); @@ -119,13 +156,24 @@ STATIC void replace(occur_p ocp, offset tmp, avail_p avp) if (first->l_prev == (line_p) 0) ocp->oc_belongs->b_start = lol; dlink(first->l_prev, lol); - if (avp->av_instr == (byte) op_aar) { - /* There may actually be a LAR or a SAR instruction; in that - * case we have to complete the array-instruction. - */ - register int instr = INSTR(last); - - if (instr != op_aar) complete_aar(lol, instr, avp->av_othird); + instr = INSTR(last); + switch (avp->av_instr & 0377) { + case op_aar: + /* There may actually be a LAR or a SAR + * instruction; in that case we have to + * complete the array-instruction. + */ + if (instr != op_aar) + complete_aar(lol, instr, avp->av_othird); + break; + case op_dvi: + if (instr == op_rmi) + complete_dv_as_rm(lol, avp, FALSE); + break; + case op_dvu: + if (instr == op_rmu) + complete_dv_as_rm(lol, avp, FALSE); + break; } /* Throw away the by now useless lines. */ @@ -142,6 +190,7 @@ STATIC void append(avail_p avp, offset tmp) * within a lar or sar, we must first generate the aar. 
*/ register line_p stl, lol; + register int instr; assert(avp->av_size == ws || avp->av_size == 2*ws); @@ -154,13 +203,26 @@ STATIC void append(avail_p avp, offset tmp) dlink(stl, lol); dlink(avp->av_found, stl); - if (avp->av_instr == (byte) op_aar) { - register int instr = INSTR(avp->av_found); - - if (instr != op_aar) { - complete_aar(lol, instr, avp->av_othird); - avp->av_found->l_instr = op_aar; - } + instr = INSTR(avp->av_found); + switch (avp->av_instr & 0377) { + case op_aar: + if (instr != op_aar) { + complete_aar(lol, instr, avp->av_othird); + avp->av_found->l_instr = op_aar; + } + break; + case op_dvi: + if (instr == op_rmi) { + complete_dv_as_rm(lol, avp, TRUE); + avp->av_found->l_instr = op_dvi; + } + break; + case op_dvu: + if (instr == op_rmu) { + complete_dv_as_rm(lol, avp, TRUE); + avp->av_found->l_instr = op_dvu; + } + break; } } diff --git a/util/ego/cs/cs_partit.c b/util/ego/cs/cs_partit.c index e6d3b1fdf..b020ebcfa 100644 --- a/util/ego/cs/cs_partit.c +++ b/util/ego/cs/cs_partit.c @@ -125,8 +125,8 @@ STATIC struct { /* nop */ HOPELESS, XXX, XXX, XXX, XXX, /* rck */ BBLOCK_END, XXX, XXX, XXX, XXX, /* ret */ BBLOCK_END, XXX, XXX, XXX, XXX, -/* rmi */ BINAIR_OP, ARGW, ARGW, ARGW, ANY, -/* rmu */ BINAIR_OP, ARGW, ARGW, ARGW, ANY, +/* rmi */ REMAINDER, ARGW, ARGW, ARGW, ANY, +/* rmu */ REMAINDER, ARGW, ARGW, ARGW, ANY, /* rol */ BINAIR_OP, ARGW, WS, ARGW, ANY, /* ror */ BINAIR_OP, ARGW, WS, ARGW, ANY, /* rtt */ BBLOCK_END, XXX, XXX, XXX, XXX, @@ -203,6 +203,7 @@ bool stack_group(int instr) case UNAIR_OP: case BINAIR_OP: case TERNAIR_OP: + case REMAINDER: return TRUE; default: return FALSE; diff --git a/util/ego/cs/cs_profit.c b/util/ego/cs/cs_profit.c index 8845aaa29..a92028c36 100644 --- a/util/ego/cs/cs_profit.c +++ b/util/ego/cs/cs_profit.c @@ -25,6 +25,7 @@ STATIC cset forbidden; STATIC cset sli_counts; STATIC short LX_threshold; STATIC short AR_limit; +STATIC bool RM_to_DV; STATIC void get_instrs(FILE *f, cset *s_p) { @@ -97,6 +98,12 @@ void 
cs_machinit(void *vp) fscanf(f, "%d", &space); AR_limit = space; + /* Read whether to convert a remainder RMI/RMU to a division + * DVI/DVU using the formula a % b = a - b * (a / b). + */ + fscanf(f, "%d %d", &time, &space); + RM_to_DV = time_space_ratio >= 50 ? time : space; + /* Read for what counts we must not eliminate an SLI instruction * when it is part of an array-index computation. */ @@ -126,6 +133,11 @@ bool may_become_aar(avail_p avp) return TRUE; } +bool may_become_dv(void) +{ + return RM_to_DV; +} + STATIC bool sli_no_eliminate(line_p lnp) { /* Return whether the SLI-instruction in lnp is part of diff --git a/util/ego/cs/cs_profit.h b/util/ego/cs/cs_profit.h index 43f2bade9..3d1972d24 100644 --- a/util/ego/cs/cs_profit.h +++ b/util/ego/cs/cs_profit.h @@ -13,6 +13,11 @@ bool may_become_aar(avail_p avp); * an AAR LOI/STI. */ +bool may_become_dv(void); /* + * Return whether an RMI/RMU may become + * a DVI/DVU: a % b = a - (a / b * b). + */ + bool desirable(avail_p avp); /* * Return whether it is desirable to eliminate * the recurrences of the expression in avp. diff --git a/util/ego/cs/cs_vnm.c b/util/ego/cs/cs_vnm.c index 67507f805..435dd4658 100644 --- a/util/ego/cs/cs_vnm.c +++ b/util/ego/cs/cs_vnm.c @@ -20,6 +20,7 @@ #include "cs_kill.h" #include "cs_partit.h" #include "cs_getent.h" +#include "cs_profit.h" STATIC void push_entity(entity_p enp, line_p lfirst) { @@ -130,6 +131,37 @@ STATIC void push_ternair_op(bblock_p bp, line_p lnp, token_p tkp1, push_avail(av_enter(&av, ocp, TERNAIR_OP), tkp1->tk_lfirst); } +STATIC void push_remainder(bblock_p bp, line_p lnp, token_p tkp1, token_p tkp2) +{ + /* Enter the implicit division tkp1 / tkp2, + * then push the remainder tkp1 % tkp2. + */ + struct avail av; + occur_p ocp; + + assert(INSTR(lnp) == op_rmi || INSTR(lnp) == op_rmu); + av.av_size = avsize(lnp); + av.av_oleft = tkp1->tk_vn; + av.av_oright = tkp2->tk_vn; + + /* Check whether we may convert RMI/RMU to DVI/DVU. 
*/ + if (may_become_dv()) { + /* The division is DVI in RMI, or DVU in RMU. */ + av.av_instr = (INSTR(lnp) == op_rmi ? op_dvi : op_dvu); + + /* In postfix, a b % becomes a b a b / * -. We must + * keep a and b on the stack, so the first instruction + * to eliminate is lnp, not tkp1->tk_lfirst. + */ + ocp = newoccur(lnp, lnp, bp); + av_enter(&av, ocp, BINAIR_OP); + } + + av.av_instr = INSTR(lnp); + ocp = newoccur(tkp1->tk_lfirst, lnp, bp); + push_avail(av_enter(&av, ocp, REMAINDER), tkp1->tk_lfirst); +} + STATIC void fiddle_stack(line_p lnp) { /* The instruction in lnp does something to the valuenumber-stack. */ @@ -317,6 +349,11 @@ void vnm(bblock_p bp) Pop(&tk1, op13size(lnp)); push_ternair_op(bp, lnp, &tk1, &tk2, &tk3); break; + case REMAINDER: + Pop(&tk2, op22size(lnp)); + Pop(&tk1, op12size(lnp)); + push_remainder(bp, lnp, &tk1, &tk2); + break; case KILL_ENTITY: kill_direct(rep); break; diff --git a/util/ego/descr/em22.descr b/util/ego/descr/em22.descr index f995d631c..d9c39226b 100644 --- a/util/ego/descr/em22.descr +++ b/util/ego/descr/em22.descr @@ -78,6 +78,7 @@ cheap operations: -1 -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/em24.descr b/util/ego/descr/em24.descr index a95751170..cbe0ab5c3 100644 --- a/util/ego/descr/em24.descr +++ b/util/ego/descr/em24.descr @@ -78,6 +78,7 @@ cheap operations: -1 -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/em44.descr b/util/ego/descr/em44.descr index 117f26591..b6dbebba3 100644 --- a/util/ego/descr/em44.descr +++ b/util/ego/descr/em44.descr @@ -78,6 +78,7 @@ cheap operations: -1 -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden 
operators: -1 -1 diff --git a/util/ego/descr/i386.descr b/util/ego/descr/i386.descr index 264151a60..d5a2014bf 100644 --- a/util/ego/descr/i386.descr +++ b/util/ego/descr/i386.descr @@ -93,6 +93,7 @@ cheap operations: op_cii op_cui op_ciu op_cuu -1 op_cii op_cui op_ciu op_cuu -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/i86.descr b/util/ego/descr/i86.descr index 8be3ec23e..9b27cf840 100644 --- a/util/ego/descr/i86.descr +++ b/util/ego/descr/i86.descr @@ -93,6 +93,7 @@ cheap operations: op_cii op_cui op_ciu op_cuu -1 op_cii op_cui op_ciu op_cuu -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/m68020.descr b/util/ego/descr/m68020.descr index 9d2f46b2b..f568e00e2 100644 --- a/util/ego/descr/m68020.descr +++ b/util/ego/descr/m68020.descr @@ -102,6 +102,7 @@ cheap operations: op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: 1 2 3 -1 1 2 3 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/m68k2.descr b/util/ego/descr/m68k2.descr index 58e433db8..6b144cba0 100644 --- a/util/ego/descr/m68k2.descr +++ b/util/ego/descr/m68k2.descr @@ -99,6 +99,7 @@ addressing modes: op_adp op_lof op_ldf op_loi op_dch op_lpb -1 cheap operations: -1 -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/m68k4.descr b/util/ego/descr/m68k4.descr index 8e1da4c5e..6b9d23dfa 100644 --- a/util/ego/descr/m68k4.descr +++ b/util/ego/descr/m68k4.descr @@ -102,6 +102,7 @@ cheap operations: 
op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/pdp.descr b/util/ego/descr/pdp.descr index e73b3aaf1..ec8f3abca 100644 --- a/util/ego/descr/pdp.descr +++ b/util/ego/descr/pdp.descr @@ -92,6 +92,7 @@ cheap operations: op_cii op_cui op_cfi op_ciu op_cff op_cuu op_cif -1 op_cii op_cui op_cfi op_ciu op_cff op_cuu op_cif -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/powerpc.descr b/util/ego/descr/powerpc.descr index 5fb9bb628..cf613e96c 100644 --- a/util/ego/descr/powerpc.descr +++ b/util/ego/descr/powerpc.descr @@ -141,6 +141,7 @@ cheap operations: op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 op_cii op_ciu op_cui op_cuu op_cmi op_cmu op_cmp -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: yes yes do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/sparc.descr b/util/ego/descr/sparc.descr index 978c39ba3..79c33decb 100644 --- a/util/ego/descr/sparc.descr +++ b/util/ego/descr/sparc.descr @@ -100,6 +100,7 @@ cheap operations: op_cuu op_ciu op_cui op_cii -1 op_cuu op_ciu op_cui op_cii -1 lexical tresholds: 1 1 indirection limit: 8 +convert remainder to division?: no no do not eliminate sli if index on shiftcounts: -1 -1 forbidden operators: -1 -1 diff --git a/util/ego/descr/vax4.descr b/util/ego/descr/vax4.descr index 5a39ea759..beaf0c427 100644 --- a/util/ego/descr/vax4.descr +++ b/util/ego/descr/vax4.descr @@ -113,6 +113,7 @@ cheap operations: op_cii op_cui op_cfi op_ciu op_cff op_cuu op_cif op_cmi op_cmu op_cmf op_cms op_cmp -1 lexical thresholds: 1 1 indirection limit: 8 +convert remainder to division?: no 
no do not eliminate sli if index on shiftcounts: 1 2 3 -1 1 2 3 -1 forbidden operators: -1 -1