Teach top to merge or delete "addi sp, sp, X".
This reduces code size, because ncg emits too many "addi sp, sp, X" instructions when unstacking things. Now top lowers "addi sp, sp, X" by lifting other instructions. This sometimes creates chances to merge or delete _addi_ instructions. If no such chance is found, the _addi_ remains uselessly lowered. Edit ncg/table to remove something that top now does. Edit ncg/mach.c to remove some spaces after commas. This removes a whitespace difference between *.s and *.so files, because top removes the space.
This commit is contained in:
parent
720af48d8a
commit
b90c97b00b
|
@ -203,7 +203,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf)
|
||||||
for (reg = 31; reg >= 0; reg--) {
|
for (reg = 31; reg >= 0; reg--) {
|
||||||
if (savedf[reg] != LONG_MIN) {
|
if (savedf[reg] != LONG_MIN) {
|
||||||
offset -= 8;
|
offset -= 8;
|
||||||
fprintf(codefile, "%s f%d, %ld(fp)\n",
|
fprintf(codefile, "%s f%d,%ld(fp)\n",
|
||||||
opf, reg, offset);
|
opf, reg, offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -220,7 +220,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf)
|
||||||
while (reg > 0 && savedi[reg - 1] != LONG_MIN)
|
while (reg > 0 && savedi[reg - 1] != LONG_MIN)
|
||||||
reg--;
|
reg--;
|
||||||
offset -= (32 - reg) * 4;
|
offset -= (32 - reg) * 4;
|
||||||
fprintf(codefile, "%s r%d, %ld(fp)\n", opm, reg, offset);
|
fprintf(codefile, "%s r%d,%ld(fp)\n", opm, reg, offset);
|
||||||
} else
|
} else
|
||||||
reg = 32;
|
reg = 32;
|
||||||
|
|
||||||
|
@ -228,7 +228,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf)
|
||||||
for (reg--; reg >= 0; reg--) {
|
for (reg--; reg >= 0; reg--) {
|
||||||
if (savedi[reg] != LONG_MIN) {
|
if (savedi[reg] != LONG_MIN) {
|
||||||
offset -= 4;
|
offset -= 4;
|
||||||
fprintf(codefile, "%s r%d, %ld(fp)\n",
|
fprintf(codefile, "%s r%d,%ld(fp)\n",
|
||||||
ops, reg, offset);
|
ops, reg, offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -334,9 +334,9 @@ INSTRUCTIONS
|
||||||
lhax GPR:wo, GPR:ro, GPR:ro cost(4, 3).
|
lhax GPR:wo, GPR:ro, GPR:ro cost(4, 3).
|
||||||
lhz GPR:wo, SET_RC_H:ro cost(4, 3).
|
lhz GPR:wo, SET_RC_H:ro cost(4, 3).
|
||||||
lhzx GPR:wo, GPR:ro, GPR:ro cost(4, 3).
|
lhzx GPR:wo, GPR:ro, GPR:ro cost(4, 3).
|
||||||
|
lwz GPR+LOCAL:wo, SET_RC_W:ro cost(4, 3).
|
||||||
lwzu GPR:wo, IND_RC_W:rw cost(4, 3).
|
lwzu GPR:wo, IND_RC_W:rw cost(4, 3).
|
||||||
lwzx GPR:wo, GPR:ro, GPR:ro cost(4, 3).
|
lwzx GPR:wo, GPR:ro, GPR:ro cost(4, 3).
|
||||||
lwz GPR+LOCAL:wo, SET_RC_W:ro cost(4, 3).
|
|
||||||
mfcr GPR:wo cost(4,2).
|
mfcr GPR:wo cost(4,2).
|
||||||
mfspr GPR:wo, SPR:ro cost(4, 3).
|
mfspr GPR:wo, SPR:ro cost(4, 3).
|
||||||
mtspr SPR:wo, GPR:ro cost(4, 2).
|
mtspr SPR:wo, GPR:ro cost(4, 2).
|
||||||
|
@ -361,7 +361,6 @@ INSTRUCTIONS
|
||||||
rlwnm GPR:wo, GPR:ro, GPR:ro, CONST:ro, CONST:ro.
|
rlwnm GPR:wo, GPR:ro, GPR:ro, CONST:ro, CONST:ro.
|
||||||
rotlw GPR+LOCAL:wo, GPR:ro, GPR:ro.
|
rotlw GPR+LOCAL:wo, GPR:ro, GPR:ro.
|
||||||
slw GPR+LOCAL:wo, GPR:ro, GPR:ro.
|
slw GPR+LOCAL:wo, GPR:ro, GPR:ro.
|
||||||
subf GPR:wo, GPR:ro, GPR:ro.
|
|
||||||
sraw GPR+LOCAL:wo, GPR:ro, GPR:ro cost(4, 2).
|
sraw GPR+LOCAL:wo, GPR:ro, GPR:ro cost(4, 2).
|
||||||
srawi GPR+LOCAL:wo, GPR:ro, CONST:ro cost(4, 2).
|
srawi GPR+LOCAL:wo, GPR:ro, CONST:ro cost(4, 2).
|
||||||
srw GPR+LOCAL:wo, GPR:ro, GPR:ro.
|
srw GPR+LOCAL:wo, GPR:ro, GPR:ro.
|
||||||
|
@ -378,6 +377,7 @@ INSTRUCTIONS
|
||||||
stw GPR:ro, SET_RC_W:rw cost(4, 3).
|
stw GPR:ro, SET_RC_W:rw cost(4, 3).
|
||||||
stwx GPR:ro, GPR:ro, GPR:ro cost(4, 3).
|
stwx GPR:ro, GPR:ro, GPR:ro cost(4, 3).
|
||||||
stwu GPR:ro, IND_RC_W:rw cost(4, 3).
|
stwu GPR:ro, IND_RC_W:rw cost(4, 3).
|
||||||
|
subf GPR:wo, GPR:ro, GPR:ro.
|
||||||
xor GPR:wo, GPR:ro, GPR:ro.
|
xor GPR:wo, GPR:ro, GPR:ro.
|
||||||
xori GPR:wo, GPR:ro, CONST:ro.
|
xori GPR:wo, GPR:ro, CONST:ro.
|
||||||
xoris GPR:wo, GPR:ro, CONST:ro.
|
xoris GPR:wo, GPR:ro, CONST:ro.
|
||||||
|
@ -762,6 +762,10 @@ STACKINGRULES
|
||||||
|
|
||||||
COERCIONS
|
COERCIONS
|
||||||
|
|
||||||
|
/* The unstacking coercions emit many "addi sp, sp, X"
|
||||||
|
* instructions; the target optimizer (top) will merge them.
|
||||||
|
*/
|
||||||
|
|
||||||
from STACK
|
from STACK
|
||||||
uses REG
|
uses REG
|
||||||
gen
|
gen
|
||||||
|
@ -2103,12 +2107,13 @@ PATTERNS
|
||||||
mr fp, r0
|
mr fp, r0
|
||||||
blr.
|
blr.
|
||||||
|
|
||||||
|
/* If "ret" coerces STACK to REG3, then top will delete the
|
||||||
|
* extra "addi sp, sp, 4".
|
||||||
|
*/
|
||||||
|
|
||||||
pat ret $1==4 /* Return from procedure, word */
|
pat ret $1==4 /* Return from procedure, word */
|
||||||
with REG3
|
with REG3
|
||||||
leaving ret 0
|
leaving ret 0
|
||||||
with STACK
|
|
||||||
gen lwz r3, {IND_RC_W, sp, 0}
|
|
||||||
leaving ret 0
|
|
||||||
|
|
||||||
pat ret $1==8 /* Return from proc, double-word */
|
pat ret $1==8 /* Return from proc, double-word */
|
||||||
with REG3 INT_W
|
with REG3 INT_W
|
||||||
|
@ -2117,11 +2122,6 @@ PATTERNS
|
||||||
with REG3 STACK
|
with REG3 STACK
|
||||||
gen lwz r4, {IND_RC_W, sp, 0}
|
gen lwz r4, {IND_RC_W, sp, 0}
|
||||||
leaving ret 0
|
leaving ret 0
|
||||||
with STACK
|
|
||||||
gen
|
|
||||||
lwz r3, {IND_RC_W, sp, 0}
|
|
||||||
lwz r4, {IND_RC_W, sp, 4}
|
|
||||||
leaving ret 0
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* These rules for blm/bls are wrong if length is zero.
|
* These rules for blm/bls are wrong if length is zero.
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
|
|
||||||
/* PowerPC table for ACK target optimizer */
|
/* PowerPC table for ACK target optimizer */
|
||||||
|
|
||||||
MAXOP 3;
|
MAXOP 5;
|
||||||
LABEL_STARTER '.';
|
LABEL_STARTER '.';
|
||||||
|
|
||||||
%%;
|
%%;
|
||||||
|
|
||||||
|
L1, L2, L3, L4, L5 { not_using_sp(VAL) };
|
||||||
RNZ { strcmp(VAL, "r0") }; /* not r0 */
|
RNZ { strcmp(VAL, "r0") }; /* not r0 */
|
||||||
X, Y, Z { TRUE };
|
X, Y, Z { TRUE };
|
||||||
|
|
||||||
|
@ -16,6 +17,47 @@ X, Y, Z { TRUE };
|
||||||
addi RNZ, RNZ, 0 -> ;
|
addi RNZ, RNZ, 0 -> ;
|
||||||
addis RNZ, RNZ, 0 -> ;
|
addis RNZ, RNZ, 0 -> ;
|
||||||
|
|
||||||
|
addi RNZ, RNZ, X : addi RNZ, RNZ, Y { plus(X, Y, Z) }
|
||||||
|
-> addi RNZ, RNZ, Z ;
|
||||||
|
|
||||||
|
/* Lower "addi sp, sp, X" by lifting other instructions, looking for
|
||||||
|
* chances to merge or delete _addi_ instructions, and assuming that
|
||||||
|
* the code generator uses "sp" not "r1".
|
||||||
|
*/
|
||||||
|
addi sp, sp, X : ANY L1 { lift(ANY) }
|
||||||
|
-> ANY L1 : addi sp, sp, X ;
|
||||||
|
addi sp, sp, X : ANY L1, L2 { lift(ANY) }
|
||||||
|
-> ANY L1, L2 : addi sp, sp, X ;
|
||||||
|
addi sp, sp, X : ANY L1, L2, L3 { lift(ANY) }
|
||||||
|
-> ANY L1, L2, L3 : addi sp, sp, X ;
|
||||||
|
addi sp, sp, X : ANY L1, L2, L3, L4 { lift(ANY) }
|
||||||
|
-> ANY L1, L2, L3, L4 : addi sp, sp, X ;
|
||||||
|
addi sp, sp, X : ANY L1, L2, L3, L4, L5 { lift(ANY) }
|
||||||
|
-> ANY L1, L2, L3, L4, L5 : addi sp, sp, X ;
|
||||||
|
addi sp, sp, X : lmw Y, L1 { Y[0]=='r' && atoi(Y+1)>1 }
|
||||||
|
-> lmw Y, L1 : addi sp, sp, X ;
|
||||||
|
|
||||||
|
/* Merge _addi_ when popping from the stack. */
|
||||||
|
addi sp, sp, X : lwz L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
|
||||||
|
-> lwz L1, Z(sp) : addi sp, sp, X ;
|
||||||
|
addi sp, sp, X : lfs L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
|
||||||
|
-> lfs L1, Z(sp) : addi sp, sp, X ;
|
||||||
|
addi sp, sp, X : lfd L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
|
||||||
|
-> lfd L1, Z(sp) : addi sp, sp, X ;
|
||||||
|
|
||||||
|
/* Lower or delete _addi_ when pushing to the stack. */
|
||||||
|
addi sp, sp, X : stwu L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
|
||||||
|
-> stw L1, Z(sp) : addi sp, sp, Z ;
|
||||||
|
addi sp, sp, X : stfsu L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
|
||||||
|
-> stfs L1, Z(sp) : addi sp, sp, Z ;
|
||||||
|
addi sp, sp, X : stfdu L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' }
|
||||||
|
-> stfd L1, Z(sp) : addi sp, sp, Z ;
|
||||||
|
addi sp, sp, 4 : stfdu L1, -8(sp) -> stfdu L1, -4(sp) ;
|
||||||
|
|
||||||
|
/* Delete _addi_ when setting the stack pointer. */
|
||||||
|
addi sp, sp, X : addi sp, L1, Y -> addi sp, L1, Y ;
|
||||||
|
addi sp, sp, X : lwz sp, L1 -> lwz sp, L1 ;
|
||||||
|
|
||||||
or X, Y, Y -> mr X, Y ;
|
or X, Y, Y -> mr X, Y ;
|
||||||
or. X, Y, Y -> mr. X, Y ;
|
or. X, Y, Y -> mr. X, Y ;
|
||||||
|
|
||||||
|
@ -50,3 +92,89 @@ b X : labdef X -> labdef X ;
|
||||||
/* LT=0, GT=1, EQ=2, OV=3 */
|
/* LT=0, GT=1, EQ=2, OV=3 */
|
||||||
|
|
||||||
%%;
|
%%;
|
||||||
|
|
||||||
|
/* Report whether _c_ is a word character: one of 0-9, A-Z, a-z or '_'.
 * Returns 1 if it is, 0 otherwise.
 */
static int isword(char c) {
	if (c == '_')
		return 1;
	if (c >= '0' && c <= '9')
		return 1;
	if (c >= 'A' && c <= 'Z')
		return 1;
	return c >= 'a' && c <= 'z';
}
|
||||||
|
|
||||||
|
/* Does operand _s_ not use the stack pointer?
 *
 * Returns 0 if "sp" or "r1" occurs in _s_ as a whole word, that is,
 * neither preceded nor followed by a word character (see isword()).
 * Returns 1 otherwise.
 */
int not_using_sp(const char *s) {
	const char *p;
	int after_break = 1;	/* previous character was not a word character */

	for (p = s; *p != '\0'; p++) {
		if (after_break) {
			/* p[1] is read only when p[0] matched, and p[0] is not
			 * the terminator, so p[1] is in bounds.
			 */
			int is_sp = (p[0] == 's' && p[1] == 'p');
			int is_r1 = (p[0] == 'r' && p[1] == '1');

			/* Likewise p[2] is read only after p[1] matched. */
			if ((is_sp || is_r1) && !isword(p[2]))
				return 0;
		}
		after_break = !isword(*p);
	}
	return 1;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Instructions to lift(), sorted in strcmp() order. These are from
 * ../ncg/table, minus branch instructions.
 *
 * lift() finds entries with bsearch(), so this array must stay in exact
 * strcmp() order or some mnemonics will silently stop matching.  Note
 * that '.' sorts before every letter ("add." < "addi") and that "stwu"
 * sorts before "stwx".
 */
const char *liftables[] = {
	"add", "add.", "addi",
	"and", "andc", "andi.", "andis.",
	"cmp", "cmpi", "cmpl", "cmpli",
	"cmplw", "cmplwi", "cmpw", "cmpwi",
	"divw", "divwu", "eqv", "extlwi", "extrwi", "extsb", "extsh",
	"fadd", "fadds", "fcmpo", "fctiwz", "fdiv", "fdivs",
	"fmr", "fmul", "fmuls", "fneg", "frsp", "fsub", "fsubs",
	"lbz", "lbzx",
	"lfd", "lfdu", "lfdx", "lfs", "lfsu", "lfsx",
	"lha", "lhax", "lhz", "lhzx",
	"li", "lis", "lwz", "lwzu", "lwzx",
	"mfcr", "mfspr", "mr", "mr.", "mtspr", "mullw",
	"nand", "neg", "nor", "or", "or.", "ori", "oris",
	"rlwinm", "rlwnm", "rotlwi", "rotrwi",
	"slw", "slwi", "sraw", "srawi", "srw", "srwi",
	"stb", "stbx",
	"stfd", "stfdu", "stfdx", "stfs", "stfsu", "stfsx",
	"sth", "sthx", "stw", "stwu", "stwx",
	"subf", "xor", "xori", "xoris",
};
|
||||||
|
|
||||||
|
/* Comparator for bsearch(): _a_ and _b_ each point at a (const char *)
 * string pointer; the strings themselves are ordered with strcmp().
 */
static int liftcmp(const void *a, const void *b) {
	const char *const *lhs = a;
	const char *const *rhs = b;

	return strcmp(*lhs, *rhs);
}
|
||||||
|
|
||||||
|
/* May we lift instruction _s_ above "addi SP, SP, X"? */
|
||||||
|
int lift(const char *s) {
|
||||||
|
return bsearch(&s, liftables,
|
||||||
|
sizeof(liftables) / sizeof(liftables[0]),
|
||||||
|
sizeof(liftables[0]), liftcmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Is _l_ within the signed 16-bit range, -32768 to 32767 inclusive?
 * Returns 1 if so, 0 otherwise.
 */
static int fits16(long l) {
	if (l < -32768)
		return 0;
	if (l > 32767)
		return 0;
	return 1;
}
|
||||||
|
|
||||||
|
/* Tries sum = a + b with signed 16-bit integers.
 *
 * _a_ and _b_ are decimal strings; each must be non-empty, be consumed
 * entirely by strtol() in base 10, and fit a signed 16-bit integer.
 * If both operands and their sum fit, writes the sum in decimal to _sum_
 * (at most 7 bytes, including the terminating NUL) and returns 1.
 * Otherwise returns 0 and leaves _sum_ untouched.
 *
 * _sum_ is an output buffer, so it is declared as a plain "char *";
 * writing through a pointer-to-const would discard the qualifier.
 */
int plus(const char *a, const char *b, char *sum)
{
	long la, lb, lsum;
	char *end;

	la = strtol(a, &end, 10);
	if (*a == '\0' || *end != '\0' || !fits16(la))
		return 0;
	lb = strtol(b, &end, 10);
	if (*b == '\0' || *end != '\0' || !fits16(lb))
		return 0;

	/* Both operands fit 16 bits, so this addition cannot overflow a long. */
	lsum = la + lb;
	if (!fits16(lsum))
		return 0;
	/* The longest 16-bit value, "-32768", needs 6 characters plus NUL. */
	snprintf(sum, 7, "%ld", lsum);
	return 1;
}
|
||||||
|
|
Loading…
Reference in a new issue