From b90c97b00bf4bef2bd51403e9bf2b4795247fd9a Mon Sep 17 00:00:00 2001 From: George Koehler Date: Fri, 5 Jan 2018 17:55:50 -0500 Subject: [PATCH] Teach top to merge or delete "addi sp, sp, X". This reduces code size, because ncg emits too many "addi sp, sp, X" instructions when unstacking things. Now top lowers "addi sp, sp, X" by lifting other instructions. This sometimes creates chances to merge or delete _addi_ instructions. If no such chance is found, the _addi_ remains uselessly lowered. Edit ncg/table to remove something that top now does. Edit ncg/mach.c to remove some spaces after commas. This removes a whitespace difference between *.s and *.so files, because top removes the space. --- mach/powerpc/ncg/mach.c | 6 +- mach/powerpc/ncg/table | 20 +++---- mach/powerpc/top/table | 130 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 142 insertions(+), 14 deletions(-) diff --git a/mach/powerpc/ncg/mach.c b/mach/powerpc/ncg/mach.c index a31879de9..06e39709f 100644 --- a/mach/powerpc/ncg/mach.c +++ b/mach/powerpc/ncg/mach.c @@ -203,7 +203,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf) for (reg = 31; reg >= 0; reg--) { if (savedf[reg] != LONG_MIN) { offset -= 8; - fprintf(codefile, "%s f%d, %ld(fp)\n", + fprintf(codefile, "%s f%d,%ld(fp)\n", opf, reg, offset); } } @@ -220,7 +220,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf) while (reg > 0 && savedi[reg - 1] != LONG_MIN) reg--; offset -= (32 - reg) * 4; - fprintf(codefile, "%s r%d, %ld(fp)\n", opm, reg, offset); + fprintf(codefile, "%s r%d,%ld(fp)\n", opm, reg, offset); } else reg = 32; @@ -228,7 +228,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf) for (reg--; reg >= 0; reg--) { if (savedi[reg] != LONG_MIN) { offset -= 4; - fprintf(codefile, "%s r%d, %ld(fp)\n", + fprintf(codefile, "%s r%d,%ld(fp)\n", ops, reg, offset); } } diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 367942408..df06a5d49 100644 --- 
a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -334,9 +334,9 @@ INSTRUCTIONS lhax GPR:wo, GPR:ro, GPR:ro cost(4, 3). lhz GPR:wo, SET_RC_H:ro cost(4, 3). lhzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). + lwz GPR+LOCAL:wo, SET_RC_W:ro cost(4, 3). lwzu GPR:wo, IND_RC_W:rw cost(4, 3). lwzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). - lwz GPR+LOCAL:wo, SET_RC_W:ro cost(4, 3). mfcr GPR:wo cost(4,2). mfspr GPR:wo, SPR:ro cost(4, 3). mtspr SPR:wo, GPR:ro cost(4, 2). @@ -361,7 +361,6 @@ INSTRUCTIONS rlwnm GPR:wo, GPR:ro, GPR:ro, CONST:ro, CONST:ro. rotlw GPR+LOCAL:wo, GPR:ro, GPR:ro. slw GPR+LOCAL:wo, GPR:ro, GPR:ro. - subf GPR:wo, GPR:ro, GPR:ro. sraw GPR+LOCAL:wo, GPR:ro, GPR:ro cost(4, 2). srawi GPR+LOCAL:wo, GPR:ro, CONST:ro cost(4, 2). srw GPR+LOCAL:wo, GPR:ro, GPR:ro. @@ -378,6 +377,7 @@ INSTRUCTIONS stw GPR:ro, SET_RC_W:rw cost(4, 3). stwx GPR:ro, GPR:ro, GPR:ro cost(4, 3). stwu GPR:ro, IND_RC_W:rw cost(4, 3). + subf GPR:wo, GPR:ro, GPR:ro. xor GPR:wo, GPR:ro, GPR:ro. xori GPR:wo, GPR:ro, CONST:ro. xoris GPR:wo, GPR:ro, CONST:ro. @@ -762,6 +762,10 @@ STACKINGRULES COERCIONS + /* The unstacking coercions emit many "addi sp, sp, X" + * instructions; the target optimizer (top) will merge them. + */ + from STACK uses REG gen @@ -2103,12 +2107,13 @@ PATTERNS mr fp, r0 blr. + /* If "ret" coerces STACK to REG3, then top will delete the + * extra "addi sp, sp, 4". + */ + pat ret $1==4 /* Return from procedure, word */ with REG3 leaving ret 0 - with STACK - gen lwz r3, {IND_RC_W, sp, 0} - leaving ret 0 pat ret $1==8 /* Return from proc, double-word */ with REG3 INT_W @@ -2117,11 +2122,6 @@ PATTERNS with REG3 STACK gen lwz r4, {IND_RC_W, sp, 0} leaving ret 0 - with STACK - gen - lwz r3, {IND_RC_W, sp, 0} - lwz r4, {IND_RC_W, sp, 4} - leaving ret 0 /* * These rules for blm/bls are wrong if length is zero. 
diff --git a/mach/powerpc/top/table b/mach/powerpc/top/table index b3f5b3a31..cbc16c277 100644 --- a/mach/powerpc/top/table +++ b/mach/powerpc/top/table @@ -1,11 +1,12 @@ /* PowerPC table for ACK target optimizer */ -MAXOP 3; +MAXOP 5; LABEL_STARTER '.'; %%; +L1, L2, L3, L4, L5 { not_using_sp(VAL) }; RNZ { strcmp(VAL, "r0") }; /* not r0 */ X, Y, Z { TRUE }; @@ -16,6 +17,47 @@ X, Y, Z { TRUE }; addi RNZ, RNZ, 0 -> ; addis RNZ, RNZ, 0 -> ; +addi RNZ, RNZ, X : addi RNZ, RNZ, Y { plus(X, Y, Z) } + -> addi RNZ, RNZ, Z ; + +/* Lower "addi sp, sp, X" by lifting other instructions, looking for + * chances to merge or delete _addi_ instructions, and assuming that + * the code generator uses "sp" not "r1". + */ +addi sp, sp, X : ANY L1 { lift(ANY) } + -> ANY L1 : addi sp, sp, X ; +addi sp, sp, X : ANY L1, L2 { lift(ANY) } + -> ANY L1, L2 : addi sp, sp, X ; +addi sp, sp, X : ANY L1, L2, L3 { lift(ANY) } + -> ANY L1, L2, L3 : addi sp, sp, X ; +addi sp, sp, X : ANY L1, L2, L3, L4 { lift(ANY) } + -> ANY L1, L2, L3, L4 : addi sp, sp, X ; +addi sp, sp, X : ANY L1, L2, L3, L4, L5 { lift(ANY) } + -> ANY L1, L2, L3, L4, L5 : addi sp, sp, X ; +addi sp, sp, X : lmw Y, L1 { Y[0]=='r' && atoi(Y+1)>1 } + -> lmw Y, L1 : addi sp, sp, X ; + +/* Merge _addi_ when popping from the stack. */ +addi sp, sp, X : lwz L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> lwz L1, Z(sp) : addi sp, sp, X ; +addi sp, sp, X : lfs L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> lfs L1, Z(sp) : addi sp, sp, X ; +addi sp, sp, X : lfd L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> lfd L1, Z(sp) : addi sp, sp, X ; + +/* Lower or delete _addi_ when pushing to the stack. 
*/ +addi sp, sp, X : stwu L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> stw L1, Z(sp) : addi sp, sp, Z ; +addi sp, sp, X : stfsu L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> stfs L1, Z(sp) : addi sp, sp, Z ; +addi sp, sp, X : stfdu L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> stfd L1, Z(sp) : addi sp, sp, Z ; +addi sp, sp, 4 : stfdu L1, -8(sp) -> stfdu L1, -4(sp) ; + +/* Delete _addi_ when setting the stack pointer. */ +addi sp, sp, X : addi sp, L1, Y -> addi sp, L1, Y ; +addi sp, sp, X : lwz sp, L1 -> lwz sp, L1 ; + or X, Y, Y -> mr X, Y ; or. X, Y, Y -> mr. X, Y ; @@ -50,3 +92,89 @@ b X : labdef X -> labdef X ; /* LT=0, GT=1, EQ=2, OV=3 */ %%; + +/* Is it a word character? 0-9A-Za-z_ */ +static int isword(char c) { + return + (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z') || (c == '_'); +} + +/* Does operand _s_ not use the stack pointer? */ +int not_using_sp(const char *s) { + int boundary; + + boundary = 1; + while (*s) { + if (boundary && + ((s[0]=='s' && s[1]=='p') || (s[0]=='r' && s[1]=='1')) && + !isword(s[2])) + return 0; + boundary = !isword(*s); + s++; + } + return 1; +} + + +/* Instructions to lift(), sorted in strcmp() order. These are from + * ../ncg/table, minus branch instructions. 
/* liftables[] must stay in strcmp() order: lift() looks names up with
 * bsearch(), which may miss an entry that is out of order.
 */
const char *liftables[] = {
	"add", "add.", "addi",
	"and", "andc", "andi.", "andis.",
	"cmp", "cmpi", "cmpl", "cmpli",
	"cmplw", "cmplwi", "cmpw", "cmpwi",
	"divw", "divwu", "eqv", "extlwi", "extrwi", "extsb", "extsh",
	"fadd", "fadds", "fcmpo", "fctiwz", "fdiv", "fdivs",
	"fmr", "fmul", "fmuls", "fneg", "frsp", "fsub", "fsubs",
	"lbz", "lbzx",
	"lfd", "lfdu", "lfdx", "lfs", "lfsu", "lfsx",
	"lha", "lhax", "lhz", "lhzx",
	"li", "lis", "lwz", "lwzu", "lwzx",
	"mfcr", "mfspr", "mr", "mr.", "mtspr", "mullw",
	"nand", "neg", "nor", "or", "or.", "ori", "oris",
	"rlwinm", "rlwnm", "rotlwi", "rotrwi",
	"slw", "slwi", "sraw", "srawi", "srw", "srwi",
	"stb", "stbx",
	"stfd", "stfdu", "stfdx", "stfs", "stfsu", "stfsx",
	/* "stwu" sorts before "stwx" ('u' < 'x'). */
	"sth", "sthx", "stw", "stwu", "stwx",
	"subf", "xor", "xori", "xoris",
};

/* Comparator for bsearch() over liftables[].  Both _a_ and _b_ point
 * to a string pointer (the search key and an array element); the
 * strings themselves are compared with strcmp().
 */
static int liftcmp(const void *a, const void *b) {
	return strcmp(*(const char *const *)a, *(const char *const *)b);
}

/* May we lift instruction _s_ above "addi SP, SP, X"?
 * Returns 1 if mnemonic _s_ is listed in liftables[], else 0.
 */
int lift(const char *s) {
	/* Compare against NULL instead of returning the pointer:
	 * converting the void * result to int is not a valid implicit
	 * conversion and could truncate a non-null pointer to 0.
	 */
	return bsearch(&s, liftables,
	    sizeof(liftables) / sizeof(liftables[0]),
	    sizeof(liftables[0]), liftcmp) != NULL;
}


/* Does it fit a signed 16-bit integer? */
static int fits16(long l) {
	return l >= -32768 && l <= 32767;
}

/* Tries sum = a + b with signed 16-bit integers.
 *
 * _a_ and _b_ must each be non-empty, be consumed entirely by
 * strtol() in base 10, and lie in -32768..32767.  If so, and if the
 * sum is also in that range, writes the sum in decimal to _sum_ (at
 * most 7 bytes including the '\0', enough for "-32768") and returns
 * 1.  Otherwise returns 0 without writing to _sum_.
 */
int plus(const char *a, const char *b, const char *sum)
{
	long la, lb, lsum;
	char *end;

	la = strtol(a, &end, 10);
	if (*a == '\0' || *end != '\0' || !fits16(la))
		return 0;
	lb = strtol(b, &end, 10);
	if (*b == '\0' || *end != '\0' || !fits16(lb))
		return 0;

	lsum = la + lb;
	if (!fits16(lsum))
		return 0;
	/* _sum_ is declared const but is the output buffer (presumably
	 * the storage of a pattern variable such as Z -- confirm in top).
	 * Make the qualifier drop explicit instead of implicit.
	 */
	snprintf((char *)sum, 7, "%ld", lsum);
	return 1;
}