diff --git a/mach/powerpc/ncg/mach.c b/mach/powerpc/ncg/mach.c index a31879de9..06e39709f 100644 --- a/mach/powerpc/ncg/mach.c +++ b/mach/powerpc/ncg/mach.c @@ -203,7 +203,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf) for (reg = 31; reg >= 0; reg--) { if (savedf[reg] != LONG_MIN) { offset -= 8; - fprintf(codefile, "%s f%d, %ld(fp)\n", + fprintf(codefile, "%s f%d,%ld(fp)\n", opf, reg, offset); } } @@ -220,7 +220,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf) while (reg > 0 && savedi[reg - 1] != LONG_MIN) reg--; offset -= (32 - reg) * 4; - fprintf(codefile, "%s r%d, %ld(fp)\n", opm, reg, offset); + fprintf(codefile, "%s r%d,%ld(fp)\n", opm, reg, offset); } else reg = 32; @@ -228,7 +228,7 @@ saveloadregs(const char* ops, const char* opm, const char *opf) for (reg--; reg >= 0; reg--) { if (savedi[reg] != LONG_MIN) { offset -= 4; - fprintf(codefile, "%s r%d, %ld(fp)\n", + fprintf(codefile, "%s r%d,%ld(fp)\n", ops, reg, offset); } } diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 367942408..df06a5d49 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -334,9 +334,9 @@ INSTRUCTIONS lhax GPR:wo, GPR:ro, GPR:ro cost(4, 3). lhz GPR:wo, SET_RC_H:ro cost(4, 3). lhzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). + lwz GPR+LOCAL:wo, SET_RC_W:ro cost(4, 3). lwzu GPR:wo, IND_RC_W:rw cost(4, 3). lwzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). - lwz GPR+LOCAL:wo, SET_RC_W:ro cost(4, 3). mfcr GPR:wo cost(4,2). mfspr GPR:wo, SPR:ro cost(4, 3). mtspr SPR:wo, GPR:ro cost(4, 2). @@ -361,7 +361,6 @@ INSTRUCTIONS rlwnm GPR:wo, GPR:ro, GPR:ro, CONST:ro, CONST:ro. rotlw GPR+LOCAL:wo, GPR:ro, GPR:ro. slw GPR+LOCAL:wo, GPR:ro, GPR:ro. - subf GPR:wo, GPR:ro, GPR:ro. sraw GPR+LOCAL:wo, GPR:ro, GPR:ro cost(4, 2). srawi GPR+LOCAL:wo, GPR:ro, CONST:ro cost(4, 2). srw GPR+LOCAL:wo, GPR:ro, GPR:ro. @@ -378,6 +377,7 @@ INSTRUCTIONS stw GPR:ro, SET_RC_W:rw cost(4, 3). stwx GPR:ro, GPR:ro, GPR:ro cost(4, 3). 
stwu GPR:ro, IND_RC_W:rw cost(4, 3). + subf GPR:wo, GPR:ro, GPR:ro. xor GPR:wo, GPR:ro, GPR:ro. xori GPR:wo, GPR:ro, CONST:ro. xoris GPR:wo, GPR:ro, CONST:ro. @@ -762,6 +762,10 @@ STACKINGRULES COERCIONS + /* The unstacking coercions emit many "addi sp, sp, X" + * instructions; the target optimizer (top) will merge them. + */ + from STACK uses REG gen @@ -2103,12 +2107,13 @@ PATTERNS mr fp, r0 blr. + /* If "ret" coerces STACK to REG3, then top will delete the + * extra "addi sp, sp, 4". + */ + pat ret $1==4 /* Return from procedure, word */ with REG3 leaving ret 0 - with STACK - gen lwz r3, {IND_RC_W, sp, 0} - leaving ret 0 pat ret $1==8 /* Return from proc, double-word */ with REG3 INT_W @@ -2117,11 +2122,6 @@ PATTERNS with REG3 STACK gen lwz r4, {IND_RC_W, sp, 0} leaving ret 0 - with STACK - gen - lwz r3, {IND_RC_W, sp, 0} - lwz r4, {IND_RC_W, sp, 4} - leaving ret 0 /* * These rules for blm/bls are wrong if length is zero. diff --git a/mach/powerpc/top/table b/mach/powerpc/top/table index b3f5b3a31..cbc16c277 100644 --- a/mach/powerpc/top/table +++ b/mach/powerpc/top/table @@ -1,11 +1,12 @@ /* PowerPC table for ACK target optimizer */ -MAXOP 3; +MAXOP 5; LABEL_STARTER '.'; %%; +L1, L2, L3, L4, L5 { not_using_sp(VAL) }; /* operands that do not use sp or r1 */ RNZ { strcmp(VAL, "r0") }; /* not r0 */ X, Y, Z { TRUE }; @@ -16,6 +17,47 @@ X, Y, Z { TRUE }; addi RNZ, RNZ, 0 -> ; addis RNZ, RNZ, 0 -> ; +addi RNZ, RNZ, X : addi RNZ, RNZ, Y { plus(X, Y, Z) } + -> addi RNZ, RNZ, Z ; + +/* Lower "addi sp, sp, X" by lifting other instructions, looking for + * chances to merge or delete _addi_ instructions, and assuming that + * the code generator uses "sp" not "r1". 
+ */ +addi sp, sp, X : ANY L1 { lift(ANY) } + -> ANY L1 : addi sp, sp, X ; +addi sp, sp, X : ANY L1, L2 { lift(ANY) } + -> ANY L1, L2 : addi sp, sp, X ; +addi sp, sp, X : ANY L1, L2, L3 { lift(ANY) } + -> ANY L1, L2, L3 : addi sp, sp, X ; +addi sp, sp, X : ANY L1, L2, L3, L4 { lift(ANY) } + -> ANY L1, L2, L3, L4 : addi sp, sp, X ; +addi sp, sp, X : ANY L1, L2, L3, L4, L5 { lift(ANY) } + -> ANY L1, L2, L3, L4, L5 : addi sp, sp, X ; +addi sp, sp, X : lmw Y, L1 { Y[0]=='r' && atoi(Y+1)>1 } + -> lmw Y, L1 : addi sp, sp, X ; + +/* Merge _addi_ when popping from the stack: the load moves above the _addi_ with its offset raised to Z = X + Y, unless Z is negative. */ +addi sp, sp, X : lwz L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> lwz L1, Z(sp) : addi sp, sp, X ; +addi sp, sp, X : lfs L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> lfs L1, Z(sp) : addi sp, sp, X ; +addi sp, sp, X : lfd L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> lfd L1, Z(sp) : addi sp, sp, X ; + +/* Lower or delete _addi_ when pushing to the stack: the update-form store becomes a plain store at offset Z = X + Y (not negative), followed by "addi sp, sp, Z"; presumably a zero Z is then removed by the "addi RNZ, RNZ, 0" rule. */ +addi sp, sp, X : stwu L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> stw L1, Z(sp) : addi sp, sp, Z ; +addi sp, sp, X : stfsu L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> stfs L1, Z(sp) : addi sp, sp, Z ; +addi sp, sp, X : stfdu L1, Y(sp) { plus(X, Y, Z) && Z[0]!='-' } + -> stfd L1, Z(sp) : addi sp, sp, Z ; +addi sp, sp, 4 : stfdu L1, -8(sp) -> stfdu L1, -4(sp) ; + +/* Delete _addi_ when the next instruction sets the stack pointer from an operand L1 that does not use sp. */ +addi sp, sp, X : addi sp, L1, Y -> addi sp, L1, Y ; +addi sp, sp, X : lwz sp, L1 -> lwz sp, L1 ; + or X, Y, Y -> mr X, Y ; or. X, Y, Y -> mr. X, Y ; @@ -50,3 +92,89 @@ b X : labdef X -> labdef X ; /* LT=0, GT=1, EQ=2, OV=3 */ %%; + +/* Is _c_ a word character (0-9, A-Z, a-z or _)? Returns 1 if so, else 0. */ +static int isword(char c) { + return + (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z') || (c == '_'); +} + +/* Does operand _s_ not use the stack pointer? 
Returns 0 if "sp" or "r1" appears in _s_ as a whole word, else 1. */
+int not_using_sp(const char *s) {
+	int boundary;
+
+	boundary = 1;	/* the start of the string counts as a word boundary */
+	while (*s) {
+		if (boundary &&
+		    ((s[0]=='s' && s[1]=='p') || (s[0]=='r' && s[1]=='1')) &&
+		    !isword(s[2]))
+			return 0;
+		boundary = !isword(*s);	/* a register name may only start after a non-word char */
+		s++;
+	}
+	return 1;
+}
+
+
+/* Instructions to lift(). This table MUST stay in strict strcmp() order,
+ * because lift() searches it with bsearch(). From ../ncg/table, minus branch instructions.
+ */
+const char *liftables[] = {
+	"add", "add.", "addi",
+	"and", "andc", "andi.", "andis.",
+	"cmp", "cmpi", "cmpl", "cmpli",
+	"cmplw", "cmplwi", "cmpw", "cmpwi",
+	"divw", "divwu", "eqv", "extlwi", "extrwi", "extsb", "extsh",
+	"fadd", "fadds", "fcmpo", "fctiwz", "fdiv", "fdivs",
+	"fmr", "fmul", "fmuls", "fneg", "frsp", "fsub", "fsubs",
+	"lbz", "lbzx",
+	"lfd", "lfdu", "lfdx", "lfs", "lfsu", "lfsx",
+	"lha", "lhax", "lhz", "lhzx",
+	"li", "lis", "lwz", "lwzu", "lwzx",
+	"mfcr", "mfspr", "mr", "mr.", "mtspr", "mullw",
+	"nand", "neg", "nor", "or", "or.", "ori", "oris",
+	"rlwinm", "rlwnm", "rotlwi", "rotrwi",
+	"slw", "slwi", "sraw", "srawi", "srw", "srwi",
+	"stb", "stbx",
+	"stfd", "stfdu", "stfdx", "stfs", "stfsu", "stfsx",
+	"sth", "sthx", "stw", "stwu", "stwx",
+	"subf", "xor", "xori", "xoris",
+};
+
+static int liftcmp(const void *a, const void *b) {	/* bsearch() comparator: _a_ and _b_ each point to a char pointer */
+	return strcmp(*(const char **)a, *(const char **)b);
+}
+
+/* May we lift instruction _s_ above "addi SP, SP, X"? Returns 1 if _s_ is listed in liftables[], else 0. */
+int lift(const char *s) {
+	return bsearch(&s, liftables,
+		sizeof(liftables) / sizeof(liftables[0]),
+		sizeof(liftables[0]), liftcmp) != NULL;
+}
+
+
+/* Does _l_ fit a signed 16-bit integer? Returns 1 if so, else 0. */
+static int fits16(long l) {
+	return l >= -32768 && l <= 32767;
+}
+
+/* Tries sum = a + b with signed 16-bit integers. 
_a_ and _b_ must be complete decimal integers; if both and their sum fit 16 bits, writes the sum in decimal to _sum_ and returns 1, else returns 0 and leaves _sum_ alone. */
+int plus(const char *a, const char *b, const char *sum)
+{
+	long la, lb, lsum;
+	char *end;
+
+	la = strtol(a, &end, 10);
+	if (*a == '\0' || *end != '\0' || !fits16(la))	/* reject empty input, trailing junk, out of range */
+		return 0;
+	lb = strtol(b, &end, 10);
+	if (*b == '\0' || *end != '\0' || !fits16(lb))
+		return 0;
+
+	lsum = la + lb;
+	if (!fits16(lsum))
+		return 0;
+	/* 7 = strlen("-32768") + 1. _sum_ is the output buffer despite its const type, so cast explicitly. NOTE(review): assumes the caller passes at least 7 writable bytes -- confirm against top. */
+	snprintf((char *)sum, 7, "%ld", lsum);
+	return 1;
+}