diff --git a/mach/powerpc/ncg/mach.c b/mach/powerpc/ncg/mach.c index e4ab3c078..c63cc20be 100644 --- a/mach/powerpc/ncg/mach.c +++ b/mach/powerpc/ncg/mach.c @@ -4,18 +4,13 @@ * */ -#include +/* + * machine dependent back end routines for the PowerPC + */ + #include -#ifndef NORCSID -static char rcsid[]= "$Id$" ; -#endif - -int framesize; - -/* - * machine dependent back end routines for the Zilog Z80. - */ +static long framesize; con_part(int sz, word w) { @@ -25,17 +20,14 @@ con_part(int sz, word w) part_flush(); if (sz == 1) { w &= 0xFF; - w <<= 8*(3-part_size); + w <<= 8 * (3 - part_size); part_word |= w; } else if (sz == 2) { w &= 0xFFFF; - if (part_size == 0) { - /* Shift 8 for m68k2, 16 otherwise */ - w <<= 4 * TEM_WSIZE; - } + w <<= 8 * (2 - part_size); part_word |= w; } else { - assert(sz == TEM_WSIZE); + assert(sz == 4); part_word = w; } part_size += sz; @@ -56,17 +48,26 @@ con_mult(word sz) #define FL_MSB_AT_LOW_ADDRESS 1 #include +static void +emit_prolog(void) +{ + fprintf(codefile, "mfspr r0, lr\n"); + fprintf(codefile, "addi sp, sp, %ld\n", -framesize - 8); + fprintf(codefile, "stw fp, %ld(sp)\n", framesize); + fprintf(codefile, "stw r0, %ld(sp)\n", framesize + 4); + fprintf(codefile, "addi fp, sp, %ld\n", framesize); +} + void prolog(full nlocals) { - int ss = nlocals + 8; - fprintf(codefile, "addi sp, sp, %d\n", -ss); - fprintf(codefile, "stw fp, %d(sp)\n", nlocals); - fprintf(codefile, "mfspr r0, lr\n" - "stw r0, %d(sp)\n", nlocals+4); - fprintf(codefile, "addi fp, sp, %d\n", nlocals); - framesize = nlocals; + +#ifdef REGVARS + /* f_regsave() will call emit_prolog() */ +#else + emit_prolog(); +#endif } void @@ -102,110 +103,144 @@ char *segname[] = { #ifdef REGVARS -static int savedregsi[32]; -static int numsaved; +static long savedf[32]; +static long savedi[32]; +static int savedtop; + +/* Calculate the register score of a local variable. 
*/ +int +regscore(long offset, int size, int type, int frequency, int totype) +{ + int score; + + switch (type) { + case reg_float: + if (size != 8) { + fprintf(codefile, "! local %ld float size %d reject\n", offset, size); + return -1; + } + break; + default: + if (size != 4) { + fprintf(codefile, "! local %ld int size %d reject\n", offset, size); + return -1; + } + break; + } + + /* Clamp to avoid overflowing 16-bit int score. */ + if (frequency > 8000) + frequency = 8000; + + /* + * Each occurrence of a regvar saves about 4 bytes by not + * emitting a load or store instruction. The overhead is + * about 8 bytes to save and restore the register, plus + * 4 bytes if the local is a parameter. + */ + score = 4 * frequency - 8 - ((offset >= 0) ? 4 : 0); + fprintf(codefile, "! local %ld score %d\n", offset, score); + return score; +} /* Initialise regvar system for one function. */ -i_regsave() +i_regsave(void) { int i; - - fprintf(codefile, "! i_regsave()\n"); - for (i=0; i<32; i++) - savedregsi[i] = INT_MAX; - numsaved = 0; + + for (i=0; i<32; i++) { + savedf[i] = LONG_MIN; + savedi[i] = LONG_MIN; + } + + /* Set top of register save area, relative to fp. */ + savedtop = -framesize; } /* Mark a register as being saved. */ -regsave(const char* regname, full offset, int size) +regsave(const char* regname, long offset, int size) { - int regnum = atoi(regname+1); - savedregsi[regnum] = offset; - numsaved++; - - fprintf(codefile, "! %d is saved in %s\n", offset, regname); -#if 0 - fprintf(codefile, "stwu %s, -4(sp)\n", regname); - if (offset >= 0) - fprintf(codefile, "lwz %s, %d(fp)\n", regname, offset); -#endif + int regnum = atoi(regname + 1); + + assert(regnum >= 0 && regnum <= 31); + switch (regname[0]) { + case 'f': + savedf[regnum] = offset; + framesize += 8; + break; + case 'r': + savedi[regnum] = offset; + framesize += 4; + break; + } } -/* Finish saving ragisters. 
*/ - -void saveloadregs(const char* ops, const char* opm) +static void +saveloadregs(const char* ops, const char* opm, const char *opf) { - int offset = -(framesize + numsaved*4); - int reg = 32; - - /* Check for the possibility of a multiple. */ - - do - { - reg--; - } - while ((reg > 0) && (savedregsi[reg] != INT_MAX)); - if (reg < 31) - { - fprintf(codefile, "%s r%d, %d(fp)\n", opm, reg+1, offset); - offset += (31-reg)*4; - } - - /* Saved everything else singly. */ - - while (reg > 0) - { - if (savedregsi[reg] != INT_MAX) - { - fprintf(codefile, "%s r%d, %d(fp)\n", ops, reg, offset); - offset += 4; + long offset = savedtop; + int reg; + + /* Do floating-point registers. */ + for (reg = 31; reg >= 0; reg--) { + if (savedf[reg] != LONG_MIN) { + offset -= 8; + fprintf(codefile, "%s f%d, %ld(fp)\n", + opf, reg, offset); + } + } + + if (savedi[31] != LONG_MIN && savedi[30] != LONG_MIN) { + /* + * Do multiple registers from reg to r31. + * + * Using stmw or lmw reduces code size, but in some + * processors, runs slower than the equivalent pile of + * stw or lwz instructions. + */ + reg = 30; + while (reg > 0 && savedi[reg - 1] != LONG_MIN) + reg--; + offset -= (32 - reg) * 4; + fprintf(codefile, "%s r%d, %ld(fp)\n", opm, reg, offset); + } else + reg = 32; + + /* Do single general-purpose registers. */ + for (reg--; reg >= 0; reg--) { + if (savedi[reg] != LONG_MIN) { + offset -= 4; + fprintf(codefile, "%s r%d, %ld(fp)\n", + ops, reg, offset); } - reg--; } } -f_regsave() +f_regsave(void) { - int i; - fprintf(codefile, "! 
f_regsave()\n"); - fprintf(codefile, "addi sp, sp, %d\n", -numsaved*4); - - saveloadregs("stw", "stmw"); - - for (i=0; i<32; i++) - if ((savedregsi[i] != INT_MAX) && (savedregsi[i] > 0)) - fprintf(codefile, "lwz r%d, %d(fp)\n", i, savedregsi[i]); + int reg; + + emit_prolog(); + saveloadregs("stw", "stmw", "stfd"); + + for (reg = 31; reg >= 0; reg--) + if (savedf[reg] >= 0) + fprintf(codefile, "lfd f%d, %ld(fp)\n", + reg, savedf[reg]); + + for (reg = 31; reg >= 0; reg--) + if (savedi[reg] >= 0) + fprintf(codefile, "lwz r%d, %ld(fp)\n", + reg, savedi[reg]); } /* Restore all saved registers. */ -regreturn() +regreturn(void) { - fprintf(codefile, "! regreturn()\n"); - saveloadregs("lwz", "lmw"); -} - -/* Calculate the score of a given register. */ - -int regscore(full offset, int size, int type, int frequency, int totype) -{ - int score; - - fprintf(codefile, "! regscore(%ld, %d, %d, %d, %d)\n", offset, size, type, frequency, totype); - - if (size != 4) - return -1; - - /* Per use: 6 bytes (on average) - * Overhead in prologue: 4 bytes, plus 4 if a parameter - * Overhead in epilogue: 0 bytes - */ - - score = frequency*6 - 4 - ((offset>=0) ? 4 : 0); - fprintf(codefile, "! local at offset %d has regvar score %d\n", offset, score); - return score; + saveloadregs("lwz", "lmw", "lfd"); } #endif diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 76bc5c90a..264767e8b 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -47,14 +47,16 @@ REGISTERS r31, r30, r29, r28, r27, r26, r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, - r13 : GPR, REG regvar. + r13 : GPR, REG regvar(reg_any). r12, r11, r10, r9, r8, r7, r6, r5, r4, r3 : GPR, REG. fp, sp, r0 : GPR. - /* f31 to f14 are reserved for regvar. */ + f31, f30, f29, f28, f27, f26, + f25, f24, f23, f22, f21, f20, + f19, f18, f17, f16, f15, f14 : FPR, FREG regvar(reg_float). f13, f12, f11, f10, f9, f8 f7, f6, f5, f4, f3, f2, f1 : FPR, FREG. 
@@ -86,6 +88,7 @@ TOKENS LABEL_HA = { ADDR adr; } 4 "ha16[" adr "]". LABEL_LO = { ADDR adr; } 4 "lo16[" adr "]". LOCAL = { INT off; } 4 ">>> BUG IN LOCAL". + DLOCAL = { INT off; } 8 ">>> BUG IN DLOCAL". /* Allows us to use regvar() to refer to registers */ @@ -239,27 +242,27 @@ INSTRUCTIONS eqv GPR:wo, GPR:ro, GPR:ro. extsb GPR:wo, GPR:ro. extsh GPR:wo, GPR:ro. - fadd FREG:wo, FREG:ro, FREG:ro cost(4, 5). + fadd FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 5). fadds FSREG:wo, FSREG:ro, FSREG:ro cost(4, 5). fcmpo CR:wo, FREG:ro, FREG:ro cost(4, 5). fcmpo CR:wo, FSREG:ro, FSREG:ro cost(4, 5). fctiwz FREG:wo, FREG:ro. - fdiv FREG:wo, FREG:ro, FREG:ro cost(4, 35). + fdiv FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 35). fdivs FSREG:wo, FSREG:ro, FSREG:ro cost(4, 21). - fmr FPR:wo, FPR:ro cost(4, 5). + fmr FPR+DLOCAL:wo, FPR:ro cost(4, 5). fmr FSREG:wo, FSREG:ro cost(4, 5). - fmul FREG:wo, FREG:ro, FREG:ro cost(4, 5). + fmul FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 5). fmuls FSREG:wo, FSREG:ro, FSREG:ro cost(4, 5). - fneg FREG:wo, FREG:ro cost(4, 5). + fneg FREG+DLOCAL:wo, FREG:ro cost(4, 5). fneg FSREG:wo, FSREG:ro cost(4, 5). frsp FSREG:wo, FREG:ro cost(4, 5). - fsub FREG:wo, FREG:ro, FREG:ro cost(4, 5). + fsub FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 5). fsubs FSREG:wo, FSREG:ro, FSREG:ro cost(4, 5). lbz GPR:wo, IND_RC_B+IND_RL_B:ro cost(4, 3). lbzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). - lfd FPR:wo, IND_RC_D+IND_RL_D:ro cost(4, 5). + lfd FPR+DLOCAL:wo, IND_RC_D+IND_RL_D:ro cost(4, 5). lfdu FPR:wo, IND_RC_D:ro cost(4, 5). - lfdx FPR:wo, GPR:ro, GPR:ro cost(4, 5). + lfdx FPR+DLOCAL:wo, GPR:ro, GPR:ro cost(4, 5). lfs FSREG:wo, IND_RC_W+IND_RL_W:ro cost(4, 4). lfsu FSREG:wo, IND_RC_W:rw cost(4, 4). lfsx FSREG:wo, GPR:ro, GPR:ro cost(4, 4). @@ -296,7 +299,7 @@ INSTRUCTIONS stb GPR:ro, IND_RC_B+IND_RL_B:rw cost(4, 3). stbx GPR:ro, GPR:ro, GPR:ro cost(4, 3). stfd FPR:ro, IND_RC_D+IND_RL_D:rw cost(4, 4). - stfdu FPR:ro, IND_RC_D:rw cost(4, 4). 
+ stfdu FPR+DLOCAL:ro, IND_RC_D:rw cost(4, 4). stfdx FPR:ro, GPR:ro, GPR:ro cost(4, 4). stfs FSREG:ro, IND_RC_W+IND_RL_W:rw cost(4, 3). stfsu FSREG:ro, IND_RC_W:rw cost(4, 3). @@ -318,6 +321,9 @@ MOVES from GPR to GPR gen mr %2, %1 + from FPR to FPR+DLOCAL + gen fmr %2, %1 + /* Constants */ from CONST + CONST_STACK smalls(%val) to GPR @@ -437,10 +443,10 @@ MOVES /* Read double */ - from IND_RC_D+IND_RL_D to FPR + from IND_RC_D+IND_RL_D to FPR+DLOCAL gen lfd %2, %1 - from IND_RR_D to FPR + from IND_RR_D to FPR+DLOCAL gen lfdx %2, %1.reg1, %1.reg2 /* Write double */ @@ -586,9 +592,9 @@ STACKINGRULES move %1, FSCRATCH stfdu FSCRATCH, {IND_RC_D, sp, 0-8} - from FREG to STACK + from FREG+DLOCAL to STACK gen - COMMENT("stack FPR") + COMMENT("stack FREG+DLOCAL") stfdu %1, {IND_RC_D, sp, 0-8} from FSREG to STACK @@ -761,47 +767,57 @@ PATTERNS uses REG={SUM_RIS, fp, his($1)} yields {SUM_RC, %a, los($1)} - pat lol inreg($1)>0 /* Load from local */ + /* Load word from local */ + pat lol inreg($1)==reg_any yields {LOCAL, $1} - - pat lol /* Load from local */ + pat lol leaving lal $1 - loi INT32 + loi 4 - pat ldl /* Load double-word from local */ + /* Load double-word from local */ + pat ldl inreg($1)==reg_float + yields {DLOCAL, $1} + pat ldl leaving lal $1 - loi INT32*2 + loi 8 - pat stl inreg($1)>0 /* Store to local */ + /* Store word to local */ + pat stl inreg($1)==reg_any with ANY_BHW kills regvar($1), LOCAL %off==$1 + gen move %1, {GPRE, regvar($1)} + pat stl + leaving + lal $1 + sti 4 + + /* Store double-word to local */ + pat sdl inreg($1)==reg_float + with exact FREG+IND_ALL_D + gen move %1, {DLOCAL, $1} + with STACK gen - move %1, {GPRE, regvar($1)} - - pat stl /* Store to local */ + lfd {DLOCAL, $1}, {IND_RC_D, sp, 0} + addi sp, sp, {CONST, 8} + pat sdl leaving lal $1 - sti INT32 + sti 8 - pat sdl /* Store double-word to local */ - leaving - lal $1 - sti INT32*2 - - pat lil inreg($1)>0 /* Load from indirected local */ + /* Load indirect from local */ + pat lil 
inreg($1)==reg_any yields {IND_RC_W, regvar($1), 0} - - pat lil /* Load from indirected local */ + pat lil leaving lol $1 - loi INT32 + loi 4 pat sil /* Save to indirected local */ leaving lol $1 - sti INT32 + sti 4 pat zrl /* Zero local */ leaving @@ -2021,12 +2037,7 @@ PATTERNS yields %1 -/* Floating point support */ - - /* All very cheap and nasty --- this needs to be properly integrated into - * the code generator. ncg doesn't like having separate FPU registers. */ - - /* Single-precision */ +/* Single-precision floating-point */ pat zrf $1==INT32 /* Push zero */ leaving @@ -2168,46 +2179,62 @@ PATTERNS loc 4 cff - /* Double-precision */ + +/* Double-precision floating-point */ pat zrf $1==INT64 /* Push zero */ leaving lde ".fd_00000000" - pat adf $1==INT64 /* Add double */ + pat adf $1==8 /* Add double */ with FREG FREG - uses FREG + uses reusing %1, FREG gen fadd %a, %2, %1 yields %a - - pat sbf $1==INT64 /* Subtract double */ + pat adf sdl $1==8 && inreg($2)==reg_float with FREG FREG - uses FREG + gen fadd {DLOCAL, $2}, %2, %1 + + pat sbf $1==8 /* Subtract double */ + with FREG FREG + uses reusing %1, FREG gen fsub %a, %2, %1 yields %a + pat sbf sdl $1==8 && inreg($2)==reg_float + with FREG FREG + gen fsub {DLOCAL, $2}, %2, %1 - pat mlf $1==INT64 /* Multiply double */ + pat mlf $1==8 /* Multiply double */ with FREG FREG uses reusing %1, FREG gen fmul %a, %2, %1 yields %a + pat mlf sdl $1==8 && inreg($2)==reg_float + with FREG FREG + gen fmul {DLOCAL, $2}, %2, %1 - pat dvf $1==INT64 /* Divide double */ + pat dvf $1==8 /* Divide double */ with FREG FREG uses reusing %1, FREG gen fdiv %a, %2, %1 yields %a + pat dvf sdl $1==8 && inreg($2)==reg_float + with FREG FREG + gen fdiv {DLOCAL, $2}, %2, %1 - pat ngf $1==INT64 /* Negate double */ + pat ngf $1==8 /* Negate double */ with FREG uses reusing %1, FREG gen fneg %a, %1 yields %a + pat ngf sdl $1==8 && inreg($2)==reg_float + with FREG + gen fneg {DLOCAL, $2}, %1 pat cmf $1==INT64 /* Compare double */ with FREG 
FREG diff --git a/util/ego/descr/powerpc.descr b/util/ego/descr/powerpc.descr index 5138cc44b..e59990ea1 100644 --- a/util/ego/descr/powerpc.descr +++ b/util/ego/descr/powerpc.descr @@ -3,26 +3,32 @@ pointersize: 4 %%RA general registers: 19 address registers: 0 -floating point registers: 0 +floating point registers: 18 use general as pointer: yes register score parameters: local variable: - (2 cases) + (3 cases) pointer,general (1 size) default -> (3,4) general,general (1 size) default -> (3,4) + float,float + (1 size) + default -> (5,4) address of local variable: - (2 cases) + (3 cases) pointer,general (1 size) default -> (0,0) general,general (1 size) default -> (0,0) + float,float + (1 size) + default -> (0,0) constant: (2 sizes) fitbyte -> (-1,-1) @@ -39,21 +45,27 @@ register score parameters: opening cost parameters: local variable: - (2 cases) + (3 cases) pointer (1 size) default -> (3,4) general (1 size) default -> (3,4) + float + (1 size) + default -> (5,4) address of local variable: - (2 cases) + (3 cases) pointer (1 size) default -> (1,4) general (1 size) - general -> (1,4) + default -> (1,4) + float + (1 size) + default -> (1,4) constant: (2 sizes) fitbyte -> (1000,1000) @@ -69,7 +81,7 @@ opening cost parameters: default -> (1000,1000) register save costs: - (21 cases) + (39 cases) 0 -> (0,0) 1 -> (6,8) 2 -> (12,16) @@ -90,6 +102,24 @@ register save costs: 17 -> (102,136) 18 -> (108,144) 19 -> (114,152) + 20 -> (120,160) + 21 -> (126,168) + 22 -> (132,176) + 23 -> (138,184) + 24 -> (144,192) + 25 -> (150,200) + 26 -> (156,208) + 27 -> (162,216) + 28 -> (168,224) + 29 -> (174,232) + 30 -> (180,240) + 31 -> (186,248) + 32 -> (192,256) + 33 -> (198,264) + 34 -> (204,272) + 35 -> (210,280) + 36 -> (216,288) + 37 -> (222,296) 0 -> (0,0) %%UD access costs of global variables: