Speed up register allocation by removing some register classes.

The table for PowerPC had placed each GPR and FPR into an individual
register class (like GPR3, GPR4, FPR1, FPR2), and had used these
classes to coerce stack values into specific registers.  But ncg
becomes slow when a table has many register classes.

In http://tack.sourceforge.net/olddocs/ncg.pdf
Hans van Staveren wrote:

> Every extra property means the register set is more unorthogonal and
> *cg* execution time is influenced by that, because it has to take
> into account a larger set of registers that are not equivalent.  So
> try to keep the number of different register classes to a minimum.

Recent changes to the PowerPC table have removed many coercions to
specific registers.  Many functions in libem switched from taking
values in registers to taking them from the stack (see dc05cb2).

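This commit removes all 64 individual register classes (32 for the
GPRs and 32 for the FPRs).  In the few cases where a rule needs a
stack value in a specific register, the rule now accepts the value in
any register and then does a move into the specific register (as the
arm and m68020 tables do).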

This commit speeds up the compilation of some files.  For my test file
fconv.c, the compilation time drops from over 20 seconds to under 1
second.  My fconv.c has 4 conversions from floats to integers, and the
table has my experimental rules that do the conversions by allocating
4 or 5 registers.
This commit is contained in:
George Koehler 2017-02-13 17:44:46 -05:00
parent dc05cb2dc8
commit c5bb3be495

View file

@ -39,118 +39,47 @@ PROPERTIES
SPR /* any SPR */ SPR /* any SPR */
CR /* any CR */ CR /* any CR */
GPR0 GPRSP GPRFP GPR3 GPR4 GPR5 GPR6 GPR7
GPR8 GPR9 GPR10 GPR11 GPR12 GPR13 GPR14 GPR15
GPR16 GPR17 GPR18 GPR19 GPR20 GPR21 GPR22 GPR23
GPR24 GPR25 GPR26 GPR27 GPR28 GPR29 GPR30 GPR31
FPR0(8) FPR1(8) FPR2(8) FPR3(8) FPR4(8) FPR5(8) FPR6(8) FPR7(8)
FPR8(8) FPR9(8) FPR10(8) FPR11(8) FPR12(8) FPR13(8) FPR14(8) FPR15(8)
FPR16(8) FPR17(8) FPR18(8) FPR19(8) FPR20(8) FPR21(8) FPR22(8) FPR23(8)
FPR24(8) FPR25(8) FPR26(8) FPR27(8) FPR28(8) FPR29(8) FPR30(8) FPR31(8)
REGISTERS REGISTERS
/* Reverse order to encourage ncg to allocate them from r31 down */ /* Reverse order to encourage ncg to allocate them from r31 down */
R31("r31") : GPR, REG, GPR31 regvar. r31, r30, r29, r28, r27, r26,
R30("r30") : GPR, REG, GPR30 regvar. r25, r24, r23, r22, r21, r20,
R29("r29") : GPR, REG, GPR29 regvar. r19, r18, r17, r16, r15, r14,
R28("r28") : GPR, REG, GPR28 regvar. r13 : GPR, REG regvar.
R27("r27") : GPR, REG, GPR27 regvar.
R26("r26") : GPR, REG, GPR26 regvar. r12, r11, r10, r9, r8, r7,
R25("r25") : GPR, REG, GPR25 regvar. r6, r5, r4, r3 : GPR, REG.
R24("r24") : GPR, REG, GPR24 regvar.
R23("r23") : GPR, REG, GPR23 regvar. fp, sp, r0 : GPR.
R22("r22") : GPR, REG, GPR22 regvar.
R21("r21") : GPR, REG, GPR21 regvar.
R20("r20") : GPR, REG, GPR20 regvar.
R19("r19") : GPR, REG, GPR19 regvar.
R18("r18") : GPR, REG, GPR18 regvar.
R17("r17") : GPR, REG, GPR17 regvar.
R16("r16") : GPR, REG, GPR16 regvar.
R15("r15") : GPR, REG, GPR15 regvar.
R14("r14") : GPR, REG, GPR14 regvar.
R13("r13") : GPR, REG, GPR13 regvar.
R12("r12") : GPR, REG, GPR12.
R11("r11") : GPR, REG, GPR11.
R10("r10") : GPR, REG, GPR10.
R9("r9") : GPR, REG, GPR9.
R8("r8") : GPR, REG, GPR8.
R7("r7") : GPR, REG, GPR7.
R6("r6") : GPR, REG, GPR6.
R5("r5") : GPR, REG, GPR5.
R4("r4") : GPR, REG, GPR4.
R3("r3") : GPR, REG, GPR3.
FP("fp") : GPR, GPRFP.
SP("sp") : GPR, GPRSP.
R0("r0") : GPR, GPR0.
/* speed hack for sti 8 */ /* speed hack for sti 8 */
PAIR_R9_R10=R9+R10 : REG_PAIR. PAIR_R9_R10=r9+r10 : REG_PAIR.
PAIR_R7_R8=R7+R8 : REG_PAIR. PAIR_R7_R8=r7+r8 : REG_PAIR.
PAIR_R5_R6=R5+R6 : REG_PAIR. PAIR_R5_R6=r5+r6 : REG_PAIR.
PAIR_R3_R4=R3+R4 : REG_PAIR. PAIR_R3_R4=r3+r4 : REG_PAIR.
/* /* f31 to f14 are reserved for regvar. */
* F14 to F31 are reserved for regvar, if we ever implement
* it. Don't add them to FREG; the register allocator would
* be too slow.
*/
F31("f31") : FPR, FPR31.
F30("f30") : FPR, FPR30.
F29("f29") : FPR, FPR29.
F28("f28") : FPR, FPR28.
F27("f27") : FPR, FPR27.
F26("f26") : FPR, FPR26.
F25("f25") : FPR, FPR25.
F24("f24") : FPR, FPR24.
F23("f23") : FPR, FPR23.
F22("f22") : FPR, FPR22.
F21("f21") : FPR, FPR21.
F20("f20") : FPR, FPR20.
F19("f19") : FPR, FPR19.
F18("f18") : FPR, FPR18.
F17("f17") : FPR, FPR17.
F16("f16") : FPR, FPR16.
F15("f15") : FPR, FPR15.
F14("f14") : FPR, FPR14.
F13("f13") : FPR, FREG, FPR13.
F12("f12") : FPR, FREG, FPR12.
F11("f11") : FPR, FREG, FPR11.
F10("f10") : FPR, FREG, FPR10.
F9("f9") : FPR, FREG, FPR9.
F8("f8") : FPR, FREG, FPR8.
F7("f7") : FPR, FREG, FPR7.
F6("f6") : FPR, FREG, FPR6.
F5("f5") : FPR, FREG, FPR5.
F4("f4") : FPR, FREG, FPR4.
F3("f3") : FPR, FREG, FPR3.
F2("f2") : FPR, FREG, FPR2.
F1("f1") : FPR, FREG, FPR1.
F0("f0") : FPR, FPR0.
FS13("f13")=F13 : FSREG. f13, f12, f11, f10, f9, f8
FS12("f12")=F12 : FSREG. f7, f6, f5, f4, f3, f2, f1 : FPR, FREG.
FS11("f11")=F11 : FSREG.
FS10("f10")=F10 : FSREG.
FS9("f9")=F9 : FSREG.
FS8("f8")=F8 : FSREG.
FS7("f7")=F7 : FSREG.
FS6("f6")=F6 : FSREG.
FS5("f5")=F5 : FSREG.
FS4("f4")=F4 : FSREG.
FS3("f3")=F3 : FSREG.
FS2("f2")=F2 : FSREG.
FS1("f1")=F1 : FSREG.
LR("lr") : SPR. f0 : FPR.
CTR("ctr") : SPR.
CR0("cr0") : CR.
#define RSCRATCH R0 fs13("f13")=f13, fs12("f12")=f12,
#define FSCRATCH F0 fs11("f11")=f11, fs10("f10")=f10,
fs9("f9")=f9, fs8("f8")=f8,
fs7("f7")=f7, fs6("f6")=f6,
fs5("f5")=f5, fs4("f4")=f4,
fs3("f3")=f3, fs2("f2")=f2,
fs1("f1")=f1 : FSREG.
lr, ctr : SPR.
cr0 : CR.
#define RSCRATCH r0
#define FSCRATCH f0
TOKENS TOKENS
@ -580,12 +509,12 @@ MOVES
from COND_FS to GPR from COND_FS to GPR
gen gen
fcmpo CR0, %1.reg1, %1.reg2 fcmpo cr0, %1.reg1, %1.reg2
mfcr %2 mfcr %2
from COND_FD to GPR from COND_FD to GPR
gen gen
fcmpo CR0, %1.reg1, %1.reg2 fcmpo cr0, %1.reg1, %1.reg2
mfcr %2 mfcr %2
/* Given a copy of cr0 in %1.reg, extract a condition bit /* Given a copy of cr0 in %1.reg, extract a condition bit
@ -644,40 +573,40 @@ STACKINGRULES
from LOCAL to STACK from LOCAL to STACK
gen gen
COMMENT("stack LOCAL") COMMENT("stack LOCAL")
stwu %1, {IND_RC_W, SP, 0-4} stwu %1, {IND_RC_W, sp, 0-4}
from REG to STACK from REG to STACK
gen gen
COMMENT("stack REG") COMMENT("stack REG")
stwu %1, {IND_RC_W, SP, 0-4} stwu %1, {IND_RC_W, sp, 0-4}
from REG_PAIR to STACK from REG_PAIR to STACK
gen gen
COMMENT("stack REG_PAIR") COMMENT("stack REG_PAIR")
stwu %1.2, {IND_RC_W, SP, 0-4} stwu %1.2, {IND_RC_W, sp, 0-4}
stwu %1.1, {IND_RC_W, SP, 0-4} stwu %1.1, {IND_RC_W, sp, 0-4}
from ANY_BHW-REG to STACK from ANY_BHW-REG to STACK
gen gen
COMMENT("stack ANY_BHW-REG") COMMENT("stack ANY_BHW-REG")
move %1, RSCRATCH move %1, RSCRATCH
stwu RSCRATCH, {IND_RC_W, SP, 0-4} stwu RSCRATCH, {IND_RC_W, sp, 0-4}
from IND_ALL_D to STACK from IND_ALL_D to STACK
gen gen
COMMENT("stack IND_ALL_D") COMMENT("stack IND_ALL_D")
move %1, FSCRATCH move %1, FSCRATCH
stfdu FSCRATCH, {IND_RC_D, SP, 0-8} stfdu FSCRATCH, {IND_RC_D, sp, 0-8}
from FREG to STACK from FREG to STACK
gen gen
COMMENT("stack FPR") COMMENT("stack FPR")
stfdu %1, {IND_RC_D, SP, 0-8} stfdu %1, {IND_RC_D, sp, 0-8}
from FSREG to STACK from FSREG to STACK
gen gen
COMMENT("stack FSREG") COMMENT("stack FSREG")
stfsu %1, {IND_RC_W, SP, 0-4} stfsu %1, {IND_RC_W, sp, 0-4}
@ -694,17 +623,17 @@ COERCIONS
uses REG uses REG
gen gen
COMMENT("coerce STACK->REG") COMMENT("coerce STACK->REG")
lwz %a, {IND_RC_W, SP, 0} lwz %a, {IND_RC_W, sp, 0}
addi SP, SP, {CONST, 4} addi sp, sp, {CONST, 4}
yields %a yields %a
from STACK from STACK
uses REG_PAIR uses REG_PAIR
gen gen
COMMENT("coerce STACK->REG_PAIR") COMMENT("coerce STACK->REG_PAIR")
lwz %a.1, {IND_RC_W, SP, 0} lwz %a.1, {IND_RC_W, sp, 0}
lwz %a.2, {IND_RC_W, SP, 4} lwz %a.2, {IND_RC_W, sp, 4}
addi SP, SP, {CONST, 8} addi sp, sp, {CONST, 8}
yields %a yields %a
from FSREG from FSREG
@ -723,16 +652,16 @@ COERCIONS
uses FREG uses FREG
gen gen
COMMENT("coerce STACK->FREG") COMMENT("coerce STACK->FREG")
lfd %a, {IND_RC_D, SP, 0} lfd %a, {IND_RC_D, sp, 0}
addi SP, SP, {CONST, 8} addi sp, sp, {CONST, 8}
yields %a yields %a
from STACK from STACK
uses FSREG uses FSREG
gen gen
COMMENT("coerce STACK->FSREG") COMMENT("coerce STACK->FSREG")
lfs %a, {IND_RC_W, SP, 0} lfs %a, {IND_RC_W, sp, 0}
addi SP, SP, {CONST, 4} addi sp, sp, {CONST, 4}
yields %a yields %a
from IND_ALL_W from IND_ALL_W
@ -847,10 +776,10 @@ PATTERNS
/* Local variables */ /* Local variables */
pat lal smalls($1) /* Load address of local */ pat lal smalls($1) /* Load address of local */
yields {SUM_RC, FP, $1} yields {SUM_RC, fp, $1}
pat lal /* Load address of local */ pat lal /* Load address of local */
uses REG={SUM_RIS, FP, his($1)} uses REG={SUM_RIS, fp, his($1)}
yields {SUM_RC, %a, los($1)} yields {SUM_RC, %a, los($1)}
pat lol inreg($1)>0 /* Load from local */ pat lol inreg($1)>0 /* Load from local */
@ -1045,9 +974,10 @@ PATTERNS
los 4 los 4
pat los $1==4 /* Load arbitrary size */ pat los $1==4 /* Load arbitrary size */
with GPR3 STACK with REG STACK
kills ALL kills ALL
gen gen
move %1, r3
bl {LABEL, ".los4"} bl {LABEL, ".los4"}
pat sti $1==INT8 /* Store byte indirect */ pat sti $1==INT8 /* Store byte indirect */
@ -1144,9 +1074,10 @@ PATTERNS
sts 4 sts 4
pat sts $1==4 /* Store arbitrary size */ pat sts $1==4 /* Store arbitrary size */
with GPR3 STACK with REG STACK
kills ALL kills ALL
gen gen
move %1, r3
bl {LABEL, ".sts4"} bl {LABEL, ".sts4"}
@ -1869,14 +1800,15 @@ PATTERNS
kills ALL kills ALL
gen gen
labeldef $1 labeldef $1
yields R3 yields r3
pat lab topeltsize($1)==4 && fallthrough($1) pat lab topeltsize($1)==4 && fallthrough($1)
with GPR3 STACK with REG STACK
kills ALL kills ALL
gen gen
move %1, r3
labeldef $1 labeldef $1
yields %1 yields r3
pat lab topeltsize($1)!=4 pat lab topeltsize($1)!=4
with STACK with STACK
@ -1885,8 +1817,9 @@ PATTERNS
labeldef $1 labeldef $1
pat bra topeltsize($1)==4 /* Unconditional jump with TOS GPRister */ pat bra topeltsize($1)==4 /* Unconditional jump with TOS GPRister */
with GPR3 STACK with REG STACK
gen gen
move %1, r3
b {LABEL, $1} b {LABEL, $1}
pat bra topeltsize($1)!=4 /* Unconditional jump without TOS GPRister */ pat bra topeltsize($1)!=4 /* Unconditional jump without TOS GPRister */
@ -1907,14 +1840,14 @@ PATTERNS
with REG STACK with REG STACK
kills ALL kills ALL
gen gen
mtspr CTR, %1 mtspr ctr, %1
bctrl. bctrl.
pat lfr $1==INT32 /* Load function result, word */ pat lfr $1==INT32 /* Load function result, word */
yields R3 yields r3
pat lfr $1==INT64 /* Load function result, double-word */ pat lfr $1==INT64 /* Load function result, double-word */
yields R4 R3 yields r4 r3
pat ret $1==0 /* Return from procedure */ pat ret $1==0 /* Return from procedure */
gen gen
@ -1922,14 +1855,17 @@ PATTERNS
b {LABEL, ".ret"} b {LABEL, ".ret"}
pat ret $1==INT32 /* Return from procedure, word */ pat ret $1==INT32 /* Return from procedure, word */
with GPR3 with REG
gen gen
move %1, r3
return return
b {LABEL, ".ret"} b {LABEL, ".ret"}
pat ret $1==INT64 /* Return from procedure, double-word */ pat ret $1==INT64 /* Return from procedure, double-word */
with GPR3 GPR4 with REG REG
gen gen
move %1, r3
move %2, r4
return return
b {LABEL, ".ret"} b {LABEL, ".ret"}
@ -1944,7 +1880,7 @@ PATTERNS
gen gen
/* Wrong if size is zero */ /* Wrong if size is zero */
srwi %1, %1, {CONST, 2} srwi %1, %1, {CONST, 2}
mtspr CTR, %1 mtspr ctr, %1
1: 1:
lwzx %a, %3, %b lwzx %a, %3, %b
stwx %a, %2, %b stwx %a, %2, %b
@ -1989,9 +1925,10 @@ PATTERNS
ste ".ignmask" ste ".ignmask"
pat trp /* Raise EM trap */ pat trp /* Raise EM trap */
with GPR3 with REG
kills ALL kills ALL
gen gen
move %1, r3
bl {LABEL, ".trap"} bl {LABEL, ".trap"}
pat sig /* Set trap handler */ pat sig /* Set trap handler */
@ -2032,55 +1969,55 @@ PATTERNS
uses REG uses REG
gen gen
move {LABEL, $1}, %a move {LABEL, $1}, %a
move {IND_RC_W, %a, 8}, FP move {IND_RC_W, %a, 8}, fp
move {IND_RC_W, %a, 4}, SP move {IND_RC_W, %a, 4}, sp
move {IND_RC_W, %a, 0}, %a move {IND_RC_W, %a, 0}, %a
mtspr CTR, %a mtspr ctr, %a
bctr. bctr.
pat lor $1==0 /* Load FP */ pat lor $1==0 /* Load FP */
uses REG uses REG
gen gen
move FP, %a move fp, %a
yields %a yields %a
pat lor $1==1 /* Load SP */ pat lor $1==1 /* Load SP */
uses REG uses REG
gen gen
move SP, %a move sp, %a
yields %a yields %a
pat str $1==0 /* Store FP */ pat str $1==0 /* Store FP */
with REG with REG
gen gen
move %1, FP move %1, fp
pat str $1==1 /* Store SP */ pat str $1==1 /* Store SP */
with REG with REG
gen gen
move %1, SP move %1, sp
pat loc ass $1==4 && $2==4 /* Drop 4 bytes from stack */ pat loc ass $1==4 && $2==4 /* Drop 4 bytes from stack */
with exact REG with exact REG
/* nop */ /* nop */
with STACK with STACK
gen gen
addi SP, SP, {CONST, 4} addi sp, sp, {CONST, 4}
pat ass $1==4 /* Adjust stack by variable amount */ pat ass $1==4 /* Adjust stack by variable amount */
with CONST2 STACK with CONST2 STACK
gen gen
move {SUM_RC, SP, %1.val}, SP move {SUM_RC, sp, %1.val}, sp
with CONST_HZ STACK with CONST_HZ STACK
gen gen
move {SUM_RC, SP, his(%1.val)}, SP move {SUM_RC, sp, his(%1.val)}, sp
with CONST_STACK-CONST2-CONST_HZ STACK with CONST_STACK-CONST2-CONST_HZ STACK
gen gen
move {SUM_RC, SP, his(%1.val)}, SP move {SUM_RC, sp, his(%1.val)}, sp
move {SUM_RC, SP, los(%1.val)}, SP move {SUM_RC, sp, los(%1.val)}, sp
with REG STACK with REG STACK
gen gen
move {SUM_RR, SP, %1}, SP move {SUM_RR, sp, %1}, sp
pat asp /* Adjust stack by constant amount */ pat asp /* Adjust stack by constant amount */
leaving leaving
@ -2190,7 +2127,7 @@ PATTERNS
with FREG FREG STACK with FREG FREG STACK
uses REG uses REG
gen gen
fcmpo CR0, %2, %1 fcmpo cr0, %2, %1
bxx* {LABEL, $2} bxx* {LABEL, $2}
/* Pop 2 singles, branch if... */ /* Pop 2 singles, branch if... */
@ -2332,7 +2269,7 @@ PATTERNS
with FREG FREG STACK with FREG FREG STACK
uses REG uses REG
gen gen
fcmpo CR0, %2, %1 fcmpo cr0, %2, %1
bxx* {LABEL, $2} bxx* {LABEL, $2}
/* Pop 2 doubles, branch if... */ /* Pop 2 doubles, branch if... */
@ -2356,8 +2293,8 @@ PATTERNS
uses reusing %1, FREG uses reusing %1, FREG
gen gen
fctiwz %a, %1 fctiwz %a, %1
stfdu %a, {IND_RC_D, SP, 0-8} stfdu %a, {IND_RC_D, sp, 0-8}
addi SP, SP, {CONST, 4} addi sp, sp, {CONST, 4}
/* Convert double to unsigned int */ /* Convert double to unsigned int */
pat loc loc cfu $1==8 && $2==4 pat loc loc cfu $1==8 && $2==4
@ -2379,13 +2316,13 @@ PATTERNS
REG={CONST_HZ, 0x80000000}, REG={CONST_HZ, 0x80000000},
FREG, FREG FREG, FREG
gen gen
stwu %b, {IND_RC_W, SP, 0-8} stwu %b, {IND_RC_W, sp, 0-8}
stw %a, {IND_RC_W, SP, 4} stw %a, {IND_RC_W, sp, 4}
lfd %d, {IND_RC_D, SP, 0} lfd %d, {IND_RC_D, sp, 0}
stw %c, {IND_RC_W, SP, 4} stw %c, {IND_RC_W, sp, 4}
lfd %e, {IND_RC_D, SP, 0} lfd %e, {IND_RC_D, sp, 0}
fsub %d, %d, %e fsub %d, %d, %e
addi SP, SP, {CONST, 8} addi sp, sp, {CONST, 8}
yields %d yields %d
/* /*
@ -2398,13 +2335,13 @@ PATTERNS
REG={CONST_0000_7FFF, 0}, REG={CONST_0000_7FFF, 0},
FREG, FREG FREG, FREG
gen gen
stwu %a, {IND_RC_W, SP, 0-8} stwu %a, {IND_RC_W, sp, 0-8}
stw %1, {IND_RC_W, SP, 4} stw %1, {IND_RC_W, sp, 4}
lfd %c, {IND_RC_D, SP, 0} lfd %c, {IND_RC_D, sp, 0}
stw %b, {IND_RC_W, SP, 4} stw %b, {IND_RC_W, sp, 4}
lfd %d, {IND_RC_D, SP, 0} lfd %d, {IND_RC_D, sp, 0}
fsub %c, %c, %d fsub %c, %c, %d
addi SP, SP, {CONST, 8} addi sp, sp, {CONST, 8}
yields %c yields %c
pat fef $1==8 /* Split fraction, exponent */ pat fef $1==8 /* Split fraction, exponent */