ack/mach/powerpc/ncg/table
George Koehler ad47fa5fe3 Add splitting coercions for IND_ALL_D.
Delete my wrong comment (from commits cfbc537, a8f62f4, 5432bd0) which
claimed that such coercions are not possible.
2017-12-18 20:59:04 -05:00

2395 lines
58 KiB
Plaintext

/*
 * Basic sizes and frame offsets used by the rest of this table,
 * followed by preprocessor helpers for splitting 32-bit values into
 * 16-bit halves.
 */
EM_WSIZE = 4
EM_PSIZE = 4
EM_BSIZE = 8 /* two words saved in call frame */
INT8 = 1 /* Size of values */
INT16 = 2
INT32 = 4
INT64 = 8
FP_OFFSET = 0 /* Offset of saved FP relative to our FP */
PC_OFFSET = 4 /* Offset of saved PC relative to our FP */
SL_OFFSET = 8 /* Offset of static link */
/* COMMENT(n) expands to nothing, because its body is commented out. */
#define COMMENT(n) /* comment {LABEL, n} */
/* True when x is one of the sizes 1, 2, 4 or 8. */
#define nicesize(x) ((x)==INT8 || (x)==INT16 || (x)==INT32 || (x)==INT64)
/* Does n fit in a signed (smalls) or unsigned (smallu) 16-bit field? */
#define smalls(n) sfit(n, 16)
#define smallu(n) ufit(n, 16)
/* lo(n) is bits 0..15 of n; hi(n) is bits 16..31 of n. */
#define lo(n) ((n) & 0xFFFF)
#define hi(n) (((n)>>16) & 0xFFFF)
/* Use these for instructions that treat the low half as signed --- his()
* includes a modifier to produce the correct value when the low half gets
* sign extended. Er, do make sure you load the low half second. */
/* los(n) is lo(n) sign-extended from bit 15; his(n) is hi(n) plus one
 * when bit 15 of lo(n) is set, wrapped to 16 bits. */
#define los(n) (lo(n) | (((0-(lo(n)>>15)) & ~0xFFFF)))
#define his(n) ((hi(n) + (lo(n)>>15)) & 0xFFFF)
/*
 * Register properties.  Each register declared in REGISTERS is tagged
 * with one or more of these; FPR and FREG carry an explicit size of 8.
 */
PROPERTIES
GPR /* general-purpose register */
REG /* allocatable GPR */
REG3 /* coercion to r3 */
FPR(8) /* floating-point register */
FREG(8) /* allocatable FPR */
FSREG /* allocatable single-precision FPR */
SPR /* special-purpose register */
CR /* condition register */
/*
 * Register declarations.  r0, sp, fp and r12 are GPR only (not REG),
 * and f0 is FPR only (not FREG), so they are never handed out by
 * "uses REG" / "uses FREG".  The fsN registers are single-precision
 * views that print with the same name as the corresponding fN.
 */
REGISTERS
/*
* When ncg allocates regvars, it seems to start with the last
* register in the first class. To encourage ncg to allocate
* them from r31 down, we list them in one class as
* r13, r14, ..., r31: GPR, REG regvar(reg_any).
*/
r0, sp, fp, r12 : GPR.
r3 : GPR, REG, REG3.
r4, r5, r6, r7, r8, r9, r10, r11 : GPR, REG.
r13, r14, r15, r16, r17, r18, r19, r20, r21, r22, r23, r24,
r25, r26, r27, r28, r29, r30, r31
: GPR, REG regvar(reg_any).
f0 : FPR.
f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13
: FPR, FREG.
f14, f15, f16, f17, f18, f19, f20, f21, f22, f23, f24, f25,
f26, f27, f28, f29, f30, f31
: FPR, FREG regvar(reg_float).
fs1("f1")=f1, fs2("f2")=f2, fs3("f3")=f3, fs4("f4")=f4,
fs5("f5")=f5, fs6("f6")=f6, fs7("f7")=f7, fs8("f8")=f8,
fs9("f9")=f9, fs10("f10")=f10, fs11("f11")=f11, fs12("f12")=f12,
fs13("f13")=f13
: FSREG.
/* reglap: reg_float may have subregister of different size */
fs14("f14")=f14, fs15("f15")=f15, fs16("f16")=f16, fs17("f17")=f17,
fs18("f18")=f18, fs19("f19")=f19, fs20("f20")=f20, fs21("f21")=f21,
fs22("f22")=f22, fs23("f23")=f23, fs24("f24")=f24, fs25("f25")=f25,
fs26("f26")=f26, fs27("f27")=f27, fs28("f28")=f28, fs29("f29")=f29,
fs30("f30")=f30, fs31("f31")=f31
: FSREG regvar(reg_float).
lr, ctr : SPR.
cr0 : CR.
/* The stacking rules and the splitting coercions can't
* allocate registers. We use r12 in the splitting coercions,
* and these scratch registers in the stacking rules.
*/
#define RSCRATCH r0
#define FSCRATCH f0
/*
 * Token declarations.  Each token lists its fields in braces, then its
 * size in bytes (4 or 8), then an optional output format.  Tokens
 * without a format are never printed directly; they are turned into
 * instructions by the rules in MOVES.
 */
TOKENS
/* Primitives */
C /* constant */ = { INT val; } 4 val.
LABEL = { ADDR adr; } 4 adr.
LABEL_HI = { ADDR adr; } 4 "hi16[" adr "]".
LABEL_HA = { ADDR adr; } 4 "ha16[" adr "]".
LABEL_LO = { ADDR adr; } 4 "lo16[" adr "]".
LOCAL = { INT off; } 4 ">>> BUG IN LOCAL".
DLOCAL = { INT off; } 8 ">>> BUG IN DLOCAL".
/* Allows us to use regvar() to refer to registers */
GPR_EXPR = { GPR reg; } 4 reg.
FPR_EXPR = { FPR reg; } 8 reg.
FSREG_EXPR = { FSREG reg; } 4 reg.
/* Constants on the stack */
/* The suffix names the value range that the "pat loc" patterns
 * assign to each token. */
CONST_N8000 = { INT val; } 4 val.
CONST_N7FFF_N0001 = { INT val; } 4 val.
CONST_0000_7FFF = { INT val; } 4 val.
CONST_8000 = { INT val; } 4 val.
CONST_8001_FFFF = { INT val; } 4 val.
CONST_HI_ZR = { INT val; } 4 val.
CONST_HI_LO = { INT val; } 4 val.
/* Expression partial results */
SEX_B = { GPR reg; } 4. /* sign extension */
SEX_H = { GPR reg; } 4.
SUM_RIS = { GPR reg; INT offhi; } 4. /* reg + (offhi << 16) */
SUM_RC = { GPR reg; INT off; } 4. /* reg + off */
SUM_RL = { GPR reg; ADDR adr; } 4. /* reg + lo16[adr] */
SUM_RR = { GPR reg1; GPR reg2; } 4. /* reg1 + reg2 */
SUB_RR = { GPR reg1; GPR reg2; } 4. /* reg1 - reg2 */
NEG_R = { GPR reg; } 4. /* -reg */
MUL_RR = { GPR reg1; GPR reg2; } 4. /* reg1 * reg2 */
DIV_RR = { GPR reg1; GPR reg2; } 4. /* reg1 / reg2 signed */
DIV_RR_U = { GPR reg1; GPR reg2; } 4. /* reg1 / reg2 unsigned */
/* Memory operands: _B byte, _H halfword, _H_S halfword loaded with
 * lha/lhax, _W word, _D double-word; addressed as register+constant
 * (RC), register+lo16[label] (RL) or register+register (RR). */
IND_RC_B = { GPR reg; INT off; } 4 off "(" reg ")".
IND_RL_B = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")".
IND_RR_B = { GPR reg1; GPR reg2; } 4.
IND_RC_H = { GPR reg; INT off; } 4 off "(" reg ")".
IND_RL_H = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")".
IND_RR_H = { GPR reg1; GPR reg2; } 4.
IND_RC_H_S = { GPR reg; INT off; } 4 off "(" reg ")".
IND_RL_H_S = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")".
IND_RR_H_S = { GPR reg1; GPR reg2; } 4.
IND_RC_W = { GPR reg; INT off; } 4 off "(" reg ")".
IND_RL_W = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")".
IND_RR_W = { GPR reg1; GPR reg2; } 4.
IND_RC_D = { GPR reg; INT off; } 8 off "(" reg ")".
IND_RL_D = { GPR reg; ADDR adr; } 8 "lo16[" adr "](" reg ")".
IND_RR_D = { GPR reg1; GPR reg2; } 8.
NOT_R = { GPR reg; } 4. /* ~reg */
AND_RIS = { GPR reg; INT valhi; } 4.
AND_RC = { GPR reg; INT val; } 4.
AND_RR = { GPR reg1; GPR reg2; } 4.
ANDC_RR = { GPR reg1; GPR reg2; } 4. /* reg1 & ~reg2 */
OR_RIS = { GPR reg; INT valhi; } 4.
OR_RC = { GPR reg; INT val; } 4.
OR_RR = { GPR reg1; GPR reg2; } 4.
ORC_RR = { GPR reg1; GPR reg2; } 4. /* reg1 | ~reg2 */
XOR_RIS = { GPR reg; INT valhi; } 4.
XOR_RC = { GPR reg; INT val; } 4.
XOR_RR = { GPR reg1; GPR reg2; } 4.
NAND_RR = { GPR reg1; GPR reg2; } 4. /* ~(reg1 & reg2) */
NOR_RR = { GPR reg1; GPR reg2; } 4. /* ~(reg1 | reg2) */
EQV_RR = { GPR reg1; GPR reg2; } 4. /* ~(reg1 ^ reg2) */
/* Pending comparisons: signed (COND), unsigned (CONDL), float (COND_F*).
 * Moving one to a GPR performs the compare and copies the condition
 * register with mfcr. */
COND_RC = { GPR reg; INT val; } 4.
COND_RR = { GPR reg1; GPR reg2; } 4.
CONDL_RC = { GPR reg; INT val; } 4.
CONDL_RR = { GPR reg1; GPR reg2; } 4.
COND_FS = { FSREG reg1; FSREG reg2; } 4.
COND_FD = { FREG reg1; FREG reg2; } 4.
/* A condition bit still to be extracted from the mfcr copy in reg. */
XEQ = { GPR reg; } 4.
XNE = { GPR reg; } 4.
XGT = { GPR reg; } 4.
XGE = { GPR reg; } 4.
XLT = { GPR reg; } 4.
XLE = { GPR reg; } 4.
/*
 * Named unions of tokens and register properties, used as operand
 * classes in the instruction, move, coercion and pattern rules.
 */
SETS
/* signed 16-bit integer */
CONST2 = CONST_N8000 + CONST_N7FFF_N0001 + CONST_0000_7FFF.
/* integer that, when negated, fits signed 16-bit */
CONST2_WHEN_NEG = CONST_N7FFF_N0001 + CONST_0000_7FFF + CONST_8000.
/* unsigned 16-bit integer */
UCONST2 = CONST_0000_7FFF + CONST_8000 + CONST_8001_FFFF.
/* any constant on stack */
CONST_STACK = CONST_N8000 + CONST_N7FFF_N0001 + CONST_0000_7FFF +
CONST_8000 + CONST_8001_FFFF +
CONST_HI_ZR + CONST_HI_LO.
CONST = C + CONST_STACK.
IND_ALL_B = IND_RC_B + IND_RL_B + IND_RR_B.
IND_ALL_H = IND_RC_H + IND_RL_H + IND_RR_H +
IND_RC_H_S + IND_RL_H_S + IND_RR_H_S.
IND_ALL_W = IND_RC_W + IND_RL_W + IND_RR_W.
IND_ALL_D = IND_RC_D + IND_RL_D + IND_RR_D.
/* anything killed by sti (store indirect) */
MEMORY = IND_ALL_B + IND_ALL_H + IND_ALL_W + IND_ALL_D.
/* any integer from stack that we can easily move to GPR */
INT_W = REG + CONST_STACK + SEX_B + SEX_H +
SUM_RIS + SUM_RC + SUM_RL + SUM_RR +
SUB_RR + NEG_R + MUL_RR + DIV_RR + DIV_RR_U +
IND_ALL_B + IND_ALL_H + IND_ALL_W +
NOT_R + AND_RIS + AND_RC + AND_RR + ANDC_RR +
OR_RIS + OR_RC + OR_RR + ORC_RR +
XOR_RIS + XOR_RC + XOR_RR + NAND_RR + NOR_RR + EQV_RR +
XEQ + XNE + XGT + XGE + XLT + XLE.
/* 8-byte and 4-byte floating-point values: a register or a memory operand */
FLOAT_D = FREG + IND_ALL_D.
FLOAT_W = FSREG + IND_ALL_W.
/*
 * Instruction declarations.  Each line gives the mnemonic (optionally
 * followed by the string to print when it differs from the name), then
 * the operand classes, each marked :ro (read), :wo (written) or :rw
 * (read and written).  A trailing cost(space, time) overrides the
 * default cost declared below.
 */
INSTRUCTIONS
/* We give time as cycles of total latency from Freescale
* Semiconductor, MPC7450 RISC Microprocessor Family Reference
* Manual, Rev. 5, section 6.6.
*
* We have only 4-byte alignment for doubles; 8-byte alignment is
* optimal. We guess the misalignment penalty by adding 1 cycle to
* the cost of loading or storing a double:
* lfd lfdu lfdx: 4 -> 5
* stfd stfdu stfdx: 3 -> 4
*/
cost(4, 1) /* space, time */
add GPR:wo, GPR:ro, GPR:ro.
addX "add." GPR:wo, GPR:ro, GPR:ro.
addi GPR:wo, GPR:ro, CONST+LABEL_LO:ro.
li GPR:wo, CONST:ro.
addis GPR:wo, GPR:ro, CONST+LABEL_HI+LABEL_HA:ro.
lis GPR:wo, CONST+LABEL_HI+LABEL_HA:ro.
and GPR:wo, GPR:ro, GPR:ro.
andc GPR:wo, GPR:ro, GPR:ro.
andiX "andi." GPR:wo:cc, GPR:ro, CONST:ro.
andisX "andis." GPR:wo:cc, GPR:ro, CONST:ro.
/* Branches */
b LABEL:ro.
bc CONST:ro, CONST:ro, LABEL:ro.
bdnz LABEL:ro.
beq LABEL:ro.
bne LABEL:ro.
bgt LABEL:ro.
bge LABEL:ro.
blt LABEL:ro.
ble LABEL:ro.
bxx LABEL:ro. /* dummy */
bcctr CONST:ro, CONST:ro, CONST:ro.
bctr.
bcctrl CONST:ro, CONST:ro, CONST:ro.
bctrl.
bclr CONST:ro, CONST:ro, CONST:ro.
blr.
bl LABEL:ro.
/* Integer compares; all are declared to kill the condition codes. */
cmp CR:wo, CONST:ro, GPR:ro, GPR:ro kills :cc.
cmpw GPR:ro, GPR:ro kills :cc.
cmpi CR:wo, CONST:ro, GPR:ro, CONST:ro kills :cc.
cmpwi GPR:ro, CONST:ro kills :cc.
cmpl CR:wo, CONST:ro, GPR:ro, GPR:ro kills :cc.
cmplw GPR:ro, GPR:ro kills :cc.
cmpli CR:wo, CONST:ro, GPR:ro, CONST:ro kills :cc.
cmplwi GPR:ro, CONST:ro kills :cc.
divw GPR:wo, GPR:ro, GPR:ro cost(4, 23).
divwu GPR:wo, GPR:ro, GPR:ro cost(4, 23).
eqv GPR:wo, GPR:ro, GPR:ro.
extsb GPR:wo, GPR:ro.
extsh GPR:wo, GPR:ro.
/* Floating point.  A destination of FREG+DLOCAL or FSREG+LOCAL lets
 * the result be written to a LOCAL/DLOCAL token as well as a register. */
fadd FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 5).
fadds FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5).
fcmpo CR:wo, FREG:ro, FREG:ro cost(4, 5).
fcmpo CR:wo, FSREG:ro, FSREG:ro cost(4, 5).
fctiwz FREG:wo, FREG:ro.
fdiv FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 35).
fdivs FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 21).
fmr FPR:wo, FPR:ro cost(4, 5).
fmr FSREG:wo, FSREG:ro cost(4, 5).
fmul FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 5).
fmuls FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5).
fneg FREG+DLOCAL:wo, FREG:ro cost(4, 5).
fneg FSREG+LOCAL:wo, FSREG:ro cost(4, 5).
frsp FSREG+LOCAL:wo, FREG:ro cost(4, 5).
fsub FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 5).
fsubs FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5).
/* Byte loads */
lbz GPR:wo, IND_RC_B+IND_RL_B:ro cost(4, 3).
lbzx GPR:wo, GPR:ro, GPR:ro cost(4, 3).
/* Double-word loads.  lfdu is the update form: it writes the effective
 * address back into the base register, so its address operand is
 * declared :rw, matching lfsu, lwzu, stwu, stfdu and stfsu. */
lfd FPR+DLOCAL:wo, IND_RC_D+IND_RL_D:ro cost(4, 5).
lfdu FPR:wo, IND_RC_D:rw cost(4, 5).
lfdx FPR:wo, GPR:ro, GPR:ro cost(4, 5).
/* Single-precision, halfword and word loads.  The update forms
 * (lfsu, lwzu) mark their address operand :rw. */
lfs FSREG+LOCAL:wo, IND_RC_W+IND_RL_W:ro cost(4, 4).
lfsu FSREG:wo, IND_RC_W:rw cost(4, 4).
lfsx FSREG:wo, GPR:ro, GPR:ro cost(4, 4).
lha GPR:wo, IND_RC_H_S+IND_RL_H_S:ro cost(4, 3).
lhax GPR:wo, GPR:ro, GPR:ro cost(4, 3).
lhz GPR:wo, IND_RC_H+IND_RL_H:ro cost(4, 3).
lhzx GPR:wo, GPR:ro, GPR:ro cost(4, 3).
lwzu GPR:wo, IND_RC_W:rw cost(4, 3).
lwzx GPR:wo, GPR:ro, GPR:ro cost(4, 3).
lwz GPR+LOCAL:wo, IND_RC_W+IND_RL_W:ro cost(4, 3).
/* Condition register and special-purpose register access */
mfcr GPR:wo cost(4,2).
mfspr GPR:wo, SPR:ro cost(4, 3).
mtspr SPR:wo, GPR:ro cost(4, 2).
mullw GPR:wo, GPR:ro, GPR:ro cost(4, 4).
nand GPR:wo, GPR:ro, GPR:ro.
neg GPR:wo, GPR:ro.
nor GPR:wo, GPR:ro, GPR:ro.
or GPR:wo, GPR:ro, GPR:ro.
mr GPR:wo, GPR:ro.
/* orX and orX_readonly both print "or."; they differ only in whether
 * the first operand is declared written (:wo) or read (:ro). */
orX "or." GPR:wo:cc, GPR:ro, GPR:ro.
orX_readonly "or." GPR:ro:cc, GPR:ro, GPR:ro.
orc GPR:wo, GPR:ro, GPR:ro.
ori GPR:wo, GPR:ro, CONST+LABEL_LO:ro.
oris GPR:wo, GPR:ro, CONST:ro.
/* Rotates and shifts */
rlwinm GPR:wo, GPR:ro, CONST:ro, CONST:ro, CONST:ro.
extlwi GPR:wo, GPR:ro, CONST:ro, CONST:ro.
extrwi GPR:wo, GPR:ro, CONST:ro, CONST:ro.
rotlwi GPR+LOCAL:wo, GPR:ro, CONST:ro.
rotrwi GPR+LOCAL:wo, GPR:ro, CONST:ro.
slwi GPR+LOCAL:wo, GPR:ro, CONST:ro.
srwi GPR+LOCAL:wo, GPR:ro, CONST:ro.
rlwnm GPR:wo, GPR:ro, GPR:ro, CONST:ro, CONST:ro.
rotlw GPR+LOCAL:wo, GPR:ro, GPR:ro.
slw GPR+LOCAL:wo, GPR:ro, GPR:ro.
subf GPR:wo, GPR:ro, GPR:ro.
sraw GPR+LOCAL:wo, GPR:ro, GPR:ro cost(4, 2).
srawi GPR+LOCAL:wo, GPR:ro, CONST:ro cost(4, 2).
srw GPR+LOCAL:wo, GPR:ro, GPR:ro.
/* Stores.  The memory operand is marked :rw. */
stb GPR:ro, IND_RC_B+IND_RL_B:rw cost(4, 3).
stbx GPR:ro, GPR:ro, GPR:ro cost(4, 3).
stfd FPR:ro, IND_RC_D+IND_RL_D:rw cost(4, 4).
stfdu FPR:ro, IND_RC_D:rw cost(4, 4).
stfdx FPR:ro, GPR:ro, GPR:ro cost(4, 4).
stfs FSREG:ro, IND_RC_W+IND_RL_W:rw cost(4, 3).
stfsu FSREG:ro, IND_RC_W:rw cost(4, 3).
stfsx FSREG:ro, GPR:ro, GPR:ro cost(4, 3).
sth GPR:ro, IND_RC_H+IND_RL_H:rw cost(4, 3).
sthx GPR:ro, GPR:ro, GPR:ro cost(4, 3).
stw GPR:ro, IND_RC_W+IND_RL_W:rw cost(4, 3).
stwx GPR:ro, GPR:ro, GPR:ro cost(4, 3).
stwu GPR:ro, IND_RC_W:rw cost(4, 3).
xor GPR:wo, GPR:ro, GPR:ro.
xori GPR:wo, GPR:ro, CONST:ro.
xoris GPR:wo, GPR:ro, CONST:ro.
/* Zero-cost pseudo-instructions: "bug" prints ">>> BUG" and "comment"
 * prints "!", each followed by its label operand. */
bug ">>> BUG" LABEL:ro cost(0, 0).
comment "!" LABEL:ro cost(0, 0).
/*
 * Move rules.  In each "from X to Y" rule, %1 is the source and %2 the
 * destination.  This is where the "partial result" tokens (SUM_*,
 * AND_*, COND_*, X?? ...) finally become instructions.
 */
MOVES
from GPR to GPR
gen mr %2, %1
from FSREG to FSREG
gen fmr %2, %1
from FPR to FPR
gen fmr %2, %1
/* Constants */
/* Tried in order: one li, one lis when the low half is zero,
 * otherwise lis followed by ori. */
from CONST smalls(%val) to GPR
gen
COMMENT("move CONST->GPR smalls")
li %2, %1
from CONST lo(%val)==0 to GPR
gen
COMMENT("move CONST->GPR shifted")
lis %2, {C, hi(%1.val)}
from CONST to GPR
gen
COMMENT("move CONST->GPR")
lis %2, {C, hi(%1.val)}
ori %2, %2, {C, lo(%1.val)}
/* Can't use addi %2, %2, {C, los(%1.val)}
* because %2 might be R0. */
from LABEL to GPR
gen
COMMENT("move LABEL->GPR")
lis %2, {LABEL_HI, %1.adr}
ori %2, %2, {LABEL_LO, %1.adr}
from LABEL_HA to GPR
gen lis %2, %1
/* Sign extension */
from SEX_B to GPR
gen extsb %2, %1.reg
from SEX_H to GPR
gen extsh %2, %1.reg
/* Register + something */
from SUM_RIS to GPR
gen addis %2, %1.reg, {C, %1.offhi}
from SUM_RC to GPR
gen addi %2, %1.reg, {C, %1.off}
from SUM_RL to GPR
gen addi %2, %1.reg, {LABEL_LO, %1.adr}
from SUM_RR to GPR
gen add %2, %1.reg1, %1.reg2
/* Other arithmetic */
from SUB_RR to GPR
/* reg1 - reg2 -> subtract reg2 from reg1 */
gen subf %2, %1.reg2, %1.reg1
from NEG_R to GPR
gen neg %2, %1.reg
from MUL_RR to GPR
gen mullw %2, %1.reg1, %1.reg2
from DIV_RR to GPR
gen divw %2, %1.reg1, %1.reg2
from DIV_RR_U to GPR
gen divwu %2, %1.reg1, %1.reg2
/* Read byte */
from IND_RC_B+IND_RL_B to GPR
gen lbz %2, %1
from IND_RR_B to GPR
gen lbzx %2, %1.reg1, %1.reg2
/* Write byte */
from GPR to IND_RC_B+IND_RL_B
gen stb %1, %2
from GPR to IND_RR_B
gen stbx %1, %2.reg1, %2.reg2
/* Read halfword (short) */
from IND_RC_H+IND_RL_H to GPR
gen lhz %2, %1
from IND_RR_H to GPR
gen lhzx %2, %1.reg1, %1.reg2
from IND_RC_H_S+IND_RL_H_S to GPR
gen lha %2, %1
from IND_RR_H_S to GPR
gen lhax %2, %1.reg1, %1.reg2
/* Write halfword */
from GPR to IND_RC_H+IND_RL_H
gen sth %1, %2
from GPR to IND_RR_H
gen sthx %1, %2.reg1, %2.reg2
/* Read word */
from IND_RC_W+IND_RL_W to GPR
gen lwz %2, %1
from IND_RR_W to GPR
gen lwzx %2, %1.reg1, %1.reg2
from IND_RC_W+IND_RL_W to FSREG
gen lfs %2, %1
from IND_RR_W to FSREG
gen lfsx %2, %1.reg1, %1.reg2
/* Write word */
from GPR to IND_RC_W+IND_RL_W
gen stw %1, %2
from GPR to IND_RR_W
gen stwx %1, %2.reg1, %2.reg2
from FSREG to IND_RC_W+IND_RL_W
gen stfs %1, %2
from FSREG to IND_RR_W
gen stfsx %1, %2.reg1, %2.reg2
/* Read double */
from IND_RC_D+IND_RL_D to FPR
gen lfd %2, %1
from IND_RR_D to FPR
gen lfdx %2, %1.reg1, %1.reg2
/* Write double */
from FPR to IND_RC_D+IND_RL_D
gen stfd %1, %2
from FPR to IND_RR_D
gen stfdx %1, %2.reg1, %2.reg2
/* Logicals */
/* NOT is emitted as nor of the register with itself. */
from NOT_R to GPR
gen nor %2, %1.reg, %1.reg
from AND_RIS to GPR
gen andisX %2, %1.reg, {C, %1.valhi}
from AND_RC to GPR
gen andiX %2, %1.reg, {C, %1.val}
from AND_RR to GPR
gen and %2, %1.reg1, %1.reg2
from ANDC_RR to GPR
gen andc %2, %1.reg1, %1.reg2
from OR_RIS to GPR
gen oris %2, %1.reg, {C, %1.valhi}
from OR_RC to GPR
gen ori %2, %1.reg, {C, %1.val}
from OR_RR to GPR
gen or %2, %1.reg1, %1.reg2
from ORC_RR to GPR
gen orc %2, %1.reg1, %1.reg2
from XOR_RIS to GPR
gen xoris %2, %1.reg, {C, %1.valhi}
from XOR_RC to GPR
gen xori %2, %1.reg, {C, %1.val}
from XOR_RR to GPR
gen xor %2, %1.reg1, %1.reg2
from NAND_RR to GPR
gen nand %2, %1.reg1, %1.reg2
from NOR_RR to GPR
gen nor %2, %1.reg1, %1.reg2
from EQV_RR to GPR
gen eqv %2, %1.reg1, %1.reg2
/* Conditions */
/* Compare values, then copy cr0 to GPR. */
from COND_RC to GPR
gen
cmpwi %1.reg, {C, %1.val}
mfcr %2
from COND_RR to GPR
gen
cmpw %1.reg1, %1.reg2
mfcr %2
from CONDL_RC to GPR
gen
cmplwi %1.reg, {C, %1.val}
mfcr %2
from CONDL_RR to GPR
gen
cmplw %1.reg1, %1.reg2
mfcr %2
from COND_FS to GPR
gen
fcmpo cr0, %1.reg1, %1.reg2
mfcr %2
from COND_FD to GPR
gen
fcmpo cr0, %1.reg1, %1.reg2
mfcr %2
/* Given a copy of cr0 in %1.reg, extract a condition bit
* (lt, gt, eq) and perhaps flip it.
*/
/* extrwi's last operand selects the bit: 0 for XLT/XGE, 1 for
 * XGT/XLE, 2 for XEQ/XNE.  XNE, XGE and XLE then invert the
 * extracted bit with xori 1. */
from XEQ to GPR
gen
extrwi %2, %1.reg, {C, 1}, {C, 2}
from XNE to GPR
gen
extrwi %2, %1.reg, {C, 1}, {C, 2}
xori %2, %2, {C, 1}
from XGT to GPR
gen
extrwi %2, %1.reg, {C, 1}, {C, 1}
from XGE to GPR
gen
extrwi %2, %1.reg, {C, 1}, {C, 0}
xori %2, %2, {C, 1}
from XLT to GPR
gen
extrwi %2, %1.reg, {C, 1}, {C, 0}
from XLE to GPR
gen
extrwi %2, %1.reg, {C, 1}, {C, 1}
xori %2, %2, {C, 1}
/* GPR_EXPR exists solely to allow us to use regvar() (which can only
be used in an expression) as a register constant. We can then use
our moves to GPR to set register variables. We define no moves to
LOCAL, so we avoid confusion between GPR and FSREG in LOCAL. */
from INT_W to GPR_EXPR
gen move %1, %2.reg
from FLOAT_D to FPR_EXPR
gen move %1, %2.reg
from FLOAT_W to FSREG_EXPR
gen move %1, %2.reg
/*
 * Test rules: how to set the condition codes from a value.  The only
 * rule tests a GPR by OR-ing it with itself using the "or." form.
 */
TESTS
/* Given orX %1, %1, %1, ncgg says, "Instruction destroys %1,
* not allowed here". We use orX_readonly to trick ncgg.
*
* Using "or." and not "mr." because mach/powerpc/top/table
* was optimizing "or." and not "mr.".
*/
to test GPR
gen
orX_readonly %1, %1, %1
/*
 * Stacking rules: how to push a token onto the real stack.  Each push
 * is a store-with-update at sp-4 (words) or sp-8 (doubles).  Tokens
 * that are not already in a register are first moved into RSCRATCH or
 * FSCRATCH.
 */
STACKINGRULES
from REG to STACK
gen
COMMENT("stack REG")
stwu %1, {IND_RC_W, sp, 0-4}
from INT_W-REG to STACK
gen
COMMENT("stack INT_W-REG")
move %1, RSCRATCH
stwu RSCRATCH, {IND_RC_W, sp, 0-4}
from FLOAT_D-FREG to STACK
gen
COMMENT("stack FLOAT_D-FREG")
move %1, FSCRATCH
stfdu FSCRATCH, {IND_RC_D, sp, 0-8}
from FREG to STACK
gen
COMMENT("stack FREG")
stfdu %1, {IND_RC_D, sp, 0-8}
from FSREG to STACK
gen
COMMENT("stack FSREG")
stfsu %1, {IND_RC_W, sp, 0-4}
/*
* We never stack LOCAL or DLOCAL tokens, because we only use
* them for register variables, so ncg pushes the register,
* not the token. These rules only prevent an error in ncgg.
*/
from LOCAL to STACK
gen bug {LABEL, "STACKING LOCAL"}
from DLOCAL to STACK
gen bug {LABEL, "STACKING DLOCAL"}
/*
 * Coercions.  The "from STACK" rules pop a value off the real stack
 * into a freshly allocated register: load from 0(sp), then add the
 * value's size (4 or 8) to sp.  The remaining rules load a token into
 * a register through the MOVES rules.
 */
COERCIONS
from STACK
uses REG
gen
COMMENT("coerce STACK->REG")
lwz %a, {IND_RC_W, sp, 0}
addi sp, sp, {C, 4}
yields %a
from STACK
uses FREG
gen
COMMENT("coerce STACK->FREG")
lfd %a, {IND_RC_D, sp, 0}
addi sp, sp, {C, 8}
yields %a
from STACK
uses FSREG
gen
COMMENT("coerce STACK->FSREG")
lfs %a, {IND_RC_W, sp, 0}
addi sp, sp, {C, 4}
yields %a
/* "uses REG=%1" may find and reuse a register containing the
* same token. For contrast, "uses REG gen move %1, %a" would
* pick a different register before doing the move.
*/
from INT_W
uses REG=%1
yields %a
from FLOAT_D
uses FREG=%1
yields %a
from FLOAT_W
uses FSREG=%1
yields %a
/* Splitting coercions can't allocate registers.
* PowerPC can't add r0 + constant. Use r12.
*
* Each rule splits an 8-byte memory token into two 4-byte IND_RC_W
* tokens, yielding the word at offset +4 first and the word at +0
* last.  All three members of IND_ALL_D are covered.
*/
from IND_RC_D %off<=0x7FFA
/* %off+4 still fits the displacement field; no address arithmetic. */
yields
{IND_RC_W, %1.reg, %1.off+4}
{IND_RC_W, %1.reg, %1.off}
from IND_RC_D
/* Don't move to %1.reg; it might be a regvar. */
gen move {SUM_RC, %1.reg, %1.off}, r12
yields {IND_RC_W, r12, 4} {IND_RC_W, r12, 0}
from IND_RL_D
/* lo16[adr]+4 can't be expressed as one displacement, so form
 * the address reg + lo16[adr] in r12 first. */
gen move {SUM_RL, %1.reg, %1.adr}, r12
yields {IND_RC_W, r12, 4} {IND_RC_W, r12, 0}
from IND_RR_D
gen move {SUM_RR, %1.reg1, %1.reg2}, r12
yields {IND_RC_W, r12, 4} {IND_RC_W, r12, 0}
/*
 * EM patterns.  Patterns for the same EM instruction are tried in the
 * order written, so the more specific conditions come first.
 */
PATTERNS
/* Constants */
/* "loc" pushes a constant; the token chosen records which 16-bit
 * range the value falls in, so later patterns can select immediate
 * forms.  The last two catch what remains: low half zero
 * (CONST_HI_ZR), then everything else (CONST_HI_LO). */
pat loc $1==(0-0x8000) /* Load constant */
yields {CONST_N8000, $1}
pat loc $1>=(0-0x7FFF) && $1<=(0-1)
yields {CONST_N7FFF_N0001, $1}
pat loc $1>=0 && $1<=0x7FFF
yields {CONST_0000_7FFF, $1}
pat loc $1==0x8000
yields {CONST_8000, $1}
pat loc $1>=0x8001 && $1<=0xFFFF
yields {CONST_8001_FFFF, $1}
pat loc lo($1)==0
yields {CONST_HI_ZR, $1}
pat loc
yields {CONST_HI_LO, $1}
/* Stack shuffles */
/* The peephole optimizer does: loc $1 ass 4 -> asp $1
* To optimize multiplication, it uses: dup 8 asp 4
*/
pat asp $1==4 /* Adjust stack by constant */
with exact INT_W+FLOAT_W
/* drop %1 */
with STACK
gen addi sp, sp, {C, 4}
pat asp smalls($1)
/* Fits a signed 16-bit immediate: one addi. */
with STACK
gen addi sp, sp, {C, $1}
pat asp lo($1)==0
/* Low half is zero, so only the high half needs adding.  This
 * must be addis (add immediate shifted); addi would add hi($1)
 * itself instead of hi($1) << 16. */
with STACK
gen addis sp, sp, {C, hi($1)}
pat asp
/* General case: add the adjusted high half, then the signed low
 * half (see his() and los()). */
with STACK
gen
addis sp, sp, {C, his($1)}
addi sp, sp, {C, los($1)}
/* ass: add the popped amount to sp. */
pat ass $1==4 /* Adjust stack by variable */
with REG STACK
gen add sp, sp, %1
/* To duplicate a token, we coerce the token into a register,
* then duplicate the register. This decreases code size.
*/
pat dup $1==4 /* Duplicate word on top of stack */
with REG+FSREG
yields %1 %1
pat dup $1==8 /* Duplicate double-word */
with REG+FSREG REG+FSREG
yields %2 %1 %2 %1
with FREG
yields %1 %1
pat dup /* Duplicate other size */
leaving
loc $1
dus 4
pat dus $1==4 /* Duplicate variable size */
with REG STACK
/* ( a size%1 -- a a ) */
/* %a first holds size/4, which is loaded into ctr as the loop
 * count; %b starts at sp + size.  Each iteration loads the word
 * at %b-4 (updating %b) and pushes it at sp-4 (updating sp). */
uses REG, REG
gen
srwi %a, %1, {C, 2}
mtspr ctr, %a
add %b, sp, %1
1: lwzu %a, {IND_RC_W, %b, 0-4}
stwu %a, {IND_RC_W, sp, 0-4}
bdnz {LABEL, "1b"}
pat exg $1==4 /* Exchange top two words */
with INT_W+FLOAT_W INT_W+FLOAT_W
yields %1 %2
pat exg defined($1) /* Exchange other size */
leaving
loc $1
cal ".exg"
pat exg !defined($1)
leaving
cal ".exg"
/* Storing and then reloading the same external is rewritten as
 * duplicating the value and storing one copy. */
pat ste loe $1==$2 /* Store then load external */
leaving
dup 4
ste $1
/* Type conversions */
/* In "loc loc cXX", $1 is the source size and $2 the destination
 * size.  ciu and cui are rewritten as cuu; conversions that need no
 * code have an empty pattern body. */
pat loc loc ciu /* signed -> unsigned */
leaving
loc $1
loc $2
cuu
pat loc loc cui /* unsigned -> signed */
leaving
loc $1
loc $2
cuu
pat loc loc cuu $1<=4 && $2<=4 /* unsigned -> unsigned */
/* nop */
pat loc loc cii $1<=4 && $2<=$1
/* signed -> signed of smaller or same size,
* no sign extension */
pat loc loc cii $1==1 && $2<=4 /* sign-extend char */
with REG
yields {SEX_B, %1}
pat loc loc cii $1==2 && $2<=4 /* sign-extend short */
with REG
yields {SEX_H, %1}
/* Local variables */
/* Locals are addressed relative to fp.  Locals kept in register
 * variables are represented by LOCAL/DLOCAL tokens on load and written
 * through GPR_EXPR/FSREG_EXPR/FPR_EXPR on store; all other locals are
 * rewritten as lal followed by loi/sti. */
pat lal smalls($1) /* Load address of local */
yields {SUM_RC, fp, $1}
pat lal /* Load address of local */
/* Offset too large for one immediate: add the adjusted high half
 * into a scratch register, leave the signed low half in the token. */
uses REG={SUM_RIS, fp, his($1)}
yields {SUM_RC, %a, los($1)}
/* Load word from local */
pat lol inreg($1)==reg_any || inreg($1)==reg_float
yields {LOCAL, $1}
pat lol
leaving
lal $1
loi 4
/* Load double-word from local */
pat ldl inreg($1)==reg_float
yields {DLOCAL, $1}
pat ldl
leaving
lal $1
loi 8
/* Store word to local */
pat stl inreg($1)==reg_any
with exact INT_W
/* ncg fails to infer that regvar($1) is dead! */
kills regvar($1)
gen move %1, {GPR_EXPR, regvar($1)}
with STACK
/* Value is on the real stack: pop it straight into the regvar. */
gen
lwz {LOCAL, $1}, {IND_RC_W, sp, 0}
addi sp, sp, {C, 4}
pat stl inreg($1)==reg_float
with exact FSREG+IND_ALL_W
kills regvar_w($1, reg_float)
gen move %1, {FSREG_EXPR, regvar_w($1, reg_float)}
with STACK
gen
lfs {LOCAL, $1}, {IND_RC_W, sp, 0}
addi sp, sp, {C, 4}
pat stl
leaving
lal $1
sti 4
/* Store double-word to local */
pat sdl inreg($1)==reg_float
with exact FREG+IND_ALL_D
kills regvar_d($1, reg_float)
gen move %1, {FPR_EXPR, regvar_d($1, reg_float)}
with STACK
gen
lfd {DLOCAL, $1}, {IND_RC_D, sp, 0}
addi sp, sp, {C, 8}
pat sdl
leaving
lal $1
sti 8
/* Load indirect from local */
pat lil inreg($1)==reg_any
yields {IND_RC_W, regvar($1), 0}
pat lil
leaving
lol $1
loi 4
pat sil /* Save to indirected local */
leaving
lol $1
sti 4
pat zrl /* Zero local */
leaving
loc 0
stl $1
pat inl /* Increment local */
leaving
lol $1
loc 1
adi 4
stl $1
pat del /* Decrement local */
leaving
lol $1
loc 1
sbi 4
stl $1
/* Global variables */
/* Every external access is reduced to "lae" (address of external)
 * followed by a generic load/store or arithmetic pattern. */
pat lpi /* Load address of external function */
leaving
lae $1
pat lae /* Load address of external */
/* ha16[label] is loaded into a scratch register with lis; the
 * lo16[label] part stays in the SUM_RL token. */
uses REG={LABEL_HA, $1}
yields {SUM_RL, %a, $1}
pat loe /* Load word external */
leaving
lae $1
loi INT32
pat ste /* Store word external */
leaving
lae $1
sti INT32
pat lde /* Load double-word external */
leaving
lae $1
loi INT64
pat sde /* Store double-word external */
leaving
lae $1
sti INT64
pat zre /* Zero external */
leaving
loc 0
ste $1
pat ine /* Increment external */
leaving
loe $1
inc
ste $1
pat dee /* Decrement external */
leaving
loe $1
dec
ste $1
/* Structures */
/* Offsetted loads and stores become "adp" (add the constant offset to
 * the pointer) followed by the plain indirect load or store. */
pat lof /* Load word offsetted */
leaving
adp $1
loi INT32
pat ldf /* Load double-word offsetted */
leaving
adp $1
loi INT64
pat stf /* Store word offsetted */
leaving
adp $1
sti INT32
pat sdf /* Store double-word offsetted */
leaving
adp $1
sti INT64
/* Loads and stores */
/* loi only builds an IND_* token from the address on top of the
 * stack: a plain register gives offset 0, and a pending SUM_RC /
 * SUM_RL / SUM_RR is folded into the addressing mode.  sti performs
 * the store through the MOVES rules; %1 is the address and %2 the
 * value. */
pat loi $1==INT8 /* Load byte indirect */
with REG
yields {IND_RC_B, %1, 0}
with exact SUM_RC
yields {IND_RC_B, %1.reg, %1.off}
with exact SUM_RL
yields {IND_RL_B, %1.reg, %1.adr}
with exact SUM_RR
yields {IND_RR_B, %1.reg1, %1.reg2}
pat loi loc loc cii $1==INT16 && $2==INT16 && $3==INT32
/* Load half-word indirect and sign extend */
with REG
yields {IND_RC_H_S, %1, 0}
with exact SUM_RC
yields {IND_RC_H_S, %1.reg, %1.off}
with exact SUM_RL
yields {IND_RL_H_S, %1.reg, %1.adr}
with exact SUM_RR
yields {IND_RR_H_S, %1.reg1, %1.reg2}
pat loi $1==INT16 /* Load half-word indirect */
with REG
yields {IND_RC_H, %1, 0}
with exact SUM_RC
yields {IND_RC_H, %1.reg, %1.off}
with exact SUM_RL
yields {IND_RL_H, %1.reg, %1.adr}
with exact SUM_RR
yields {IND_RR_H, %1.reg1, %1.reg2}
pat loi $1==INT32 /* Load word indirect */
with REG
yields {IND_RC_W, %1, 0}
with exact SUM_RC
yields {IND_RC_W, %1.reg, %1.off}
with exact SUM_RL
yields {IND_RL_W, %1.reg, %1.adr}
with exact SUM_RR
yields {IND_RR_W, %1.reg1, %1.reg2}
pat loi $1==INT64 /* Load double-word indirect */
with REG
yields {IND_RC_D, %1, 0}
with exact SUM_RC
yields {IND_RC_D, %1.reg, %1.off}
with exact SUM_RL
yields {IND_RL_D, %1.reg, %1.adr}
with exact SUM_RR
yields {IND_RR_D, %1.reg1, %1.reg2}
pat loi /* Load arbitrary size */
leaving
loc $1
los 4
/* Other sizes: the size is coerced into r3 (REG3) and the helper
 * routine .los4 is called. */
pat los $1==4 /* Load arbitrary size */
with REG3 STACK
kills ALL
gen
bl {LABEL, ".los4"}
pat sti $1==INT8 /* Store byte indirect */
with REG REG
kills MEMORY
gen move %2, {IND_RC_B, %1, 0}
with SUM_RC REG
kills MEMORY
gen move %2, {IND_RC_B, %1.reg, %1.off}
with SUM_RL REG
kills MEMORY
gen move %2, {IND_RL_B, %1.reg, %1.adr}
with SUM_RR REG
kills MEMORY
gen move %2, {IND_RR_B, %1.reg1, %1.reg2}
pat sti $1==INT16 /* Store half-word indirect */
with REG REG
kills MEMORY
gen move %2, {IND_RC_H, %1, 0}
with SUM_RC REG
kills MEMORY
gen move %2, {IND_RC_H, %1.reg, %1.off}
with SUM_RL REG
kills MEMORY
gen move %2, {IND_RL_H, %1.reg, %1.adr}
with SUM_RR REG
kills MEMORY
gen move %2, {IND_RR_H, %1.reg1, %1.reg2}
pat sti $1==INT32 /* Store word indirect */
with REG REG+FSREG
kills MEMORY
gen move %2, {IND_RC_W, %1, 0}
with SUM_RC REG+FSREG
kills MEMORY
gen move %2, {IND_RC_W, %1.reg, %1.off}
with SUM_RL REG+FSREG
kills MEMORY
gen move %2, {IND_RL_W, %1.reg, %1.adr}
with SUM_RR REG+FSREG
kills MEMORY
gen move %2, {IND_RR_W, %1.reg1, %1.reg2}
pat sti $1==INT64 /* Store double-word indirect */
with REG FREG
kills MEMORY
gen move %2, {IND_RC_D, %1, 0}
with SUM_RC FREG
kills MEMORY
gen move %2, {IND_RC_D, %1.reg, %1.off}
with SUM_RL FREG
kills MEMORY
gen move %2, {IND_RL_D, %1.reg, %1.adr}
with SUM_RR FREG
kills MEMORY
gen move %2, {IND_RR_D, %1.reg1, %1.reg2}
with REG REG REG
/* Value held as two words: %2 is stored at offset 0 and %3 at
 * offset 4 from the address in %1. */
kills MEMORY
gen
move %2, {IND_RC_W, %1, 0}
move %3, {IND_RC_W, %1, 4}
pat sti /* Store arbitrary size */
leaving
loc $1
sts 4
pat sts $1==4 /* Store arbitrary size */
with REG3 STACK
kills ALL
gen
bl {LABEL, ".sts4"}
/* Arithmetic wrappers */
/* Each pattern here generates no code of its own; it rewrites the EM
 * instruction as the signed word operation (adi, sbi, mli, sli). */
pat ads $1==4 /* Add var to pointer */
leaving adi $1
pat sbs $1==4 /* Subtract var from pointer */
leaving sbi $1
pat adp /* Add constant to pointer */
leaving
loc $1
adi 4
pat adu /* Add unsigned */
leaving
adi $1
pat sbu /* Subtract unsigned */
leaving
sbi $1
pat inc /* Add 1 */
leaving
loc 1
adi 4
pat dec /* Subtract 1 */
leaving
loc 1
sbi 4
pat mlu /* Multiply unsigned */
leaving
mli $1
pat slu /* Shift left unsigned */
leaving
sli $1
/* Word arithmetic */
/* adi yields a SUM_* token rather than emitting an add at once, so
 * a following loi/sti can fold the sum into an addressing mode.  A
 * constant that fits neither a single addi nor a single addis is
 * split: the adjusted high half is added into a scratch register
 * (%a) and the signed low half stays in the yielded SUM_RC. */
pat adi $1==4 /* Add word (second + top) */
with REG REG
yields {SUM_RR, %1, %2}
with CONST2 REG
yields {SUM_RC, %2, %1.val}
with REG CONST2
yields {SUM_RC, %1, %2.val}
with CONST_HI_ZR REG
yields {SUM_RIS, %2, his(%1.val)}
with REG CONST_HI_ZR
yields {SUM_RIS, %1, his(%2.val)}
with CONST_STACK-CONST2-CONST_HI_ZR REG
uses reusing %2, REG={SUM_RIS, %2, his(%1.val)}
yields {SUM_RC, %a, los(%1.val)}
with REG CONST_STACK-CONST2-CONST_HI_ZR
uses reusing %1, REG={SUM_RIS, %1, his(%2.val)}
yields {SUM_RC, %a, los(%2.val)}
/* sbi: %1 is the top of stack (subtrahend), %2 the second word.
 * Subtracting a constant is expressed as adding its negation, so the
 * SUM_* tokens are reused. */
pat sbi $1==4 /* Subtract word (second - top) */
with REG REG
/* No scratch register is needed here: SUB_RR only records the
 * two operands, and the subf is emitted when the token is moved
 * to a GPR. */
yields {SUB_RR, %2, %1}
with CONST2_WHEN_NEG REG
yields {SUM_RC, %2, 0-%1.val}
with CONST_HI_ZR REG
yields {SUM_RIS, %2, his(0-%1.val)}
with CONST_STACK-CONST2_WHEN_NEG-CONST_HI_ZR REG
/* Negated constant needs both halves: add the adjusted high half
 * into %a, leave the signed low half in the SUM_RC. */
uses reusing %2, REG={SUM_RIS, %2, his(0-%1.val)}
yields {SUM_RC, %a, los(0-%1.val)}
/* Negate, multiply and divide yield partial-result tokens; the
 * instruction is emitted when the token is moved to a GPR. */
pat ngi $1==4 /* Negate word */
with REG
yields {NEG_R, %1}
pat mli $1==4 /* Multiply word (second * top) */
with REG REG
yields {MUL_RR, %2, %1}
pat dvi $1==4 /* Divide word (second / top) */
with REG REG
yields {DIV_RR, %2, %1}
pat dvu $1==4 /* Divide unsigned word (second / top) */
with REG REG
yields {DIV_RR_U, %2, %1}
/* To calculate a remainder: a % b = a - (a / b * b) */
/* Here %2 is a and %1 is b: %a receives a / b, %b receives
 * (a / b) * b, and the yielded SUB_RR is a - %b. */
pat rmi $1==4 /* Remainder word (second % top) */
with REG REG
uses REG={DIV_RR, %2, %1}, REG
gen move {MUL_RR, %a, %1}, %b
yields {SUB_RR, %2, %b}
pat rmu $1==4 /* Remainder unsigned word (second % top) */
with REG REG
uses REG={DIV_RR_U, %2, %1}, REG
gen move {MUL_RR, %a, %1}, %b
yields {SUB_RR, %2, %b}
/* Bitwise logic */
/* A pending NOT_R operand is folded into andc.  Constants are only
 * handled when a single andi. (UCONST2) or andis. (CONST_HI_ZR) can
 * do it.  Sizes other than 4 call the helper routine .and. */
pat and $1==4 /* AND word */
with REG NOT_R
yields {ANDC_RR, %1, %2.reg}
with NOT_R REG
yields {ANDC_RR, %2, %1.reg}
with REG REG
yields {AND_RR, %1, %2}
with REG UCONST2
yields {AND_RC, %1, %2.val}
with UCONST2 REG
yields {AND_RC, %2, %1.val}
with REG CONST_HI_ZR
yields {AND_RIS, %1, hi(%2.val)}
with CONST_HI_ZR REG
yields {AND_RIS, %2, hi(%1.val)}
pat and defined($1) /* AND set */
leaving
loc $1
cal ".and"
pat and !defined($1)
leaving
cal ".and"
/* ior: a pending NOT_R operand is folded into orc; 16-bit constants
 * use ori (UCONST2) or oris (CONST_HI_ZR); other constants are applied
 * in two steps.  Sizes other than 4 call the helper routine .ior. */
pat ior $1==4 /* OR word */
with REG NOT_R
yields {ORC_RR, %1, %2.reg}
with NOT_R REG
yields {ORC_RR, %2, %1.reg}
with REG REG
yields {OR_RR, %1, %2}
with REG UCONST2
yields {OR_RC, %1, %2.val}
with UCONST2 REG
yields {OR_RC, %2, %1.val}
with REG CONST_HI_ZR
yields {OR_RIS, %1, hi(%2.val)}
with CONST_HI_ZR REG
yields {OR_RIS, %2, hi(%1.val)}
with REG CONST_STACK-UCONST2-CONST_HI_ZR
/* %a = %1 | (hi << 16).  The low half must then be OR-ed into
 * %a, not into %1: %a need not be the same register as %1, and
 * using %1 would lose the high half. */
uses reusing %1, REG={OR_RIS, %1, hi(%2.val)}
yields {OR_RC, %a, lo(%2.val)}
with CONST_STACK-UCONST2-CONST_HI_ZR REG
/* Same as above with the operands in the other order. */
uses reusing %2, REG={OR_RIS, %2, hi(%1.val)}
yields {OR_RC, %a, lo(%1.val)}
pat ior defined($1) /* OR set */
leaving
loc $1
cal ".ior"
/* OR set (variable), used in lang/m2/libm2/LtoUset.e */
pat ior !defined($1)
leaving
cal ".ior"
/* xor: 16-bit constants use xori (UCONST2) or xoris (CONST_HI_ZR);
 * other constants are applied in two steps.  Sizes other than 4 call
 * the helper routine .xor. */
pat xor $1==4 /* XOR word */
with REG REG
yields {XOR_RR, %1, %2}
with REG UCONST2
yields {XOR_RC, %1, %2.val}
with UCONST2 REG
yields {XOR_RC, %2, %1.val}
with REG CONST_HI_ZR
yields {XOR_RIS, %1, hi(%2.val)}
with CONST_HI_ZR REG
yields {XOR_RIS, %2, hi(%1.val)}
with REG CONST_STACK-UCONST2-CONST_HI_ZR
/* %a = %1 ^ (hi << 16).  The low half must then be XOR-ed into
 * %a, not into %1: %a need not be the same register as %1, and
 * using %1 would lose the high half. */
uses reusing %1, REG={XOR_RIS, %1, hi(%2.val)}
yields {XOR_RC, %a, lo(%2.val)}
with CONST_STACK-UCONST2-CONST_HI_ZR REG
/* Same as above with the operands in the other order. */
uses reusing %2, REG={XOR_RIS, %2, hi(%1.val)}
yields {XOR_RC, %a, lo(%1.val)}
pat xor defined($1) /* XOR set */
leaving
loc $1
cal ".xor"
pat xor !defined($1)
leaving
cal ".xor"
/* com: complementing a pending AND_RR / OR_RR / XOR_RR fuses into
 * NAND_RR / NOR_RR / EQV_RR; anything else becomes NOT_R. */
pat com $1==INT32 /* NOT word */
with exact AND_RR
yields {NAND_RR, %1.reg1, %1.reg2}
with exact OR_RR
yields {NOR_RR, %1.reg1, %1.reg2}
with exact XOR_RR
yields {EQV_RR, %1.reg1, %1.reg2}
with REG
yields {NOT_R, %1}
pat com defined($1) /* NOT set */
leaving
loc $1
cal ".com"
pat com !defined($1)
leaving
cal ".com"
pat zer $1==4 /* Push zero */
leaving
loc 0
pat zer defined($1) /* Create empty set */
leaving
loc $1
cal ".zer"
/* Shifts and rotations */
/* %1 is the shift count (top of stack) and %2 the value.  Constant
 * counts are masked with 0x1F and use the immediate form.  The
 * "<op> stl" variants write the result directly into a LOCAL token
 * when the destination local is a reg_any register variable. */
pat sli $1==4 /* Shift left (second << top) */
with CONST_STACK REG
uses reusing %2, REG
gen slwi %a, %2, {C, %1.val & 0x1F}
yields %a
with REG REG
uses reusing %2, REG
gen slw %a, %2, %1
yields %a
pat sli stl $1==4 && inreg($2)==reg_any
with CONST_STACK REG
gen slwi {LOCAL, $2}, %2, {C, %1.val & 0x1F}
with REG REG
gen slw {LOCAL, $2}, %2, %1
pat sri $1==4 /* Shift right signed (second >> top) */
with CONST_STACK REG
uses reusing %2, REG
gen srawi %a, %2, {C, %1.val & 0x1F}
yields %a
with REG REG
uses reusing %2, REG
gen sraw %a, %2, %1
yields %a
pat sri stl $1==4 && inreg($2)==reg_any
with CONST_STACK REG
gen srawi {LOCAL, $2}, %2, {C, %1.val & 0x1F}
with REG REG
gen sraw {LOCAL, $2}, %2, %1
pat sru $1==4 /* Shift right unsigned (second >> top) */
with CONST_STACK REG
uses reusing %2, REG
gen srwi %a, %2, {C, %1.val & 0x1F}
yields %a
with REG REG
uses reusing %2, REG
gen srw %a, %2, %1
yields %a
pat sru stl $1==4 && inreg($2)==reg_any
with CONST_STACK REG
gen srwi {LOCAL, $2}, %2, {C, %1.val & 0x1F}
with REG REG
gen srw {LOCAL, $2}, %2, %1
pat rol $1==4 /* Rotate left word */
with CONST_STACK REG
uses reusing %2, REG
gen rotlwi %a, %2, {C, %1.val & 0x1F}
yields %a
with REG REG
uses reusing %2, REG
gen rotlw %a, %2, %1
yields %a
pat rol stl $1==4 && inreg($2)==reg_any
with CONST_STACK REG
gen rotlwi {LOCAL, $2}, %2, {C, %1.val & 0x1F}
with REG REG
gen rotlw {LOCAL, $2}, %2, %1
/*
* ror 4 -> ngi 4, rol 4
* because to rotate right by n bits is to rotate left by
* (32 - n), which is to rotate left by -n. PowerPC rotlw
* handles -n as (-n & 0x1F).
*/
pat ror $1==4 /* Rotate right word */
with CONST_STACK REG
uses reusing %2, REG
gen rotrwi %a, %2, {C, %1.val & 0x1F}
yields %a
with /* anything */
leaving
ngi 4
rol 4
pat ror stl $1==4 && inreg($2)==reg_any
with CONST_STACK REG
gen rotrwi {LOCAL, $2}, %2, {C, %1.val & 0x1F}
with /* anything */
leaving
ngi 4
rol 4
stl $2
/* Arrays */
/* Element address computation is delegated to the ".aar4" helper. */
pat aar $1==4 /* Address of array element */
leaving
cal ".aar4"
/*
 * Load array element: flush everything to the real stack, then
 * call .aar4 followed directly by .los4.  The two helpers are
 * chained with raw "bl" instructions (not "cal") so that the value
 * .aar4 leaves in r3 reaches .los4 untouched.
 */
pat lar $1==4 /* Load from array */
with STACK
kills ALL
gen
bl {LABEL, ".aar4"}
/* pass r3 = size from .aar4 to .los4 */
bl {LABEL, ".los4"}
/*
 * When the descriptor is a known label and its third rom word
 * (used below as the load size) is 1, 2, 4 or 8, replace the
 * variable-size load by a fixed-size "loi".
 */
pat lae lar $2==4 && nicesize(rom($1, 3))
leaving
lae $1
aar 4
loi rom($1, 3)
/*
 * Store array element: flush everything to the real stack, then
 * call .aar4 followed directly by .sts4.  The two helpers are
 * chained with raw "bl" instructions (not "cal") so that the value
 * .aar4 leaves in r3 reaches .sts4 untouched.
 */
pat sar $1==4 /* Store to array */
with STACK
kills ALL
gen
bl {LABEL, ".aar4"}
/* pass r3 = size from .aar4 to .sts4 */
bl {LABEL, ".sts4"}
/*
 * When the descriptor is a known label and its third rom word
 * (used below as the store size) is 1, 2, 4 or 8, replace the
 * variable-size store by a fixed-size "sti".
 */
pat lae sar $2==4 && nicesize(rom($1, 3))
leaving
lae $1
aar 4
sti rom($1, 3)
/* Sets */
/*
 * Set construction and membership test.  Both are done by library
 * helpers (".set", ".inn").  When the set size is an instruction
 * argument it is pushed with "loc" first; without an argument
 * nothing is pushed before the call.
 */
pat set defined($1) /* Create singleton set */
leaving
loc $1
cal ".set"
/* Create set (variable), used in lang/m2/libm2/LtoUset.e */
pat set !defined($1)
leaving
cal ".set"
pat inn defined($1) /* Test for set bit */
leaving
loc $1
cal ".inn"
pat inn !defined($1)
leaving
cal ".inn"
/* Boolean resolutions */
/*
 * Boolean tests of the top word against zero.  Every rule has the
 * same shape: "test %1" sets the condition register, "mfcr" copies
 * the condition register into the scratch register %a, and the
 * rule yields an Xcc token wrapping %a.  Only the Xcc token
 * (XEQ, XNE, XLT, XLE, XGT, XGE) differs between the rules; the
 * extraction of the actual 0/1 value is left to whatever later
 * consumes that token.
 */
pat teq /* top = (top == 0) */
with REG
uses reusing %1, REG
gen
test %1
mfcr %a
yields {XEQ, %a}
pat tne /* top = (top != 0) */
with REG
uses reusing %1, REG
gen
test %1
mfcr %a
yields {XNE, %a}
pat tlt /* top = (top < 0) */
with REG
uses reusing %1, REG
gen
test %1
mfcr %a
yields {XLT, %a}
pat tle /* top = (top <= 0) */
with REG
uses reusing %1, REG
gen
test %1
mfcr %a
yields {XLE, %a}
pat tgt /* top = (top > 0) */
with REG
uses reusing %1, REG
gen
test %1
mfcr %a
yields {XGT, %a}
pat tge /* top = (top >= 0) */
with REG
uses reusing %1, REG
gen
test %1
mfcr %a
yields {XGE, %a}
/*
 * Signed word compare fused with a boolean test.
 *
 * %1 is the top of stack, %2 the second element.  Each rule has
 * three alternatives:
 *   REG CONST2  - register on top, constant second.  The compare
 *                 is built as (top ? second), i.e. with the
 *                 operands swapped, so the ordered rules yield the
 *                 mirrored condition (XLT for "second > top", ...).
 *   CONST2 REG  - constant on top, register second: (second ? top).
 *   REG REG     - (second ? top).
 *
 * NOTE(review): the CONST2 REG alternatives say "reusing %1", but
 * %1 is the constant there; the tristate "pat cmi $1==INT32" rule
 * uses "reusing %2" for the same alternative.  Confirm which is
 * intended.
 */
pat cmi teq $1==4 /* Signed second == top */
with REG CONST2
uses reusing %1, REG={COND_RC, %1, %2.val}
yields {XEQ, %a}
with CONST2 REG
uses reusing %1, REG={COND_RC, %2, %1.val}
yields {XEQ, %a}
with REG REG
uses reusing %1, REG={COND_RR, %2, %1}
yields {XEQ, %a}
pat cmi tne $1==4 /* Signed second != top */
with REG CONST2
uses reusing %1, REG={COND_RC, %1, %2.val}
yields {XNE, %a}
with CONST2 REG
uses reusing %1, REG={COND_RC, %2, %1.val}
yields {XNE, %a}
with REG REG
uses reusing %1, REG={COND_RR, %2, %1}
yields {XNE, %a}
pat cmi tgt $1==4 /* Signed second > top */
with REG CONST2
uses reusing %1, REG={COND_RC, %1, %2.val}
yields {XLT, %a}
with CONST2 REG
uses reusing %1, REG={COND_RC, %2, %1.val}
yields {XGT, %a}
with REG REG
uses reusing %1, REG={COND_RR, %2, %1}
yields {XGT, %a}
pat cmi tge $1==4 /* Signed second >= top */
with REG CONST2
uses reusing %1, REG={COND_RC, %1, %2.val}
yields {XLE, %a}
with CONST2 REG
uses reusing %1, REG={COND_RC, %2, %1.val}
yields {XGE, %a}
with REG REG
uses reusing %1, REG={COND_RR, %2, %1}
yields {XGE, %a}
pat cmi tlt $1==4 /* Signed second < top */
with REG CONST2
uses reusing %1, REG={COND_RC, %1, %2.val}
yields {XGT, %a}
with CONST2 REG
uses reusing %1, REG={COND_RC, %2, %1.val}
yields {XLT, %a}
with REG REG
uses reusing %1, REG={COND_RR, %2, %1}
yields {XLT, %a}
pat cmi tle $1==4 /* Signed second <= top */
with REG CONST2
uses reusing %1, REG={COND_RC, %1, %2.val}
yields {XGE, %a}
with CONST2 REG
uses reusing %1, REG={COND_RC, %2, %1.val}
yields {XLE, %a}
with REG REG
uses reusing %1, REG={COND_RR, %2, %1}
yields {XLE, %a}
/*
 * Unsigned word compare fused with a boolean test.  Same layout as
 * the signed "cmi txx" rules, but using the CONDL_RC / CONDL_RR
 * tokens and UCONST2 constants.
 *
 * %1 is the top of stack, %2 the second element.  In the
 * REG UCONST2 alternative the compare is built as (top ? second),
 * so the ordered rules yield the mirrored condition; the other two
 * alternatives compare (second ? top).
 */
pat cmu teq $1==4 /* Unsigned second == top */
with REG UCONST2
uses reusing %1, REG={CONDL_RC, %1, %2.val}
yields {XEQ, %a}
with UCONST2 REG
uses reusing %1, REG={CONDL_RC, %2, %1.val}
yields {XEQ, %a}
with REG REG
uses reusing %1, REG={CONDL_RR, %2, %1}
yields {XEQ, %a}
pat cmu tne $1==4 /* Unsigned second != top */
with REG UCONST2
uses reusing %1, REG={CONDL_RC, %1, %2.val}
yields {XNE, %a}
with UCONST2 REG
uses reusing %1, REG={CONDL_RC, %2, %1.val}
yields {XNE, %a}
with REG REG
uses reusing %1, REG={CONDL_RR, %2, %1}
yields {XNE, %a}
pat cmu tgt $1==4 /* Unsigned second > top */
with REG UCONST2
uses reusing %1, REG={CONDL_RC, %1, %2.val}
yields {XLT, %a}
with UCONST2 REG
uses reusing %1, REG={CONDL_RC, %2, %1.val}
yields {XGT, %a}
with REG REG
uses reusing %1, REG={CONDL_RR, %2, %1}
yields {XGT, %a}
pat cmu tge $1==4 /* Unsigned second >= top */
with REG UCONST2
uses reusing %1, REG={CONDL_RC, %1, %2.val}
yields {XLE, %a}
with UCONST2 REG
uses reusing %1, REG={CONDL_RC, %2, %1.val}
yields {XGE, %a}
with REG REG
uses reusing %1, REG={CONDL_RR, %2, %1}
yields {XGE, %a}
pat cmu tlt $1==4 /* Unsigned second < top */
with REG UCONST2
uses reusing %1, REG={CONDL_RC, %1, %2.val}
yields {XGT, %a}
with UCONST2 REG
uses reusing %1, REG={CONDL_RC, %2, %1.val}
yields {XLT, %a}
with REG REG
uses reusing %1, REG={CONDL_RR, %2, %1}
yields {XLT, %a}
pat cmu tle $1==4 /* Unsigned second <= top */
with REG UCONST2
uses reusing %1, REG={CONDL_RC, %1, %2.val}
yields {XGE, %a}
with UCONST2 REG
uses reusing %1, REG={CONDL_RC, %2, %1.val}
yields {XLE, %a}
with REG REG
uses reusing %1, REG={CONDL_RR, %2, %1}
yields {XLE, %a}
/* Simple branches */
/*
 * Shared body for the "branch on top-of-stack versus zero"
 * instructions.  The popped register is tested, the rest of the
 * fake stack is flushed (STACK), and the branch mnemonic passed by
 * the caller (bxx*) jumps to the instruction's label argument.
 */
proc zxx example zeq
with REG STACK
gen
test %1
bxx* {LABEL, $1}
/* Pop signed int, branch if... */
pat zeq call zxx("beq") /* top == 0 */
pat zne call zxx("bne") /* top != 0 */
pat zgt call zxx("bgt") /* top > 0 */
pat zge call zxx("bge") /* top >= 0 */
pat zlt call zxx("blt") /* top < 0 */
pat zle call zxx("ble") /* top <= 0 */
/* The peephole optimizer rewrites
 * cmi 4 zeq
 * as beq, and does the same for bne, bgt, and so on.
 *
 * Shared body for the two-operand signed compare-and-branch
 * instructions.  %1 is the top of stack, %2 the second element.
 * Each caller passes two mnemonics: bxx[1] is used when the
 * compare is emitted as (second ? top), bxx[2] is the mirrored
 * branch used when a constant in second position forces the
 * compare to be emitted as (top ? second).
 */
proc bxx example beq
with REG CONST2 STACK
gen
cmpwi %1, %2
bxx[2] {LABEL, $1}
with CONST2 REG STACK
gen
cmpwi %2, %1
bxx[1] {LABEL, $1}
with REG REG STACK
gen
cmpw %2, %1
bxx[1] {LABEL, $1}
/* Pop two signed ints, branch if... */
pat beq call bxx("beq", "beq") /* second == top */
pat bne call bxx("bne", "bne") /* second != top */
pat bgt call bxx("bgt", "blt") /* second > top */
pat bge call bxx("bge", "ble") /* second >= top */
pat blt call bxx("blt", "bgt") /* second < top */
pat ble call bxx("ble", "bge") /* second <= top */
/*
 * Shared body for "cmu 4" followed by a branch-on-zero
 * instruction: an unsigned compare-and-branch.  Same structure as
 * the signed bxx procedure, but using cmplwi/cmplw.  The label is
 * $2 because it is the argument of the second instruction of the
 * pattern.  bxx[1] is the branch for a (second ? top) compare,
 * bxx[2] the mirrored branch for a (top ? second) compare.
 */
proc cmu4zxx example cmu zeq
with REG CONST2 STACK
gen
cmplwi %1, %2
bxx[2] {LABEL, $2}
with CONST2 REG STACK
gen
cmplwi %2, %1
bxx[1] {LABEL, $2}
with REG REG STACK
gen
cmplw %2, %1
bxx[1] {LABEL, $2}
/* Pop two unsigned ints, branch if... */
pat cmu zeq $1==4 call cmu4zxx("beq", "beq") /* second == top */
pat cmu zne $1==4 call cmu4zxx("bne", "bne") /* second != top */
pat cmu zgt $1==4 call cmu4zxx("bgt", "blt") /* second > top */
pat cmu zge $1==4 call cmu4zxx("bge", "ble") /* second >= top */
pat cmu zlt $1==4 call cmu4zxx("blt", "bgt") /* second < top */
pat cmu zle $1==4 call cmu4zxx("ble", "bge") /* second <= top */
/* Comparisons */
/* Each comparison extracts the lt and gt bits from cr0.
 * extlwi %a, %a, 2, 0
 * puts lt in the sign bit, so lt yields a negative result,
 * gt yields positive.
 * rlwinm %a, %a, 1, 31, 0
 * puts gt in the sign bit, to reverse the comparison.
 *
 * %1 is the top of stack, %2 the second element.  The reversed
 * form (rlwinm) is used in the "REG constant" alternatives, where
 * the compare is built as (top ? second) instead of
 * (second ? top).
 */
pat cmi $1==INT32 /* Signed tristate compare */
with REG CONST2
uses reusing %1, REG={COND_RC, %1, %2.val}
gen rlwinm %a, %a, {C, 1}, {C, 31}, {C, 0}
yields %a
with CONST2 REG
uses reusing %2, REG={COND_RC, %2, %1.val}
gen extlwi %a, %a, {C, 2}, {C, 0}
yields %a
with REG REG
uses reusing %1, REG={COND_RR, %2, %1}
gen extlwi %a, %a, {C, 2}, {C, 0}
yields %a
/* Same as above with the unsigned compare tokens. */
pat cmu $1==INT32 /* Unsigned tristate compare */
with REG UCONST2
uses reusing %1, REG={CONDL_RC, %1, %2.val}
gen rlwinm %a, %a, {C, 1}, {C, 31}, {C, 0}
yields %a
with UCONST2 REG
uses reusing %2, REG={CONDL_RC, %2, %1.val}
gen extlwi %a, %a, {C, 2}, {C, 0}
yields %a
with REG REG
uses reusing %1, REG={CONDL_RR, %2, %1}
gen extlwi %a, %a, {C, 2}, {C, 0}
yields %a
/* Pointer compare is rewritten to an unsigned word compare. */
pat cmp /* Compare pointers */
leaving
cmu INT32
/* A one-word block compare is rewritten to a signed word compare. */
pat cms $1==INT32 /* Compare blocks (word sized) */
leaving
cmi INT32
/* Other sizes go to the ".cms" helper; an explicit size argument
 * is pushed first. */
pat cms defined($1)
leaving
loc $1
cal ".cms"
pat cms !defined($1)
leaving
cal ".cms"
/* Other branching and labelling */
/*
 * Label definitions.  When topeltsize($1) is 4, the value that is
 * live across the label is kept in r3.
 */
/* Not reached by fallthrough: just define the label and declare
 * that r3 holds the top element. */
pat lab topeltsize($1)==4 && !fallthrough($1)
kills ALL
gen
labeldef $1
yields r3
/* Reached by fallthrough: force the top element into r3 (REG3)
 * and flush the rest of the fake stack before the label. */
pat lab topeltsize($1)==4 && fallthrough($1)
with REG3 STACK
kills ALL
gen
labeldef $1
yields r3
/* No word-sized top element: flush the fake stack and define the
 * label. */
pat lab topeltsize($1)!=4
with STACK
kills ALL
gen
labeldef $1
/* With a word-sized top element at the target, the rule requires
 * it in r3 (REG3), matching what the "lab" rules yield. */
pat bra topeltsize($1)==4 /* Unconditional jump with TOS register */
with REG3 STACK
gen
b {LABEL, $1}
pat bra topeltsize($1)!=4 /* Unconditional jump without TOS register */
with STACK
gen
b {LABEL, $1}
/* Miscellaneous */
/* Direct call: flush the fake stack, then branch-and-link to the
 * named procedure.  All registers are treated as killed. */
pat cal /* Call procedure */
with STACK
kills ALL
gen
bl {LABEL, $1}
/* Indirect call: the target address is popped into a register,
 * moved to ctr, and called through bctrl. */
pat cai /* Call procedure indirect */
with REG STACK
kills ALL
gen
mtspr ctr, %1
bctrl.
/* Function results are picked up from r3 (word) or from the pair
 * r3/r4 (double-word); no code is generated.  The register
 * assignment mirrors the "ret 4" / "ret 8" rules. */
pat lfr $1==INT32 /* Load function result, word */
yields r3
pat lfr $1==INT64 /* Load function result, double-word */
yields r4 r3
/*
 * Return without a value.  After restoring the saved registers,
 * the epilogue reloads the return address and the caller's frame
 * pointer from our frame header and pops the frame.  The frame
 * header layout is given by PC_OFFSET, FP_OFFSET and EM_BSIZE.
 */
pat ret $1==0 /* Return from procedure */
gen
/* Restore saved registers. */
return
/* Epilog: restore lr and fp. */
lwz r0, {IND_RC_W, fp, PC_OFFSET}
mtspr lr, r0
lwz r0, {IND_RC_W, fp, FP_OFFSET}
/* Free our stack frame (skip the saved fp and pc words). */
addi sp, fp, {C, EM_BSIZE}
mr fp, r0
blr.
/* Returning a word: force it into r3 (REG3), then fall back to
 * the plain "ret 0" epilogue. */
pat ret $1==4 /* Return from procedure, word */
with REG3
leaving ret 0
/* Returning a double-word: top word in r3, second word moved to
 * r4, then the plain "ret 0" epilogue. */
pat ret $1==8 /* Return from proc, double-word */
with REG3 REG
gen move %2, r4
leaving ret 0
/*
 * These rules for blm/bls are wrong if length is zero.
 * So are several procedures in libem.
 */
/* A constant-length move pushes the length and reuses "bls". */
pat blm /* Block move constant length */
leaving
loc $1
bls
/*
 * Word-by-word copy loop driven by the count register.
 *   %a = length >> 2, loaded into ctr; afterwards reused as the
 *        data register of the loop
 *   %b = source pointer - 4, %c = destination pointer - 4, because
 *        lwzu/stwu advance the pointer by 4 before each access
 * Only whole words are copied (the length is shifted right by 2).
 */
pat bls /* Block move variable length */
with REG REG REG
/* ( src%3 dst%2 len%1 -- ) */
uses reusing %1, REG, REG, REG
gen
srwi %a, %1, {C, 2}
mtspr ctr, %a
addi %b, %3, {C, 0-4}
addi %c, %2, {C, 0-4}
1: lwzu %a, {IND_RC_W, %b, 4}
stwu %a, {IND_RC_W, %c, 4}
bdnz {LABEL, "1b"}
/* Both switch instructions flush the fake stack and jump (plain
 * "b", not a call) to the corresponding library routine. */
pat csa /* Array-lookup switch */
with STACK
kills ALL
gen
b {LABEL, ".csa"}
pat csb /* Table-lookup switch */
with STACK
kills ALL
gen
b {LABEL, ".csb"}
/* EM specials */
/*
 * Source position bookkeeping, rewritten to ordinary EM loads and
 * stores on "hol0": the line number lives at hol0, the filename
 * pointer at hol0+4.
 */
pat fil /* Set current filename */
leaving
lae $1
ste "hol0+4"
pat lin /* Set current line number */
leaving
loc $1
ste "hol0"
pat lni /* Increment line number */
leaving
ine "hol0"
/*
 * The ignore mask is a single word: "sim" stores it with a
 * one-word "ste".  Load it with the one-word "loe" as well; a
 * two-word "lde" would push an extra word and leave the EM stack
 * unbalanced.
 */
pat lim /* Load EM trap ignore mask */
leaving
loe ".ignmask"
/* The ignore mask is stored as one word at ".ignmask". */
pat sim /* Store EM trap ignore mask */
leaving
ste ".ignmask"
/* The trap number is forced into r3 (REG3) before calling the
 * ".trap" routine. */
pat trp /* Raise EM trap */
with REG3
kills ALL
gen
bl {LABEL, ".trap"}
/* NOTE(review): this only stores the new handler into ".trppc";
 * nothing is pushed back.  If the front ends expect "sig" to
 * leave the previous handler on the stack, this rule is
 * incomplete -- confirm against the EM definition of sig. */
pat sig /* Set trap handler */
leaving
ste ".trppc"
/* Return from trap is treated as an ordinary valueless return. */
pat rtt /* Return from trap */
leaving
ret 0
/*
 * Lexical local base: lxl 0 yields our fp, lxl n yields the
 * fp of the nth statically enclosing procedure.
 *
 * Each level is reached by loading the static link stored at
 * SL_OFFSET from the current frame pointer.  Levels 1 to 3 are
 * unrolled; deeper levels use a counted loop.
 */
pat lxl $1==0
leaving
lor 0
pat lxl $1==1
yields {IND_RC_W, fp, SL_OFFSET}
pat lxl $1==2
uses REG={IND_RC_W, fp, SL_OFFSET}
yields {IND_RC_W, %a, SL_OFFSET}
pat lxl $1==3
uses REG={IND_RC_W, fp, SL_OFFSET}, reusing %a, REG
gen move {IND_RC_W, %a, SL_OFFSET}, %b
yields {IND_RC_W, %b, SL_OFFSET}
/* %a starts one level up; the loop follows the static link $1-1
 * more times, counted down in ctr.  The upper bound keeps $1-1
 * within the CONST_0000_7FFF range. */
pat lxl $1>=4 && $1<=0x8000
uses REG={IND_RC_W, fp, SL_OFFSET},
REG={CONST_0000_7FFF, $1-1}
gen
mtspr ctr, %b
1: lwz %a, {IND_RC_W, %a, SL_OFFSET}
bdnz {LABEL, "1b"}
yields %a
/* The caller's frame pointer is the word saved at FP_OFFSET from
 * the given frame pointer. */
pat dch /* Dynamic chain: LB -> caller's LB */
with REG
yields {IND_RC_W, %1, FP_OFFSET}
/* The arguments start EM_BSIZE bytes above the frame pointer,
 * i.e. just past the saved words of the call frame. */
pat lpb /* LB -> argument base */
leaving
adp EM_BSIZE
/* Argument base of an enclosing procedure = its local base
 * converted by "lpb". */
pat lxa /* Lexical argument base */
leaving
lxl $1
lpb
/*
 * Non-local goto.  $1 labels a three-word descriptor that is read
 * here as: word at offset 0 = target address, offset 4 = new sp,
 * offset 8 = new fp.  fp and sp are loaded first; the target
 * address is loaded last because it overwrites %a, the register
 * holding the descriptor address.  The jump goes through ctr.
 */
pat gto /* longjmp */
with STACK
uses REG
gen
move {LABEL, $1}, %a
move {IND_RC_W, %a, 8}, fp
move {IND_RC_W, %a, 4}, sp
move {IND_RC_W, %a, 0}, %a
mtspr ctr, %a
bctr.
/* lor/str read and write the special registers: 0 is the local
 * base (fp), 1 is the stack pointer (sp).  Loads copy the special
 * register into a fresh scratch register. */
pat lor $1==0 /* Load local base */
uses REG
gen
move fp, %a
yields %a
pat lor $1==1 /* Load stack pointer */
uses REG
gen
move sp, %a
yields %a
pat str $1==0 /* Store local base */
with REG
gen
move %1, fp
pat str $1==1 /* Store stack pointer */
with REG
gen
move %1, sp
/*
 * Range check against a descriptor known at compile time: the
 * first rom word of $1 is used as the lower bound and the second
 * as the upper bound.  A value outside [lower, upper] (signed
 * compare) branches to ".trap_erange"; otherwise the value is
 * left on the stack unchanged.
 * NOTE(review): the bounds are passed to cmpwi as immediates;
 * presumably they must fit in 16 signed bits -- confirm.
 */
pat lae rck $2==4 /* Range check */
with REG
kills ALL
gen
cmpwi %1, {C, rom($1, 1)}
blt {LABEL, ".trap_erange"}
cmpwi %1, {C, rom($1, 2)}
bgt {LABEL, ".trap_erange"}
yields %1
/* Single-precision floating-point */
/*
 * Single-precision arithmetic.  %1 is the top of stack, %2 the
 * second element, so every binary operation computes
 * (second op top).  Each operation has a second rule fused with
 * "stl" that writes the result directly into a floating-point
 * register local.
 */
/* Zero is loaded from the constant at ".fs_00000000". */
pat zrf $1==INT32 /* Push zero */
leaving
loe ".fs_00000000"
pat adf $1==4 /* Add single */
with FSREG FSREG
uses reusing %1, FSREG
gen
fadds %a, %2, %1
yields %a
pat adf stl $1==4 && inreg($2)==reg_float
with FSREG FSREG
gen fadds {LOCAL, $2}, %2, %1
pat sbf $1==4 /* Subtract single */
with FSREG FSREG
uses reusing %1, FSREG
gen
fsubs %a, %2, %1
yields %a
pat sbf stl $1==4 && inreg($2)==reg_float
with FSREG FSREG
gen fsubs {LOCAL, $2}, %2, %1
pat mlf $1==4 /* Multiply single */
with FSREG FSREG
uses reusing %1, FSREG
gen
fmuls %a, %2, %1
yields %a
pat mlf stl $1==4 && inreg($2)==reg_float
with FSREG FSREG
gen fmuls {LOCAL, $2}, %2, %1
pat dvf $1==INT32 /* Divide single */
with FSREG FSREG
uses reusing %1, FSREG
gen
fdivs %a, %2, %1
yields %a
pat dvf stl $1==4 && inreg($2)==reg_float
with FSREG FSREG
gen fdivs {LOCAL, $2}, %2, %1
pat ngf $1==INT32 /* Negate single */
with FSREG
uses reusing %1, FSREG
gen
fneg %a, %1
yields %a
pat ngf stl $1==4 && inreg($2)==reg_float
with FSREG
gen fneg {LOCAL, $2}, %1
/*
 * Single-precision compares.  The COND_FS token compares
 * (second ? top) into a general register.  The tristate form
 * extracts the lt/gt bits with extlwi, as the integer tristate
 * compares do; the fused "cmf txx" forms just wrap the register
 * in the matching Xcc token.
 */
pat cmf $1==INT32 /* Compare single */
with FSREG FSREG
uses REG={COND_FS, %2, %1}
gen extlwi %a, %a, {C, 2}, {C, 0}
yields %a
pat cmf teq $1==4 /* Single second == top */
with FSREG FSREG
uses REG={COND_FS, %2, %1}
yields {XEQ, %a}
pat cmf tne $1==4 /* Single second != top */
with FSREG FSREG
uses REG={COND_FS, %2, %1}
yields {XNE, %a}
pat cmf tgt $1==4 /* Single second > top */
with FSREG FSREG
uses REG={COND_FS, %2, %1}
yields {XGT, %a}
pat cmf tge $1==4 /* Single second >= top */
with FSREG FSREG
uses REG={COND_FS, %2, %1}
yields {XGE, %a}
pat cmf tlt $1==4 /* Single second < top */
with FSREG FSREG
uses REG={COND_FS, %2, %1}
yields {XLT, %a}
pat cmf tle $1==4 /* Single second <= top */
with FSREG FSREG
uses REG={COND_FS, %2, %1}
yields {XLE, %a}
/*
 * Shared body for "cmf 4" followed by a branch-on-zero
 * instruction: compare (second ? top) into cr0 with fcmpo and
 * branch with the mnemonic supplied by the caller.  The label is
 * $2, the argument of the second instruction of the pattern.
 * NOTE(review): the scratch register from "uses REG" is never
 * referenced in the generated code -- confirm whether it is
 * needed.
 */
proc cmf4zxx example cmf zeq
with FSREG FSREG STACK
uses REG
gen
fcmpo cr0, %2, %1
bxx* {LABEL, $2}
/* Pop 2 singles, branch if... */
pat cmf zeq $1==4 call cmf4zxx("beq") /* second == top */
pat cmf zne $1==4 call cmf4zxx("bne") /* second != top */
pat cmf zgt $1==4 call cmf4zxx("bgt") /* second > top */
pat cmf zge $1==4 call cmf4zxx("bge") /* second >= top */
pat cmf zlt $1==4 call cmf4zxx("blt") /* second < top */
pat cmf zle $1==4 call cmf4zxx("ble") /* second <= top */
/* Widening needs no instruction: the rule yields the underlying
 * register (%1.1) of the single-precision register. */
pat loc loc cff $1==INT32 && $2==INT64 /* Convert single to double */
with FSREG
yields %1.1
/*
 * The remaining single-precision conversions are expressed
 * through the double-precision ones: float-to-integer first
 * widens to double, integer-to-float converts to double and then
 * narrows to single.
 */
/* Convert single to signed int */
pat loc loc cfi $1==4 && $2==4
leaving
loc 4
loc 8
cff
loc 8
loc 4
cfi
/* Convert single to unsigned int */
pat loc loc cfu $1==4 && $2==4
leaving
loc 4
loc 8
cff
loc 8
loc 4
cfu
/* Convert signed int to single */
pat loc loc cif $1==4 && $2==4
leaving
loc 4
loc 8
cif
loc 8
loc 4
cff
/* Convert unsigned int to single */
pat loc loc cuf $1==4 && $2==4
leaving
loc 4
loc 8
cuf
loc 8
loc 4
cff
/* Double-precision floating-point */
/*
 * Double-precision arithmetic.  %1 is the top of stack, %2 the
 * second element, so every binary operation computes
 * (second op top).  Each operation has a second rule fused with
 * "sdl" that writes the result directly into a floating-point
 * register local ({DLOCAL, $2}).
 */
/* Zero is loaded from the constant at ".fd_00000000". */
pat zrf $1==INT64 /* Push zero */
leaving
lde ".fd_00000000"
pat adf $1==8 /* Add double */
with FREG FREG
uses reusing %1, FREG
gen
fadd %a, %2, %1
yields %a
pat adf sdl $1==8 && inreg($2)==reg_float
with FREG FREG
gen fadd {DLOCAL, $2}, %2, %1
pat sbf $1==8 /* Subtract double */
with FREG FREG
uses reusing %1, FREG
gen
fsub %a, %2, %1
yields %a
pat sbf sdl $1==8 && inreg($2)==reg_float
with FREG FREG
gen fsub {DLOCAL, $2}, %2, %1
pat mlf $1==8 /* Multiply double */
with FREG FREG
uses reusing %1, FREG
gen
fmul %a, %2, %1
yields %a
pat mlf sdl $1==8 && inreg($2)==reg_float
with FREG FREG
gen fmul {DLOCAL, $2}, %2, %1
pat dvf $1==8 /* Divide double */
with FREG FREG
uses reusing %1, FREG
gen
fdiv %a, %2, %1
yields %a
pat dvf sdl $1==8 && inreg($2)==reg_float
with FREG FREG
gen fdiv {DLOCAL, $2}, %2, %1
pat ngf $1==8 /* Negate double */
with FREG
uses reusing %1, FREG
gen
fneg %a, %1
yields %a
pat ngf sdl $1==8 && inreg($2)==reg_float
with FREG
gen fneg {DLOCAL, $2}, %1
/*
 * Double-precision compares.  The COND_FD token compares
 * (second ? top) into a general register.  The tristate form
 * extracts the lt/gt bits with extlwi, as the integer tristate
 * compares do; the fused "cmf txx" forms just wrap the register
 * in the matching Xcc token.
 */
pat cmf $1==INT64 /* Compare double */
with FREG FREG
uses REG={COND_FD, %2, %1}
gen extlwi %a, %a, {C, 2}, {C, 0}
yields %a
pat cmf teq $1==8 /* Double second == top */
with FREG FREG
uses REG={COND_FD, %2, %1}
yields {XEQ, %a}
pat cmf tne $1==8 /* Double second != top */
with FREG FREG
uses REG={COND_FD, %2, %1}
yields {XNE, %a}
pat cmf tgt $1==8 /* Double second > top */
with FREG FREG
uses REG={COND_FD, %2, %1}
yields {XGT, %a}
pat cmf tge $1==8 /* Double second >= top */
with FREG FREG
uses REG={COND_FD, %2, %1}
yields {XGE, %a}
pat cmf tlt $1==8 /* Double second < top */
with FREG FREG
uses REG={COND_FD, %2, %1}
yields {XLT, %a}
pat cmf tle $1==8 /* Double second <= top */
with FREG FREG
uses REG={COND_FD, %2, %1}
yields {XLE, %a}
/*
 * Shared body for "cmf 8" followed by a branch-on-zero
 * instruction: compare (second ? top) into cr0 with fcmpo and
 * branch with the mnemonic supplied by the caller.  The label is
 * $2, the argument of the second instruction of the pattern.
 * NOTE(review): the scratch register from "uses REG" is never
 * referenced in the generated code -- confirm whether it is
 * needed.
 */
proc cmf8zxx example cmf zeq
with FREG FREG STACK
uses REG
gen
fcmpo cr0, %2, %1
bxx* {LABEL, $2}
/* Pop 2 doubles, branch if... */
pat cmf zeq $1==8 call cmf8zxx("beq") /* second == top */
pat cmf zne $1==8 call cmf8zxx("bne") /* second != top */
pat cmf zgt $1==8 call cmf8zxx("bgt") /* second > top */
pat cmf zge $1==8 call cmf8zxx("bge") /* second >= top */
pat cmf zlt $1==8 call cmf8zxx("blt") /* second < top */
pat cmf zle $1==8 call cmf8zxx("ble") /* second <= top */
/* Convert double to single */
/* reg_float pattern must be first, or it goes unused! */
/* Fused with "stl": round straight into the register local. */
pat loc loc cff stl $1==8 && $2==4 && inreg($4)==reg_float
with FREG
gen frsp {LOCAL, $4}, %1
pat loc loc cff $1==8 && $2==4
with FREG
uses reusing %1, FSREG
gen frsp %a, %1
yields %a
/* Convert double to signed int */
/*
 * fctiwz leaves the integer result in a floating-point register.
 * It is pushed as an 8-byte value with stfdu, then sp is moved up
 * by 4 so that only one word remains on the stack (presumably the
 * word holding the integer result -- confirm against the
 * big-endian layout of the stored double).
 */
pat loc loc cfi $1==8 && $2==4
with FREG STACK
uses reusing %1, FREG
gen
fctiwz %a, %1
stfdu %a, {IND_RC_D, sp, 0-8}
addi sp, sp, {C, 4}
/* The remaining conversions and the fef/fif operations are done
 * by library helpers. */
/* Convert double to unsigned int */
pat loc loc cfu $1==8 && $2==4
leaving
cal ".cfu8"
/* Convert signed int to double */
pat loc loc cif $1==4 && $2==8
leaving
cal ".cif8"
/* Convert unsigned int to double */
pat loc loc cuf $1==4 && $2==8
leaving
cal ".cuf8"
pat fef $1==8 /* Split fraction, exponent */
leaving
cal ".fef8"
/* Multiply two doubles, then split fraction, integer */
pat fif $1==8
leaving
cal ".fif8"