commit 3dae9e49cc

In the instruction list, put /* kills xer */ for sraw, srawi, subfic; and
correct the (now unused) "addi." and "lfdu". Change MACHOPT_F from -m3 to
-m2. This changes the code for 15 * i from

    slwi r3,r4,4
    subfic r5,r4,0
    add r3,r3,r5

to

    mulli r3,r4,15

If the sequence "slwi subfic add" takes 3 cycles and 12 bytes, and mulli
takes 3 cycles and 4 bytes, then mulli is better.
/*
 * PowerPC table for ncg
 *
 * David Given created this table.
 * George Koehler made many changes in years 2016 to 2018.
 *
 * This back end provides 4-byte integers, 4-byte floats, and 8-byte
 * floats. It should provide enough of EM for the ACK's compilers.
 * - It doesn't provide "mon" (monitor call) nor "lor 2", "str 2"
 *   (heap pointer). Programs should call procedures in libsys to
 *   make system calls or allocate heap memory.
 * - It generates only a few EM traps:
 *   - EARRAY from aar, lar, sar
 *   - ERANGE from rck
 *   - ECASE from csa, csb
 * - It uses floating-point registers to move 8-byte values that
 *   aren't floats. This might cause extra FPU context switches in
 *   programs that don't use floating point.
 *
 * The EM stack is less than optimal for PowerPC, and incompatible
 * with the calling conventions of other compilers (like gcc).
 * - EM and ncg use the stack to pass parameters to procedures. For
 *   PowerPC, this is probably slower than passing them in registers.
 * - This back end misaligns some 8-byte floats, because EM's stack
 *   has only 4-byte alignment. (This kind of misalignment also
 *   happened in IBM's AIX and Apple's Mac OS, where data structures
 *   had 8-byte floats with only 4-byte alignment.)
 */

EM_WSIZE = 4
EM_PSIZE = 4
EM_BSIZE = 8 /* two words saved in call frame */

FP_OFFSET = 0 /* Offset of saved FP relative to our FP */
PC_OFFSET = 4 /* Offset of saved PC relative to our FP */

#define COMMENT(n) /* comment {LABEL, n} */

#define nicesize(x) ((x)==1 || (x)==2 || (x)==4 || (x)==8)

#define smalls(n) sfit(n, 16)
#define smallu(n) ufit(n, 16)
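/* As an aside: sfit(n, 16) accepts values that fit a signed 16-bit
 * immediate, -0x8000 through 0x7FFF; ufit(n, 16) accepts an unsigned
 * 16-bit immediate, 0 through 0xFFFF. */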

/* Finds FRAME_V tokens that overlap myoff, mysize. */
#define fover(myoff, mysize) (%off+%size>(myoff) && %off<((myoff)+(mysize)))
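/* For example, a 4-byte FRAME_W token with %off 8 matches
 * fover(10, 2), because 8+4 > 10 and 8 < 10+2. */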

/* Checks if we can use {LXFRAME, x}. */
#define nicelx(x) ((x)>=1 && (x)<=0x8000)

#define lo(n) ((n) & 0xFFFF)
#define hi(n) (((n)>>16) & 0xFFFF)

/* Use these for instructions that treat the low half as signed --- his()
 * includes a modifier to produce the correct value when the low half gets
 * sign extended. Er, do make sure you load the low half second. */
#define los(n) (lo(n) | (((0-(lo(n)>>15)) & ~0xFFFF)))
#define his(n) ((hi(n) + (lo(n)>>15)) & 0xFFFF)
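/* A worked example, for n = 0x00018000:
 *   hi(n) = 0x0001, lo(n) = 0x8000
 *   los(n) = 0xFFFF8000, which sign extends to -0x8000
 *   his(n) = (0x0001 + 1) & 0xFFFF = 0x0002
 * so "addis r,base,2" followed by "addi r,r,-0x8000" computes
 * base + 0x20000 - 0x8000 = base + 0x18000, as intended. */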


PROPERTIES

GPR /* general-purpose register */
SPFP /* sp or fp */
REG /* allocatable GPR */
REG3 /* coercion to r3 */

FPR(8) /* floating-point register */
FREG(8) /* allocatable FPR */
FSREG /* allocatable single-precision FPR */

SPR /* special-purpose register */
CR /* condition register */


REGISTERS

/*
 * We use r1 as stack pointer and r2 as frame pointer.
 * Our assembler has aliases sp -> r1 and fp -> r2.
 *
 * We preserve r13 to r31 and f14 to f31 across function
 * calls to mimic other compilers (like gcc). See
 * - http://refspecs.linuxbase.org/elf/elfspec_ppc.pdf
 * - https://github.com/ryanarn/powerabi -> chap3-elf32abi.sgml
 * - Apple's "32-bit PowerPC Function Calling Conventions"
 *
 * When ncg allocates regvars, it seems to start with the last
 * register in the first class. To encourage ncg to allocate
 * them from r31 down, we list them in one class as
 * r13, r14, ..., r31: GPR, REG regvar(reg_any).
 */

r0, r12 : GPR.
sp, fp : GPR, SPFP.
r3 : GPR, REG, REG3.
r4, r5, r6, r7, r8, r9, r10, r11 : GPR, REG.

r13, r14, r15, r16, r17, r18, r19, r20, r21, r22, r23, r24,
r25, r26, r27, r28, r29, r30, r31
: GPR, REG regvar(reg_any).

f0 : FPR.

f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13
: FPR, FREG.

f14, f15, f16, f17, f18, f19, f20, f21, f22, f23, f24, f25,
f26, f27, f28, f29, f30, f31
: FPR, FREG regvar(reg_float).

fs1("f1")=f1, fs2("f2")=f2, fs3("f3")=f3, fs4("f4")=f4,
fs5("f5")=f5, fs6("f6")=f6, fs7("f7")=f7, fs8("f8")=f8,
fs9("f9")=f9, fs10("f10")=f10, fs11("f11")=f11, fs12("f12")=f12,
fs13("f13")=f13
: FSREG.

/* reglap: reg_float may have subregister of different size */
fs14("f14")=f14, fs15("f15")=f15, fs16("f16")=f16, fs17("f17")=f17,
fs18("f18")=f18, fs19("f19")=f19, fs20("f20")=f20, fs21("f21")=f21,
fs22("f22")=f22, fs23("f23")=f23, fs24("f24")=f24, fs25("f25")=f25,
fs26("f26")=f26, fs27("f27")=f27, fs28("f28")=f28, fs29("f29")=f29,
fs30("f30")=f30, fs31("f31")=f31
: FSREG regvar(reg_float).

lr, ctr : SPR.
cr0 : CR. /* We use cr0, ignore cr1 to cr7. */

/* The stacking rules can't allocate registers. We use these
 * scratch registers to stack tokens.
 */
#define RSCRATCH r0
#define FSCRATCH f0


TOKENS

/* Primitives */

C /* constant */ = { INT val; } 4 val.
LABEL = { ADDR adr; } 4 adr.
LABEL_HI = { ADDR adr; } 4 "hi16[" adr "]".
LABEL_HA = { ADDR adr; } 4 "ha16[" adr "]".
LABEL_LO = { ADDR adr; } 4 "lo16[" adr "]".
LOCAL = { INT off; } 4 ">>> BUG IN LOCAL".
DLOCAL = { INT off; } 8 ">>> BUG IN DLOCAL".

/* Allows us to use regvar() to refer to registers */

REG_EXPR = { REG reg; } 4 reg.
FREG_EXPR = { FREG reg; } 8 reg.
FSREG_EXPR = { FSREG reg; } 4 reg.

/* Constants on the stack */

CONST_N8000 = { INT val; } 4 val.
CONST_N7FFF_N0001 = { INT val; } 4 val.
CONST_0000_7FFF = { INT val; } 4 val.
CONST_8000 = { INT val; } 4 val.
CONST_8001_FFFF = { INT val; } 4 val.
CONST_HI_ZR = { INT val; } 4 val.
CONST_HI_LO = { INT val; } 4 val.

/* Expression partial results */

SEX_B = { GPR reg; } 4. /* sign extension */
SEX_H = { GPR reg; } 4.

SUM_RIS = { GPR reg; INT offhi; } 4. /* reg + (offhi << 16) */
SUM_RC = { GPR reg; INT off; } 4. /* reg + off */
SUM_RL = { GPR reg; ADDR adr; } 4. /* reg + lo16[adr] */
SUM_RR = { GPR reg1; GPR reg2; } 4. /* reg1 + reg2 */

SUB_CR = { INT val; GPR reg; } 4. /* val - reg */
SUB_RR = { GPR reg1; GPR reg2; } 4. /* reg1 - reg2 */
NEG_R = { GPR reg; } 4. /* -reg */
MUL_RC = { GPR reg; INT val; } 4. /* reg * val */
MUL_RR = { GPR reg1; GPR reg2; } 4. /* reg1 * reg2 */
DIV_RR = { GPR reg1; GPR reg2; } 4. /* reg1 / reg2 signed */
DIV_RR_U = { GPR reg1; GPR reg2; } 4. /* reg1 / reg2 unsigned */

/* Indirect loads and stores */

IND_RC_B = { GPR reg; INT off; } 4 off "(" reg ")".
IND_RL_B = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")".
IND_RR_B = { GPR reg1; GPR reg2; } 4.
IND_RC_H = { GPR reg; INT off; } 4 off "(" reg ")".
IND_RL_H = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")".
IND_RR_H = { GPR reg1; GPR reg2; } 4.
IND_RC_H_S = { GPR reg; INT off; } 4 off "(" reg ")".
IND_RL_H_S = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")".
IND_RR_H_S = { GPR reg1; GPR reg2; } 4.
IND_RC_W = { GPR reg; INT off; } 4 off "(" reg ")".
IND_RL_W = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")".
IND_RR_W = { GPR reg1; GPR reg2; } 4.
IND_RC_D = { GPR reg; INT off; } 8 off "(" reg ")".
IND_RL_D = { GPR reg; ADDR adr; } 8 "lo16[" adr "](" reg ")".
IND_RR_D = { GPR reg1; GPR reg2; } 8.

/* Local variables in frame */

FRAME_B = { INT level; GPR reg; INT off; INT size; }
4 off "(" reg ")".
FRAME_H = { INT level; GPR reg; INT off; INT size; }
4 off "(" reg ")".
FRAME_H_S = { INT level; GPR reg; INT off; INT size; }
4 off "(" reg ")".
FRAME_W = { INT level; GPR reg; INT off; INT size; }
4 off "(" reg ")".
FRAME_D = { INT level; GPR reg; INT off; INT size; }
8 off "(" reg ")".

LXFRAME = { INT level; } 4.

/* Bitwise logic */

NOT_R = { GPR reg; } 4. /* ~reg */
AND_RIS = { GPR reg; INT valhi; } 4.
AND_RC = { GPR reg; INT val; } 4.
AND_RR = { GPR reg1; GPR reg2; } 4.
ANDC_RR = { GPR reg1; GPR reg2; } 4. /* reg1 & ~reg2 */
OR_RIS = { GPR reg; INT valhi; } 4.
OR_RC = { GPR reg; INT val; } 4.
OR_RR = { GPR reg1; GPR reg2; } 4.
ORC_RR = { GPR reg1; GPR reg2; } 4. /* reg1 | ~reg2 */
XOR_RIS = { GPR reg; INT valhi; } 4.
XOR_RC = { GPR reg; INT val; } 4.
XOR_RR = { GPR reg1; GPR reg2; } 4.
NAND_RR = { GPR reg1; GPR reg2; } 4. /* ~(reg1 & reg2) */
NOR_RR = { GPR reg1; GPR reg2; } 4. /* ~(reg1 | reg2) */
EQV_RR = { GPR reg1; GPR reg2; } 4. /* ~(reg1 ^ reg2) */

/* Comparisons */

COND_RC = { GPR reg; INT val; } 4.
COND_RR = { GPR reg1; GPR reg2; } 4.
CONDL_RC = { GPR reg; INT val; } 4.
CONDL_RR = { GPR reg1; GPR reg2; } 4.
COND_FS = { FSREG reg1; FSREG reg2; } 4.
COND_FD = { FREG reg1; FREG reg2; } 4.

XEQ = { GPR reg; } 4.
XNE = { GPR reg; } 4.
XGT = { GPR reg; } 4.
XGE = { GPR reg; } 4.
XLT = { GPR reg; } 4.
XLE = { GPR reg; } 4.


SETS

/* signed 16-bit integer */
CONST2 = CONST_N8000 + CONST_N7FFF_N0001 + CONST_0000_7FFF.
/* integer that, when negated, fits signed 16-bit */
CONST2_WHEN_NEG = CONST_N7FFF_N0001 + CONST_0000_7FFF + CONST_8000.
/* unsigned 16-bit integer */
UCONST2 = CONST_0000_7FFF + CONST_8000 + CONST_8001_FFFF.
/* any constant on stack */
CONST_STACK = CONST_N8000 + CONST_N7FFF_N0001 + CONST_0000_7FFF +
CONST_8000 + CONST_8001_FFFF +
CONST_HI_ZR + CONST_HI_LO.

CONST = C + CONST_STACK.

SET_RC_B = IND_RC_B + IND_RL_B + FRAME_B.
SET_RC_H = IND_RC_H + IND_RL_H + FRAME_H.
SET_RC_H_S = IND_RC_H_S + IND_RL_H_S + FRAME_H_S.
SET_RC_W = IND_RC_W + IND_RL_W + FRAME_W.
SET_RC_D = IND_RC_D + IND_RL_D + FRAME_D.

IND_ALL_B = IND_RC_B + IND_RL_B + IND_RR_B.
IND_ALL_H = IND_RC_H + IND_RL_H + IND_RR_H +
IND_RC_H_S + IND_RL_H_S + IND_RR_H_S.
IND_ALL_W = IND_RC_W + IND_RL_W + IND_RR_W.
IND_ALL_D = IND_RC_D + IND_RL_D + IND_RR_D.
IND_V = IND_ALL_B + IND_ALL_H + IND_ALL_W + IND_ALL_D.

FRAME_V = FRAME_B + FRAME_H + FRAME_H_S + FRAME_W + FRAME_D.

/* anything killed by sti (store indirect) */
MEMORY = IND_V + FRAME_V.

/* any integer from stack that we can easily move to GPR */
INT_W = SPFP + REG + CONST_STACK + SEX_B + SEX_H +
SUM_RIS + SUM_RC + SUM_RL + SUM_RR +
SUB_CR + SUB_RR + NEG_R +
MUL_RC + MUL_RR + DIV_RR + DIV_RR_U +
IND_ALL_B + IND_ALL_H + IND_ALL_W +
FRAME_B + FRAME_H + FRAME_H_S + FRAME_W +
NOT_R + AND_RIS + AND_RC + AND_RR + ANDC_RR +
OR_RIS + OR_RC + OR_RR + ORC_RR +
XOR_RIS + XOR_RC + XOR_RR + NAND_RR + NOR_RR + EQV_RR +
XEQ + XNE + XGT + XGE + XLT + XLE.

FLOAT_D = FREG + IND_ALL_D + FRAME_D.
FLOAT_W = FSREG + IND_ALL_W + FRAME_W.


INSTRUCTIONS

/* We give time as cycles of total latency from Freescale
 * Semiconductor, MPC7450 RISC Microprocessor Family Reference
 * Manual, Rev. 5, section 6.6.
 *
 * We have only 4-byte alignment for doubles; 8-byte alignment is
 * optimal. We guess the misalignment penalty by adding 1 cycle to
 * the cost of loading or storing a double:
 *   lfd lfdu lfdx: 4 -> 5
 *   stfd stfdu stfdx: 3 -> 4
 */
cost(4, 1) /* space, time */

add GPR:wo, GPR:ro, GPR:ro.
addX "add." GPR:wo:cc, GPR:ro, GPR:ro.
addi GPR:wo, GPR:ro, CONST+LABEL_LO:ro.
li GPR:wo, CONST:ro.
addis GPR:wo, GPR:ro, CONST+LABEL_HI+LABEL_HA:ro.
lis GPR:wo, CONST+LABEL_HI+LABEL_HA:ro.
and GPR:wo, GPR:ro, GPR:ro.
andc GPR:wo, GPR:ro, GPR:ro.
andiX "andi." GPR:wo:cc, GPR:ro, CONST:ro.
andisX "andis." GPR:wo:cc, GPR:ro, CONST:ro.
b LABEL:ro.
bc CONST:ro, CONST:ro, LABEL:ro.
bdnz LABEL:ro.
beq LABEL:ro.
bne LABEL:ro.
bgt LABEL:ro.
bge LABEL:ro.
blt LABEL:ro.
ble LABEL:ro.
bxx LABEL:ro. /* dummy */
bcctr CONST:ro, CONST:ro, CONST:ro.
bctr.
bcctrl CONST:ro, CONST:ro, CONST:ro.
bctrl.
bclr CONST:ro, CONST:ro, CONST:ro.
blr.
bl LABEL:ro.
cmp CR:wo, CONST:ro, GPR:ro, GPR:ro kills :cc.
cmpw GPR:ro, GPR:ro kills :cc.
cmpi CR:wo, CONST:ro, GPR:ro, CONST:ro kills :cc.
cmpwi GPR:ro, CONST:ro kills :cc.
cmpl CR:wo, CONST:ro, GPR:ro, GPR:ro kills :cc.
cmplw GPR:ro, GPR:ro kills :cc.
cmpli CR:wo, CONST:ro, GPR:ro, CONST:ro kills :cc.
cmplwi GPR:ro, CONST:ro kills :cc.
divw GPR:wo, GPR:ro, GPR:ro cost(4, 23).
divwu GPR:wo, GPR:ro, GPR:ro cost(4, 23).
eqv GPR:wo, GPR:ro, GPR:ro.
extsb GPR:wo, GPR:ro.
extsh GPR:wo, GPR:ro.
fadd FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 5).
fadds FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5).
fcmpo CR:wo, FREG:ro, FREG:ro cost(4, 5).
fcmpo CR:wo, FSREG:ro, FSREG:ro cost(4, 5).
fctiwz FREG:wo, FREG:ro cost(4, 5).
fdiv FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 35).
fdivs FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 21).
fmr FPR:wo, FPR:ro cost(4, 5).
fmr FSREG:wo, FSREG:ro cost(4, 5).
fmul FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 5).
fmuls FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5).
fneg FREG+DLOCAL:wo, FREG:ro cost(4, 5).
fneg FSREG+LOCAL:wo, FSREG:ro cost(4, 5).
frsp FSREG+LOCAL:wo, FREG:ro cost(4, 5).
fsub FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 5).
fsubs FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5).
lbz GPR:wo, SET_RC_B:ro cost(4, 3).
lbzx GPR:wo, GPR:ro, GPR:ro cost(4, 3).
lfd FPR+DLOCAL:wo, SET_RC_D:ro cost(4, 5).
lfdu FPR:wo, IND_RC_D:rw cost(4, 5).
lfdx FPR:wo, GPR:ro, GPR:ro cost(4, 5).
lfs FSREG+LOCAL:wo, SET_RC_W:ro cost(4, 4).
lfsu FSREG:wo, IND_RC_W:rw cost(4, 4).
lfsx FSREG:wo, GPR:ro, GPR:ro cost(4, 4).
lha GPR:wo, SET_RC_H_S:ro cost(4, 3).
lhax GPR:wo, GPR:ro, GPR:ro cost(4, 3).
lhz GPR:wo, SET_RC_H:ro cost(4, 3).
lhzx GPR:wo, GPR:ro, GPR:ro cost(4, 3).
lwz GPR+LOCAL:wo, SET_RC_W:ro cost(4, 3).
lwzu GPR:wo, IND_RC_W:rw cost(4, 3).
lwzx GPR:wo, GPR:ro, GPR:ro cost(4, 3).
mfcr GPR:wo cost(4,2).
mfspr GPR:wo, SPR:ro cost(4, 3).
mtspr SPR:wo, GPR:ro cost(4, 2).
mulli GPR:wo, GPR:ro, CONST:ro cost(4, 3).
mullw GPR:wo, GPR:ro, GPR:ro cost(4, 4).
nand GPR:wo, GPR:ro, GPR:ro.
neg GPR:wo, GPR:ro.
nor GPR:wo, GPR:ro, GPR:ro.
or GPR:wo, GPR:ro, GPR:ro.
mr GPR:wo, GPR:ro.
orX "or." GPR:wo:cc, GPR:ro, GPR:ro.
mrX_readonly "mr." GPR:ro:cc, GPR:ro.
orc GPR:wo, GPR:ro, GPR:ro.
ori GPR:wo, GPR:ro, CONST+LABEL_LO:ro.
oris GPR:wo, GPR:ro, CONST:ro.
rlwinm GPR:wo, GPR:ro, CONST:ro, CONST:ro, CONST:ro.
extlwi GPR:wo, GPR:ro, CONST:ro, CONST:ro.
extrwi GPR:wo, GPR:ro, CONST:ro, CONST:ro.
rotlwi GPR+LOCAL:wo, GPR:ro, CONST:ro.
rotrwi GPR+LOCAL:wo, GPR:ro, CONST:ro.
slwi GPR+LOCAL:wo, GPR:ro, CONST:ro.
srwi GPR+LOCAL:wo, GPR:ro, CONST:ro.
rlwnm GPR:wo, GPR:ro, GPR:ro, CONST:ro, CONST:ro.
rotlw GPR+LOCAL:wo, GPR:ro, GPR:ro.
slw GPR+LOCAL:wo, GPR:ro, GPR:ro.
sraw GPR+LOCAL:wo, GPR:ro, GPR:ro /* kills xer */ cost(4, 2).
srawi GPR+LOCAL:wo, GPR:ro, CONST:ro /* kills xer */ cost(4, 2).
srw GPR+LOCAL:wo, GPR:ro, GPR:ro.
stb GPR:ro, SET_RC_B:rw cost(4, 3).
stbx GPR:ro, GPR:ro, GPR:ro cost(4, 3).
stfd FPR:ro, SET_RC_D:rw cost(4, 4).
stfdu FPR:ro, IND_RC_D:rw cost(4, 4).
stfdx FPR:ro, GPR:ro, GPR:ro cost(4, 4).
stfs FSREG:ro, SET_RC_W:rw cost(4, 3).
stfsu FSREG:ro, IND_RC_W:rw cost(4, 3).
stfsx FSREG:ro, GPR:ro, GPR:ro cost(4, 3).
sth GPR:ro, SET_RC_H:rw cost(4, 3).
sthx GPR:ro, GPR:ro, GPR:ro cost(4, 3).
stw GPR:ro, SET_RC_W:rw cost(4, 3).
stwx GPR:ro, GPR:ro, GPR:ro cost(4, 3).
stwu GPR:ro, IND_RC_W:rw cost(4, 3).
subf GPR:wo, GPR:ro, GPR:ro.
subfic GPR:wo, GPR:ro, CONST:ro /* kills xer */.
xor GPR:wo, GPR:ro, GPR:ro.
xori GPR:wo, GPR:ro, CONST:ro.
xoris GPR:wo, GPR:ro, CONST:ro.

bug ">>> BUG" LABEL:ro cost(0, 0).
comment "!" LABEL:ro cost(0, 0).


MOVES

from GPR to GPR
gen mr %2, %1

from FSREG to FSREG
gen fmr %2, %1

from FPR to FPR
gen fmr %2, %1

/* Constants */

from CONST smalls(%val) to GPR
gen
COMMENT("move CONST->GPR smalls")
li %2, %1

from CONST lo(%val)==0 to GPR
gen
COMMENT("move CONST->GPR shifted")
lis %2, {C, hi(%1.val)}

from CONST to GPR
gen
COMMENT("move CONST->GPR")
lis %2, {C, hi(%1.val)}
ori %2, %2, {C, lo(%1.val)}
/* Can't use addi %2, %2, {C, los(%1.val)}
 * because %2 might be R0. */

from LABEL to GPR
gen
COMMENT("move LABEL->GPR")
lis %2, {LABEL_HI, %1.adr}
ori %2, %2, {LABEL_LO, %1.adr}

from LABEL_HA to GPR
gen lis %2, %1

/* Sign extension */

from SEX_B to GPR
gen extsb %2, %1.reg

from SEX_H to GPR
gen extsh %2, %1.reg

/* Register + something */

from SUM_RIS to GPR
gen addis %2, %1.reg, {C, %1.offhi}

from SUM_RC to GPR
gen addi %2, %1.reg, {C, %1.off}

from SUM_RL to GPR
gen addi %2, %1.reg, {LABEL_LO, %1.adr}

from SUM_RR to GPR
gen add %2, %1.reg1, %1.reg2

/* Other arithmetic */

from SUB_CR to GPR
/* val - reg -> subtract reg from val */
gen subfic %2, %1.reg, {C, %1.val}

from SUB_RR to GPR
/* reg1 - reg2 -> subtract reg2 from reg1 */
gen subf %2, %1.reg2, %1.reg1

from NEG_R to GPR
gen neg %2, %1.reg

from MUL_RC to GPR
gen mulli %2, %1.reg, {C, %1.val}

from MUL_RR to GPR
gen mullw %2, %1.reg1, %1.reg2

from DIV_RR to GPR
gen divw %2, %1.reg1, %1.reg2

from DIV_RR_U to GPR
gen divwu %2, %1.reg1, %1.reg2

/* Read byte */

from SET_RC_B to GPR
gen lbz %2, %1

from IND_RR_B to GPR
gen lbzx %2, %1.reg1, %1.reg2

/* Write byte */

from GPR to SET_RC_B
gen stb %1, %2

from GPR to IND_RR_B
gen stbx %1, %2.reg1, %2.reg2

/* Read halfword (short) */

from SET_RC_H to GPR
gen lhz %2, %1

from IND_RR_H to GPR
gen lhzx %2, %1.reg1, %1.reg2

from SET_RC_H_S to GPR
gen lha %2, %1

from IND_RR_H_S to GPR
gen lhax %2, %1.reg1, %1.reg2

/* Write halfword */

from GPR to SET_RC_H
gen sth %1, %2

from GPR to IND_RR_H
gen sthx %1, %2.reg1, %2.reg2

/* Read word */

from SET_RC_W to GPR
gen lwz %2, %1

from IND_RR_W to GPR
gen lwzx %2, %1.reg1, %1.reg2

from SET_RC_W to FSREG
gen lfs %2, %1

from IND_RR_W to FSREG
gen lfsx %2, %1.reg1, %1.reg2

/* Write word */

from GPR to SET_RC_W
gen stw %1, %2

from GPR to IND_RR_W
gen stwx %1, %2.reg1, %2.reg2

from FSREG to SET_RC_W
gen stfs %1, %2

from FSREG to IND_RR_W
gen stfsx %1, %2.reg1, %2.reg2

/* Read double */

from SET_RC_D to FPR
gen lfd %2, %1

from IND_RR_D to FPR
gen lfdx %2, %1.reg1, %1.reg2

/* Write double */

from FPR to SET_RC_D
gen stfd %1, %2

from FPR to IND_RR_D
gen stfdx %1, %2.reg1, %2.reg2

/* LXFRAME is a lexical frame from the static chain. We define a move
so "uses REG={LXFRAME, $1}" may find a register with the same
frame, and not repeat the move. This move can't search for a REG
with {LXFRAME, $1-1}, but must always start from fp. The static
chain, if it exists, is the argument at fp + EM_BSIZE. */
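/* For example, with EM_BSIZE = 8, the {LXFRAME, 2} move below
 * expands to
 *   lwz rN,8(fp)
 *   lwz rN,8(rN)
 * for whichever register rN ncg picked (not r0; see the note in
 * that rule). */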

from LXFRAME %level==1 to REG
gen lwz %2, {IND_RC_W, fp, EM_BSIZE}
from LXFRAME %level==2 to REG
gen lwz %2, {IND_RC_W, fp, EM_BSIZE}
/* PowerPC can't add r0 + EM_BSIZE,
 * so %2 must not be r0. */
lwz %2, {IND_RC_W, %2, EM_BSIZE}
from LXFRAME %level==3 to REG
gen lwz %2, {IND_RC_W, fp, EM_BSIZE}
lwz %2, {IND_RC_W, %2, EM_BSIZE}
lwz %2, {IND_RC_W, %2, EM_BSIZE}
from LXFRAME %level==4 to REG
gen lwz %2, {IND_RC_W, fp, EM_BSIZE}
lwz %2, {IND_RC_W, %2, EM_BSIZE}
lwz %2, {IND_RC_W, %2, EM_BSIZE}
lwz %2, {IND_RC_W, %2, EM_BSIZE}
from LXFRAME to REG /* assuming %level in 2 to 0x8000 */
gen li %2, {C, %1.level-1}
mtspr ctr, %2
lwz %2, {IND_RC_W, fp, EM_BSIZE}
1: lwz %2, {IND_RC_W, %2, EM_BSIZE}
bdnz {LABEL, "1b"}

/* Logicals */

from NOT_R to GPR
gen nor %2, %1.reg, %1.reg

from AND_RIS to GPR
gen andisX %2, %1.reg, {C, %1.valhi}

from AND_RC to GPR
gen andiX %2, %1.reg, {C, %1.val}

from AND_RR to GPR
gen and %2, %1.reg1, %1.reg2

from ANDC_RR to GPR
gen andc %2, %1.reg1, %1.reg2

from OR_RIS to GPR
gen oris %2, %1.reg, {C, %1.valhi}

from OR_RC to GPR
gen ori %2, %1.reg, {C, %1.val}

from OR_RR to GPR
gen or %2, %1.reg1, %1.reg2

from ORC_RR to GPR
gen orc %2, %1.reg1, %1.reg2

from XOR_RIS to GPR
gen xoris %2, %1.reg, {C, %1.valhi}

from XOR_RC to GPR
gen xori %2, %1.reg, {C, %1.val}

from XOR_RR to GPR
gen xor %2, %1.reg1, %1.reg2

from NAND_RR to GPR
gen nand %2, %1.reg1, %1.reg2

from NOR_RR to GPR
gen nor %2, %1.reg1, %1.reg2

from EQV_RR to GPR
gen eqv %2, %1.reg1, %1.reg2

/* Conditions */

/* Compare values, then copy cr0 to GPR. */

from COND_RC to GPR
gen
cmpwi %1.reg, {C, %1.val}
mfcr %2

from COND_RR to GPR
gen
cmpw %1.reg1, %1.reg2
mfcr %2

from CONDL_RC to GPR
gen
cmplwi %1.reg, {C, %1.val}
mfcr %2

from CONDL_RR to GPR
gen
cmplw %1.reg1, %1.reg2
mfcr %2

from COND_FS to GPR
gen
fcmpo cr0, %1.reg1, %1.reg2
mfcr %2

from COND_FD to GPR
gen
fcmpo cr0, %1.reg1, %1.reg2
mfcr %2

/* Given a copy of cr0 in %1.reg, extract a condition bit
 * (lt, gt, eq) and perhaps flip it.
 */
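/* After mfcr, the cr0 field occupies the four most-significant bits
 * of %1.reg: lt is bit 0, gt is bit 1, eq is bit 2, counting from
 * the MSB as PowerPC does. So extrwi %2, %1.reg, 1, 2 isolates eq,
 * and the xori in XNE, XGE, and XLE flips the extracted bit. */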

from XEQ to GPR
gen
extrwi %2, %1.reg, {C, 1}, {C, 2}

from XNE to GPR
gen
extrwi %2, %1.reg, {C, 1}, {C, 2}
xori %2, %2, {C, 1}

from XGT to GPR
gen
extrwi %2, %1.reg, {C, 1}, {C, 1}

from XGE to GPR
gen
extrwi %2, %1.reg, {C, 1}, {C, 0}
xori %2, %2, {C, 1}

from XLT to GPR
gen
extrwi %2, %1.reg, {C, 1}, {C, 0}

from XLE to GPR
gen
extrwi %2, %1.reg, {C, 1}, {C, 1}
xori %2, %2, {C, 1}

/* REG_EXPR exists solely to allow us to use regvar() (which can only
be used in an expression) as a register constant. We can then use
our moves to GPR or REG to set register variables. This is easier
than defining moves to LOCAL, and avoids confusion between GPR and
FSREG in LOCAL. */

from INT_W + LXFRAME to REG_EXPR
gen move %1, %2.reg

from FLOAT_D to FREG_EXPR
gen move %1, %2.reg

from FLOAT_W to FSREG_EXPR
gen move %1, %2.reg


TESTS

/* Given "mrX %1, %1", ncgg would say, "Instruction destroys
 * %1, not allowed here". We use mrX_readonly to trick ncgg.
 */
to test GPR
gen
mrX_readonly %1, %1


STACKINGRULES

from SPFP+REG to STACK
gen
COMMENT("stack SPFP+REG")
stwu %1, {IND_RC_W, sp, 0-4}

from INT_W-SPFP-REG to STACK
gen
COMMENT("stack INT_W-SPFP-REG")
move %1, RSCRATCH
stwu RSCRATCH, {IND_RC_W, sp, 0-4}

from FLOAT_D-FREG to STACK
gen
COMMENT("stack FLOAT_D-FREG")
move %1, FSCRATCH
stfdu FSCRATCH, {IND_RC_D, sp, 0-8}

from FREG to STACK
gen
COMMENT("stack FREG")
stfdu %1, {IND_RC_D, sp, 0-8}

from FSREG to STACK
gen
COMMENT("stack FSREG")
stfsu %1, {IND_RC_W, sp, 0-4}

/*
 * We never stack LOCAL or DLOCAL tokens, because we only use
 * them for register variables, so ncg pushes the register,
 * not the token. These rules only prevent an error in ncgg.
 */
from LOCAL to STACK
gen bug {LABEL, "STACKING LOCAL"}
from DLOCAL to STACK
gen bug {LABEL, "STACKING DLOCAL"}


COERCIONS

/* The unstacking coercions emit many "addi sp, sp, X"
 * instructions; the target optimizer (top) will merge them.
 */

from STACK
uses REG
gen
COMMENT("coerce STACK->REG")
lwz %a, {IND_RC_W, sp, 0}
addi sp, sp, {C, 4}
yields %a

from STACK
uses FREG
gen
COMMENT("coerce STACK->FREG")
lfd %a, {IND_RC_D, sp, 0}
addi sp, sp, {C, 8}
yields %a

from STACK
uses FSREG
gen
COMMENT("coerce STACK->FSREG")
lfs %a, {IND_RC_W, sp, 0}
addi sp, sp, {C, 4}
yields %a

/* "uses REG=%1" may find and reuse a register containing the
 * same token. For contrast, "uses REG gen move %1, %a" would
 * pick a different register before doing the move.
 *
 * "reusing %1" helps when coercing an INT_W token like
 * {SUM_RC, r3, 0-4} to REG3, by not stacking the token.
 */

from INT_W
uses reusing %1, REG=%1
yields %a

from FLOAT_D
uses reusing %1, FREG=%1
yields %a

from FLOAT_W
uses reusing %1, FSREG=%1
yields %a

/* Splitting coercions can't allocate registers.
 * PowerPC can't add r0 + constant. Use r12.
 */

from IND_RC_D %off<=0x7FFA
yields
{IND_RC_W, %1.reg, %1.off+4}
{IND_RC_W, %1.reg, %1.off}

from IND_RC_D
/* Don't move to %1.reg; it might be a regvar. */
gen move {SUM_RC, %1.reg, %1.off}, r12
yields {IND_RC_W, r12, 4} {IND_RC_W, r12, 0}

from IND_RR_D
gen move {SUM_RR, %1.reg1, %1.reg2}, r12
yields {IND_RC_W, r12, 4} {IND_RC_W, r12, 0}

from FRAME_D %off<=0x7FFA
yields
{FRAME_W, %1.level, %1.reg, %1.off+4, 4}
{FRAME_W, %1.level, %1.reg, %1.off, 4}


PATTERNS

/* Constants */

pat loc $1==(0-0x8000) /* Load constant */
yields {CONST_N8000, $1}
pat loc $1>=(0-0x7FFF) && $1<=(0-1)
yields {CONST_N7FFF_N0001, $1}
pat loc $1>=0 && $1<=0x7FFF
yields {CONST_0000_7FFF, $1}
pat loc $1==0x8000
yields {CONST_8000, $1}
pat loc $1>=0x8001 && $1<=0xFFFF
yields {CONST_8001_FFFF, $1}
pat loc lo($1)==0
yields {CONST_HI_ZR, $1}
pat loc
yields {CONST_HI_LO, $1}


/* Stack shuffles */

/* The peephole optimizer does: loc $1 ass 4 -> asp $1
 * To optimize multiplication, it uses: dup 8 asp 4
 */

pat asp $1==4 /* Adjust stack by constant */
with exact INT_W+FLOAT_W
/* drop %1 */
with STACK
gen addi sp, sp, {C, 4}
pat asp smalls($1)
with STACK
gen addi sp, sp, {C, $1}
pat asp lo($1)==0
with STACK
gen addis sp, sp, {C, hi($1)}
pat asp
with STACK
gen
addis sp, sp, {C, his($1)}
addi sp, sp, {C, los($1)}

pat ass $1==4 /* Adjust stack by variable */
with REG STACK
gen add sp, sp, %1

/* To duplicate a token, we coerce the token into a register,
 * then duplicate the register. This decreases code size.
 */

pat dup $1==4 /* Duplicate word on top of stack */
with REG+FSREG
yields %1 %1

pat dup $1==8 /* Duplicate double-word */
with REG+FSREG REG+FSREG
yields %2 %1 %2 %1
with FREG
yields %1 %1

pat dup /* Duplicate other size */
leaving
loc $1
dus 4

pat dus $1==4 /* Duplicate variable size */
with REG STACK
/* ( a size%1 -- a a ) */
uses REG, REG
gen
srwi %a, %1, {C, 2}
mtspr ctr, %a
add %b, sp, %1
1: lwzu %a, {IND_RC_W, %b, 0-4}
stwu %a, {IND_RC_W, sp, 0-4}
bdnz {LABEL, "1b"}

pat exg $1==4 /* Exchange top two words */
with INT_W+FLOAT_W INT_W+FLOAT_W
yields %1 %2

pat exg defined($1) /* Exchange other size */
leaving
loc $1
cal ".exg"

pat exg !defined($1)
leaving
cal ".exg"


/* Type conversions */

pat loc loc ciu /* signed -> unsigned */
leaving
loc $1
loc $2
cuu

pat loc loc cui /* unsigned -> signed */
leaving
loc $1
loc $2
cuu

pat loc loc cuu $1<=4 && $2<=4 /* unsigned -> unsigned */
/* nop */

pat loc loc cii $1<=4 && $2<=$1
/* signed -> signed of smaller or same size,
 * no sign extension */

pat loc loc cii $1==1 && $2<=4 /* sign-extend char */
with REG
yields {SEX_B, %1}

pat loc loc cii $1==2 && $2<=4 /* sign-extend short */
with REG
yields {SEX_H, %1}


/* Local variables */

pat lal smalls($1) /* Load address of local */
yields {SUM_RC, fp, $1}

pat lal /* Load address of local */
uses REG={SUM_RIS, fp, his($1)}
yields {SUM_RC, %a, los($1)}

pat lal loi smalls($1) && $2==1 /* Load byte from local */
yields {FRAME_B, 0, fp, $1, 1}

/* Load half-word from local and sign-extend */
pat lal loi loc loc cii smalls($1) && $2==2 && $3==2 && $4==4
yields {FRAME_H_S, 0, fp, $1, 2}

pat lal loi smalls($1) && $2==2 /* Load half-word from local */
yields {FRAME_H, 0, fp, $1, 2}

/* Load word from local */
pat lol inreg($1)==reg_any || inreg($1)==reg_float
yields {LOCAL, $1}
pat lol smalls($1)
yields {FRAME_W, 0, fp, $1, 4}
pat lol
leaving
lal $1
loi 4

pat ldl inreg($1)==reg_float /* Load double-word from local */
yields {DLOCAL, $1}
pat ldl smalls($1) && smalls($1+4)
/* smalls($1+4) implies FRAME_D %off<=0x7FFA */
yields {FRAME_D, 0, fp, $1, 8}
pat ldl
leaving
lal $1
loi 8

pat lal sti smalls($1) && $2==1 /* Store byte to local */
with REG
kills IND_V, FRAME_V %level==0 && fover($1, 1)
gen move %1, {FRAME_B, 0, fp, $1, 1}

pat lal sti smalls($1) && $2==2 /* Store half-word to local */
with REG
kills IND_V, FRAME_V %level==0 && fover($1, 2)
gen move %1, {FRAME_H, 0, fp, $1, 2}

pat stl inreg($1)==reg_any /* Store word to local */
with exact INT_W
/* ncg fails to infer that regvar($1) is dead! */
kills regvar($1)
gen move %1, {REG_EXPR, regvar($1)}
with STACK
gen
lwz {LOCAL, $1}, {IND_RC_W, sp, 0}
addi sp, sp, {C, 4}
pat stl inreg($1)==reg_float
with exact FLOAT_W
kills regvar_w($1, reg_float)
gen move %1, {FSREG_EXPR, regvar_w($1, reg_float)}
with STACK
gen
lfs {LOCAL, $1}, {IND_RC_W, sp, 0}
addi sp, sp, {C, 4}
pat stl smalls($1)
with REG+FSREG
kills IND_V, FRAME_V %level==0 && fover($1, 4)
gen move %1, {FRAME_W, 0, fp, $1, 4}
pat stl
leaving
lal $1
sti 4

pat sdl inreg($1)==reg_float /* Store double-word to local */
with exact FLOAT_D
kills regvar_d($1, reg_float)
gen move %1, {FREG_EXPR, regvar_d($1, reg_float)}
with STACK
gen
lfd {DLOCAL, $1}, {IND_RC_D, sp, 0}
addi sp, sp, {C, 8}
pat sdl smalls($1) && smalls($1+4)
with REG REG
kills IND_V, FRAME_V %level==0 && fover($1, 8)
gen
move %1, {FRAME_W, 0, fp, $1, 4}
move %2, {FRAME_W, 0, fp, $1+4, 4}
with FREG
kills IND_V, FRAME_V %level==0 && fover($1, 8)
gen move %1, {FRAME_D, 0, fp, $1, 8}
pat sdl
leaving
lal $1
sti 8

pat lil /* Load indirect from local */
leaving
lol $1
loi 4

pat sil /* Store indirect to local */
leaving
lol $1
sti 4

pat zrl /* Zero local */
leaving
loc 0
stl $1

pat inl /* Increment local */
leaving
lol $1
loc 1
adi 4
stl $1

pat del /* Decrement local */
leaving
lol $1
loc 1
sbi 4
stl $1


/* Local variables of procedures on static chain */

/* lxa (lexical argument base) -> lxl (lexical local base) */
pat lxa adp nicelx($1)
leaving lxl $1 adp $2+EM_BSIZE
pat lxa lof nicelx($1)
leaving lxl $1 lof $2+EM_BSIZE
pat lxa ldf nicelx($1)
leaving lxl $1 ldf $2+EM_BSIZE
pat lxa stf nicelx($1)
leaving lxl $1 stf $2+EM_BSIZE
pat lxa sdf nicelx($1)
leaving lxl $1 sdf $2+EM_BSIZE
pat lxa nicelx($1)
leaving lxl $1 adp EM_BSIZE

/* Load locals in statically enclosing procedures */
pat lxl adp loi nicelx($1) && smalls($2) && $3==1
uses REG={LXFRAME, $1}
yields {FRAME_B, $1, %a, $2, 1}
pat lxl adp loi loc loc cii nicelx($1) && smalls($2) &&
$3==2 && $4==2 && $5==4
uses REG={LXFRAME, $1}
yields {FRAME_H_S, $1, %a, $2, 2}
pat lxl adp loi nicelx($1) && smalls($2) && $3==2
uses REG={LXFRAME, $1}
yields {FRAME_H, $1, %a, $2, 2}
pat lxl lof nicelx($1) && smalls($2)
uses REG={LXFRAME, $1}
yields {FRAME_W, $1, %a, $2, 4}
pat lxl ldf nicelx($1) && smalls($2) && smalls($2+4)
uses REG={LXFRAME, $1}
/* smalls($2+4) implies FRAME_D %off<=0x7FFA */
yields {FRAME_D, $1, %a, $2, 8}

/* Store locals in statically enclosing procedures */
pat lxl adp sti nicelx($1) && smalls($2) && $3==1
with REG
kills IND_V, FRAME_V %level==$1 && fover($2, 1)
uses REG={LXFRAME, $1}
gen move %1, {FRAME_B, $1, %a, $2, 1}
pat lxl adp sti nicelx($1) && smalls($2) && $3==2
with REG
kills IND_V, FRAME_V %level==$1 && fover($2, 2)
uses REG={LXFRAME, $1}
gen move %1, {FRAME_H, $1, %a, $2, 2}
pat lxl stf nicelx($1) && smalls($2)
with REG+FSREG
kills IND_V, FRAME_V %level==$1 && fover($2, 4)
uses REG={LXFRAME, $1}
gen move %1, {FRAME_W, $1, %a, $2, 4}
pat lxl sdf nicelx($1) && smalls($2) && smalls($2+4)
with REG REG
kills IND_V, FRAME_V %level==$1 && fover($2, 8)
uses REG={LXFRAME, $1}
gen
move %1, {FRAME_W, $1, %a, $2, 4}
move %2, {FRAME_W, $1, %a, $2+4, 4}
with FREG
kills IND_V, FRAME_V %level==$1 && fover($2, 8)
uses REG={LXFRAME, $1}
gen move %1, {FRAME_D, $1, %a, $2, 8}

pat lxl nicelx($1) /* Local base on static chain */
uses REG={LXFRAME, $1}
yields %a /* Can't yield LXFRAME. */
pat lxl stl nicelx($1) && inreg($2)==reg_any
kills regvar($2)
gen move {LXFRAME, $1}, {REG_EXPR, regvar($2)}

pat lxl $1==0 /* Our local base */
yields fp

pat lxa $1==0 /* Our argument base */
yields {SUM_RC, fp, EM_BSIZE}


/* Global variables */

pat lpi /* Load address of function */
leaving
lae $1

pat lae /* Load address of external */
uses REG={LABEL_HA, $1}
yields {SUM_RL, %a, $1}

pat loe /* Load word external */
leaving
lae $1
loi 4

pat ste /* Store word external */
leaving
lae $1
sti 4

pat lde /* Load double-word external */
leaving
lae $1
loi 8

pat sde /* Store double-word external */
leaving
lae $1
sti 8

pat zre /* Zero external */
leaving
loc 0
ste $1

pat ine /* Increment external */
leaving
loe $1
inc
ste $1

pat dee /* Decrement external */
leaving
loe $1
dec
ste $1


/* Structures */

pat lof /* Load word offsetted */
leaving
adp $1
loi 4

pat ldf /* Load double-word offsetted */
leaving
adp $1
loi 8

pat stf /* Store word offsetted */
leaving
adp $1
sti 4

pat sdf /* Store double-word offsetted */
leaving
adp $1
sti 8


/* Loads and stores */

pat loi $1==1 /* Load byte indirect */
with REG
yields {IND_RC_B, %1, 0}
with exact SUM_RC
yields {IND_RC_B, %1.reg, %1.off}
with exact SUM_RL
yields {IND_RL_B, %1.reg, %1.adr}
with exact SUM_RR
yields {IND_RR_B, %1.reg1, %1.reg2}

/* Load half-word indirect and sign-extend */
pat loi loc loc cii $1==2 && $2==2 && $3==4
with REG
yields {IND_RC_H_S, %1, 0}
with exact SUM_RC
yields {IND_RC_H_S, %1.reg, %1.off}
with exact SUM_RL
yields {IND_RL_H_S, %1.reg, %1.adr}
with exact SUM_RR
yields {IND_RR_H_S, %1.reg1, %1.reg2}

pat loi $1==2 /* Load half-word indirect */
with REG
yields {IND_RC_H, %1, 0}
with exact SUM_RC
yields {IND_RC_H, %1.reg, %1.off}
with exact SUM_RL
yields {IND_RL_H, %1.reg, %1.adr}
with exact SUM_RR
yields {IND_RR_H, %1.reg1, %1.reg2}

pat loi $1==4 /* Load word indirect */
with REG
yields {IND_RC_W, %1, 0}
with exact SUM_RC
yields {IND_RC_W, %1.reg, %1.off}
with exact SUM_RL
yields {IND_RL_W, %1.reg, %1.adr}
with exact SUM_RR
yields {IND_RR_W, %1.reg1, %1.reg2}

pat loi $1==8 /* Load double-word indirect */
with REG
yields {IND_RC_D, %1, 0}
with exact SUM_RC
yields {IND_RC_D, %1.reg, %1.off}
with exact SUM_RL
yields {IND_RL_D, %1.reg, %1.adr}
with exact SUM_RR
yields {IND_RR_D, %1.reg1, %1.reg2}

pat loi /* Load arbitrary size */
leaving
loc $1
los 4

pat los $1==4 /* Load arbitrary size */
with REG3 STACK
kills ALL
gen bl {LABEL, ".los4"}

pat sti $1==1 /* Store byte indirect */
with REG REG
kills MEMORY
gen move %2, {IND_RC_B, %1, 0}
with SUM_RC REG
kills MEMORY
gen move %2, {IND_RC_B, %1.reg, %1.off}
with SUM_RL REG
kills MEMORY
gen move %2, {IND_RL_B, %1.reg, %1.adr}
with SUM_RR REG
kills MEMORY
gen move %2, {IND_RR_B, %1.reg1, %1.reg2}

pat sti $1==2 /* Store half-word indirect */
with REG REG
kills MEMORY
gen move %2, {IND_RC_H, %1, 0}
with SUM_RC REG
kills MEMORY
gen move %2, {IND_RC_H, %1.reg, %1.off}
with SUM_RL REG
kills MEMORY
gen move %2, {IND_RL_H, %1.reg, %1.adr}
with SUM_RR REG
kills MEMORY
gen move %2, {IND_RR_H, %1.reg1, %1.reg2}

pat sti $1==4 /* Store word indirect */
with REG REG+FSREG
kills MEMORY
gen move %2, {IND_RC_W, %1, 0}
with SUM_RC REG+FSREG
kills MEMORY
gen move %2, {IND_RC_W, %1.reg, %1.off}
with SUM_RL REG+FSREG
kills MEMORY
gen move %2, {IND_RL_W, %1.reg, %1.adr}
with SUM_RR REG+FSREG
kills MEMORY
gen move %2, {IND_RR_W, %1.reg1, %1.reg2}

pat sti $1==8 /* Store double-word indirect */
with REG FREG
kills MEMORY
gen move %2, {IND_RC_D, %1, 0}
with SUM_RC FREG
kills MEMORY
gen move %2, {IND_RC_D, %1.reg, %1.off}
with SUM_RL FREG
kills MEMORY
gen move %2, {IND_RL_D, %1.reg, %1.adr}
with SUM_RR FREG
kills MEMORY
gen move %2, {IND_RR_D, %1.reg1, %1.reg2}
with REG REG REG
kills MEMORY
gen
move %2, {IND_RC_W, %1, 0}
move %3, {IND_RC_W, %1, 4}

pat sti /* Store arbitrary size */
leaving
loc $1
sts 4

pat sts $1==4 /* Store arbitrary size */
with REG3 STACK
kills ALL
gen bl {LABEL, ".sts4"}


/* Arithmetic wrappers */

pat ads $1==4 /* Add var to pointer */
leaving adi $1

pat sbs $1==4 /* Subtract var from pointer */
leaving sbi $1

pat adp /* Add constant to pointer */
leaving
loc $1
adi 4

pat adu /* Add unsigned */
leaving
adi $1

pat sbu /* Subtract unsigned */
leaving
sbi $1

pat inc /* Add 1 */
leaving
loc 1
adi 4

pat dec /* Subtract 1 */
leaving
loc 1
sbi 4

pat mlu /* Multiply unsigned */
leaving
mli $1

pat slu /* Shift left unsigned */
leaving
sli $1


/* Word arithmetic */

/* Like most back ends, this one doesn't trap EIOVFL, so it
 * ignores overflow in signed integers.
 */

pat adi $1==4 /* Add word (second + top) */
with REG REG
yields {SUM_RR, %1, %2}
with CONST2 REG
yields {SUM_RC, %2, %1.val}
with REG CONST2
yields {SUM_RC, %1, %2.val}
with CONST_HI_ZR REG
yields {SUM_RIS, %2, his(%1.val)}
with REG CONST_HI_ZR
yields {SUM_RIS, %1, his(%2.val)}
with CONST_STACK-CONST2-CONST_HI_ZR REG
uses reusing %2, REG={SUM_RIS, %2, his(%1.val)}
yields {SUM_RC, %a, los(%1.val)}
with REG CONST_STACK-CONST2-CONST_HI_ZR
uses reusing %1, REG={SUM_RIS, %1, his(%2.val)}
yields {SUM_RC, %a, los(%2.val)}

pat sbi $1==4 /* Subtract word (second - top) */
with REG REG
uses reusing %1, reusing %2, REG
yields {SUB_RR, %2, %1}
with CONST2_WHEN_NEG REG
yields {SUM_RC, %2, 0-%1.val}
with REG CONST2
yields {SUB_CR, %2.val, %1}
with CONST_HI_ZR REG
yields {SUM_RIS, %2, his(0-%1.val)}
with CONST_STACK-CONST2_WHEN_NEG-CONST_HI_ZR REG
uses reusing %2, REG={SUM_RIS, %2, his(0-%1.val)}
yields {SUM_RC, %a, los(0-%1.val)}

pat ngi $1==4 /* Negate word */
with REG
yields {NEG_R, %1}

pat mli $1==4 /* Multiply word (second * top) */
with CONST2 REG
yields {MUL_RC, %2, %1.val}
with REG CONST2
yields {MUL_RC, %1, %2.val}
with REG REG
yields {MUL_RR, %2, %1}

pat dvi $1==4 /* Divide word (second / top) */
with REG REG
yields {DIV_RR, %2, %1}

pat dvu $1==4 /* Divide unsigned word (second / top) */
with REG REG
yields {DIV_RR_U, %2, %1}

/* To calculate a remainder: a % b = a - (a / b * b) */
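/* For example, with a in r4 and b in r3, the rmi 4 rule below can
 * emit (register choices are illustrative; ncg picks them):
 *   divw r5,r4,r3
 *   mullw r6,r5,r3
 *   subf r7,r6,r4
 * leaving a % b in r7. */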

pat rmi $1==4 /* Remainder word (second % top) */
with REG REG
uses REG={DIV_RR, %2, %1}, REG
gen move {MUL_RR, %a, %1}, %b
yields {SUB_RR, %2, %b}

pat rmu $1==4 /* Remainder unsigned word (second % top) */
with REG REG
uses REG={DIV_RR_U, %2, %1}, REG
gen move {MUL_RR, %a, %1}, %b
yields {SUB_RR, %2, %b}


/* Bitwise logic */

/* This back end doesn't know how to combine shifts and
 * bitwise ops to emit rlwinm, rlwnm, or rlwimi instructions.
 */
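/* The extended mnemonics used below (slwi, srwi, rotlwi, and so on)
 * are each a single rlwinm already; slwi rD,rS,3 is
 * rlwinm rD,rS,3,0,28. The missed opportunity is folding a
 * shift-and-mask pair, such as an EM sli followed by and, into one
 * rlwinm. */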

pat and $1==4 /* AND word */
with REG NOT_R
yields {ANDC_RR, %1, %2.reg}
with NOT_R REG
yields {ANDC_RR, %2, %1.reg}
with REG REG
yields {AND_RR, %1, %2}
with REG UCONST2
yields {AND_RC, %1, %2.val}
with UCONST2 REG
yields {AND_RC, %2, %1.val}
with REG CONST_HI_ZR
yields {AND_RIS, %1, hi(%2.val)}
with CONST_HI_ZR REG
yields {AND_RIS, %2, hi(%1.val)}

pat and defined($1) /* AND set */
leaving
loc $1
cal ".and"

pat and !defined($1)
leaving
cal ".and"

pat ior $1==4 /* OR word */
with REG NOT_R
yields {ORC_RR, %1, %2.reg}
with NOT_R REG
yields {ORC_RR, %2, %1.reg}
with REG REG
yields {OR_RR, %1, %2}
with REG UCONST2
yields {OR_RC, %1, %2.val}
with UCONST2 REG
yields {OR_RC, %2, %1.val}
with REG CONST_HI_ZR
yields {OR_RIS, %1, hi(%2.val)}
with CONST_HI_ZR REG
yields {OR_RIS, %2, hi(%1.val)}
with REG CONST_STACK-UCONST2-CONST_HI_ZR
uses reusing %1, REG={OR_RIS, %1, hi(%2.val)}
yields {OR_RC, %a, lo(%2.val)}
with CONST_STACK-UCONST2-CONST_HI_ZR REG
uses reusing %2, REG={OR_RIS, %2, hi(%1.val)}
yields {OR_RC, %a, lo(%1.val)}

pat ior defined($1) /* OR set */
leaving
loc $1
cal ".ior"

/* OR set (variable), used in lang/m2/libm2/LtoUset.e */
pat ior !defined($1)
leaving
cal ".ior"

pat xor $1==4 /* XOR word */
with REG REG
yields {XOR_RR, %1, %2}
with REG UCONST2
yields {XOR_RC, %1, %2.val}
with UCONST2 REG
yields {XOR_RC, %2, %1.val}
with REG CONST_HI_ZR
yields {XOR_RIS, %1, hi(%2.val)}
with CONST_HI_ZR REG
yields {XOR_RIS, %2, hi(%1.val)}
with REG CONST_STACK-UCONST2-CONST_HI_ZR
uses reusing %1, REG={XOR_RIS, %1, hi(%2.val)}
yields {XOR_RC, %a, lo(%2.val)}
with CONST_STACK-UCONST2-CONST_HI_ZR REG
uses reusing %2, REG={XOR_RIS, %2, hi(%1.val)}
yields {XOR_RC, %a, lo(%1.val)}

pat xor defined($1) /* XOR set */
leaving
loc $1
cal ".xor"

pat xor !defined($1)
leaving
cal ".xor"

pat com $1==4 /* NOT word */
with exact AND_RR
yields {NAND_RR, %1.reg1, %1.reg2}
with exact OR_RR
yields {NOR_RR, %1.reg1, %1.reg2}
with exact XOR_RR
yields {EQV_RR, %1.reg1, %1.reg2}
with REG
yields {NOT_R, %1}

pat com defined($1) /* NOT set */
leaving
loc $1
cal ".com"

pat com !defined($1)
leaving
cal ".com"

pat zer $1==4 /* Push zero */
leaving
loc 0

pat zer defined($1) /* Create empty set */
leaving
loc $1
cal ".zer"


/* Shifts and rotations */

pat sli $1==4 /* Shift left (second << top) */
with CONST_STACK REG
uses reusing %2, REG
gen slwi %a, %2, {C, %1.val & 0x1F}
yields %a
with REG REG
uses reusing %1, reusing %2, REG
gen slw %a, %2, %1
yields %a
pat sli stl $1==4 && inreg($2)==reg_any
with CONST_STACK REG
gen slwi {LOCAL, $2}, %2, {C, %1.val & 0x1F}
with REG REG
gen slw {LOCAL, $2}, %2, %1

pat sri $1==4 /* Shift right signed (second >> top) */
with CONST_STACK REG
uses reusing %2, REG
gen srawi %a, %2, {C, %1.val & 0x1F}
yields %a
with REG REG
uses reusing %1, reusing %2, REG
gen sraw %a, %2, %1
yields %a
pat sri stl $1==4 && inreg($2)==reg_any
with CONST_STACK REG
gen srawi {LOCAL, $2}, %2, {C, %1.val & 0x1F}
with REG REG
gen sraw {LOCAL, $2}, %2, %1

pat sru $1==4 /* Shift right unsigned (second >> top) */
with CONST_STACK REG
uses reusing %2, REG
gen srwi %a, %2, {C, %1.val & 0x1F}
yields %a
with REG REG
uses reusing %1, reusing %2, REG
gen srw %a, %2, %1
yields %a
pat sru stl $1==4 && inreg($2)==reg_any
with CONST_STACK REG
gen srwi {LOCAL, $2}, %2, {C, %1.val & 0x1F}
with REG REG
gen srw {LOCAL, $2}, %2, %1

pat rol $1==4 /* Rotate left word */
with CONST_STACK REG
uses reusing %2, REG
gen rotlwi %a, %2, {C, %1.val & 0x1F}
yields %a
with REG REG
uses reusing %1, reusing %2, REG
gen rotlw %a, %2, %1
yields %a
pat rol stl $1==4 && inreg($2)==reg_any
with CONST_STACK REG
gen rotlwi {LOCAL, $2}, %2, {C, %1.val & 0x1F}
with REG REG
gen rotlw {LOCAL, $2}, %2, %1

/*
 * ror 4 -> ngi 4, rol 4
 * because to rotate right by n bits is to rotate left by
 * (32 - n), which is to rotate left by -n. PowerPC rotlw
 * handles -n as (-n & 0x1F).
 */
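/* For example, n = 8: -8 & 0x1F = 24, and rotating left by 24
 * is rotating right by 8, since 24 = 32 - 8. */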

pat ror $1==4 /* Rotate right word */
with CONST_STACK REG
uses reusing %2, REG
gen rotrwi %a, %2, {C, %1.val & 0x1F}
yields %a
with /* anything */
leaving
ngi 4
rol 4
pat ror stl $1==4 && inreg($2)==reg_any
with CONST_STACK REG
gen rotrwi {LOCAL, $2}, %2, {C, %1.val & 0x1F}
with /* anything */
leaving
ngi 4
rol 4
stl $2


/* Arrays */

pat aar $1==4 /* Address of array element */
leaving cal ".aar4"

pat lar $1==4 /* Load from array */
with STACK
kills ALL
gen
bl {LABEL, ".aar4"}
/* pass r3 = size from .aar4 to .los4 */
bl {LABEL, ".los4"}

pat lae lar $2==4 && nicesize(rom($1, 3))
leaving
lae $1
aar 4
loi rom($1, 3)

pat sar $1==4 /* Store to array */
with STACK
kills ALL
gen
bl {LABEL, ".aar4"}
/* pass r3 = size from .aar4 to .sts4 */
bl {LABEL, ".sts4"}

pat lae sar $2==4 && nicesize(rom($1, 3))
leaving
lae $1
aar 4
sti rom($1, 3)


/* Sets */

pat set defined($1) /* Create singleton set */
leaving
loc $1
cal ".set"

/* Create set (variable), used in lang/m2/libm2/LtoUset.e */
pat set !defined($1)
leaving
cal ".set"

pat inn defined($1) /* Test for set bit */
leaving
loc $1
cal ".inn"

pat inn !defined($1)
leaving
cal ".inn"


/* Boolean resolutions */

pat teq /* top = (top == 0) */
with REG
uses reusing %1, REG
gen
test %1
mfcr %a
yields {XEQ, %a}

pat tne /* top = (top != 0) */
with REG
uses reusing %1, REG
gen
test %1
mfcr %a
yields {XNE, %a}

pat tlt /* top = (top < 0) */
with REG
uses reusing %1, REG
gen
test %1
mfcr %a
yields {XLT, %a}

pat tle /* top = (top <= 0) */
with REG
uses reusing %1, REG
gen
test %1
mfcr %a
yields {XLE, %a}

pat tgt /* top = (top > 0) */
with REG
uses reusing %1, REG
gen
test %1
mfcr %a
yields {XGT, %a}

pat tge /* top = (top >= 0) */
with REG
uses reusing %1, REG
gen
test %1
mfcr %a
yields {XGE, %a}

pat cmi teq $1==4 /* Signed second == top */
with REG CONST2
uses reusing %1, REG={COND_RC, %1, %2.val}
yields {XEQ, %a}
with CONST2 REG
uses reusing %2, REG={COND_RC, %2, %1.val}
yields {XEQ, %a}
with REG REG
uses reusing %1, reusing %2, REG={COND_RR, %2, %1}
yields {XEQ, %a}

pat cmi tne $1==4 /* Signed second != top */
with REG CONST2
uses reusing %1, REG={COND_RC, %1, %2.val}
yields {XNE, %a}
with CONST2 REG
uses reusing %2, REG={COND_RC, %2, %1.val}
yields {XNE, %a}
with REG REG
uses reusing %1, reusing %2, REG={COND_RR, %2, %1}
yields {XNE, %a}

pat cmi tgt $1==4 /* Signed second > top */
with REG CONST2
uses reusing %1, REG={COND_RC, %1, %2.val}
yields {XLT, %a}
with CONST2 REG
uses reusing %2, REG={COND_RC, %2, %1.val}
yields {XGT, %a}
with REG REG
uses reusing %1, reusing %2, REG={COND_RR, %2, %1}
yields {XGT, %a}

pat cmi tge $1==4 /* Signed second >= top */
with REG CONST2
uses reusing %1, REG={COND_RC, %1, %2.val}
yields {XLE, %a}
with CONST2 REG
uses reusing %2, REG={COND_RC, %2, %1.val}
yields {XGE, %a}
with REG REG
uses reusing %1, reusing %2, REG={COND_RR, %2, %1}
yields {XGE, %a}

pat cmi tlt $1==4 /* Signed second < top */
with REG CONST2
uses reusing %1, REG={COND_RC, %1, %2.val}
yields {XGT, %a}
with CONST2 REG
uses reusing %2, REG={COND_RC, %2, %1.val}
yields {XLT, %a}
with REG REG
uses reusing %1, reusing %2, REG={COND_RR, %2, %1}
yields {XLT, %a}

pat cmi tle $1==4 /* Signed second <= top */
with REG CONST2
uses reusing %1, REG={COND_RC, %1, %2.val}
yields {XGE, %a}
with CONST2 REG
uses reusing %2, REG={COND_RC, %2, %1.val}
yields {XLE, %a}
with REG REG
uses reusing %1, reusing %2, REG={COND_RR, %2, %1}
yields {XLE, %a}

pat cmu teq $1==4 /* Unsigned second == top */
with REG UCONST2
uses reusing %1, REG={CONDL_RC, %1, %2.val}
yields {XEQ, %a}
with UCONST2 REG
uses reusing %2, REG={CONDL_RC, %2, %1.val}
yields {XEQ, %a}
with REG REG
uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1}
yields {XEQ, %a}

pat cmu tne $1==4 /* Unsigned second != top */
with REG UCONST2
uses reusing %1, REG={CONDL_RC, %1, %2.val}
yields {XNE, %a}
with UCONST2 REG
uses reusing %2, REG={CONDL_RC, %2, %1.val}
yields {XNE, %a}
with REG REG
uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1}
yields {XNE, %a}

pat cmu tgt $1==4 /* Unsigned second > top */
with REG UCONST2
uses reusing %1, REG={CONDL_RC, %1, %2.val}
yields {XLT, %a}
with UCONST2 REG
uses reusing %2, REG={CONDL_RC, %2, %1.val}
yields {XGT, %a}
with REG REG
uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1}
yields {XGT, %a}

pat cmu tge $1==4 /* Unsigned second >= top */
with REG UCONST2
uses reusing %1, REG={CONDL_RC, %1, %2.val}
yields {XLE, %a}
with UCONST2 REG
uses reusing %2, REG={CONDL_RC, %2, %1.val}
yields {XGE, %a}
with REG REG
uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1}
yields {XGE, %a}

pat cmu tlt $1==4 /* Unsigned second < top */
with REG UCONST2
uses reusing %1, REG={CONDL_RC, %1, %2.val}
yields {XGT, %a}
with UCONST2 REG
uses reusing %2, REG={CONDL_RC, %2, %1.val}
yields {XLT, %a}
with REG REG
uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1}
yields {XLT, %a}

pat cmu tle $1==4 /* Unsigned second <= top */
with REG UCONST2
uses reusing %1, REG={CONDL_RC, %1, %2.val}
yields {XGE, %a}
with UCONST2 REG
uses reusing %2, REG={CONDL_RC, %2, %1.val}
yields {XLE, %a}
with REG REG
uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1}
yields {XLE, %a}


/* Simple branches */

proc zxx example zeq
with REG STACK
gen
test %1
bxx* {LABEL, $1}

/* Pop signed int, branch if... */
pat zeq call zxx("beq") /* top == 0 */
pat zne call zxx("bne") /* top != 0 */
pat zgt call zxx("bgt") /* top > 0 */
pat zge call zxx("bge") /* top >= 0 */
pat zlt call zxx("blt") /* top < 0 */
pat zle call zxx("ble") /* top <= 0 */

/* The peephole optimizer rewrites
 * cmi 4 zeq
 * as beq, and does same for bne, bgt, and so on.
 */

proc bxx example beq
with REG CONST2 STACK
gen
cmpwi %1, %2
bxx[2] {LABEL, $1}
with CONST2 REG STACK
gen
cmpwi %2, %1
bxx[1] {LABEL, $1}
with REG REG STACK
gen
cmpw %2, %1
bxx[1] {LABEL, $1}

/* Pop two signed ints, branch if... */
pat beq call bxx("beq", "beq") /* second == top */
pat bne call bxx("bne", "bne") /* second != top */
pat bgt call bxx("bgt", "blt") /* second > top */
pat bge call bxx("bge", "ble") /* second >= top */
pat blt call bxx("blt", "bgt") /* second < top */
pat ble call bxx("ble", "bge") /* second <= top */

proc cmu4zxx example cmu zeq
with REG CONST2 STACK
gen
cmplwi %1, %2
bxx[2] {LABEL, $2}
with CONST2 REG STACK
gen
cmplwi %2, %1
bxx[1] {LABEL, $2}
with REG REG STACK
gen
cmplw %2, %1
bxx[1] {LABEL, $2}

/* Pop two unsigned ints, branch if... */
pat cmu zeq $1==4 call cmu4zxx("beq", "beq")
pat cmu zne $1==4 call cmu4zxx("bne", "bne")
pat cmu zgt $1==4 call cmu4zxx("bgt", "blt")
pat cmu zge $1==4 call cmu4zxx("bge", "ble")
pat cmu zlt $1==4 call cmu4zxx("blt", "bgt")
pat cmu zle $1==4 call cmu4zxx("ble", "bge")


/* Comparisons */

/* Each comparison extracts the lt and gt bits from cr0.
 * extlwi %a, %a, 2, 0
 * puts lt in the sign bit, so lt yields a negative result,
 * gt yields positive.
 * rlwinm %a, %a, 1, 31, 0
 * puts gt in the sign bit, to reverse the comparison.
 */
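/* Worked example: mfcr places cr0 in the four most-significant
 * bits of %a (lt, gt, eq, so, in IBM bit order 0 to 3).
 * extlwi %a, %a, 2, 0 keeps only bits 0 and 1, so the result is
 * 0x80000000 (negative) when lt was set, 0x40000000 (positive)
 * when gt was set, and 0 when the values compared equal. */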
|
|
|
|
pat cmi $1==4 /* Signed tristate compare */
|
|
with REG CONST2
|
|
uses reusing %1, REG={COND_RC, %1, %2.val}
|
|
gen rlwinm %a, %a, {C, 1}, {C, 31}, {C, 0}
|
|
yields %a
|
|
with CONST2 REG
|
|
uses reusing %2, REG={COND_RC, %2, %1.val}
|
|
gen extlwi %a, %a, {C, 2}, {C, 0}
|
|
yields %a
|
|
with REG REG
|
|
uses reusing %1, reusing %2, REG={COND_RR, %2, %1}
|
|
gen extlwi %a, %a, {C, 2}, {C, 0}
|
|
yields %a
|
|
|
|
pat cmu $1==4 /* Unsigned tristate compare */
|
|
with REG UCONST2
|
|
uses reusing %1, REG={CONDL_RC, %1, %2.val}
|
|
gen rlwinm %a, %a, {C, 1}, {C, 31}, {C, 0}
|
|
yields %a
|
|
with UCONST2 REG
|
|
uses reusing %2, REG={CONDL_RC, %2, %1.val}
|
|
gen extlwi %a, %a, {C, 2}, {C, 0}
|
|
yields %a
|
|
with REG REG
|
|
uses reusing %1, reusing %2, REG={CONDL_RR, %2, %1}
|
|
gen extlwi %a, %a, {C, 2}, {C, 0}
|
|
yields %a
|
|
|
|
pat cmp /* Compare pointers */
|
|
leaving
|
|
cmu 4
|
|
|
|
pat cms $1==4 /* Compare blocks (word sized) */
|
|
leaving
|
|
cmi 4
|
|
|
|
pat cms defined($1)
|
|
leaving
|
|
loc $1
|
|
cal ".cms"
|
|
|
|
pat cms !defined($1)
|
|
leaving
|
|
cal ".cms"
|
|
|
|
|
|
/* Other branching and labelling */
|
|
|
|
/* During an unconditional jump, if the top element on the
|
|
* stack has 4 bytes, then we hold it in register r3.
|
|
*/
|
|
pat lab topeltsize($1)==4 && !fallthrough($1)
|
|
kills ALL
|
|
gen labeldef $1
|
|
yields r3
|
|
|
|
pat lab topeltsize($1)==4 && fallthrough($1)
|
|
with REG3 STACK
|
|
kills ALL
|
|
gen labeldef $1
|
|
yields r3
|
|
|
|
pat lab topeltsize($1)!=4 /* Label without r3 */
|
|
with STACK
|
|
kills ALL
|
|
gen labeldef $1
|
|
|
|
pat bra topeltsize($1)==4 /* Branch with r3 */
|
|
with REG3 STACK
|
|
gen b {LABEL, $1}
|
|
|
|
pat bra topeltsize($1)!=4 /* Branch without r3 */
|
|
with STACK
|
|
gen b {LABEL, $1}
|
|
|
|
|
|
/* Miscellaneous */

pat cal /* Call procedure */
with STACK
    kills ALL
    gen bl {LABEL, $1}

pat cai /* Call procedure indirect */
with REG STACK
    kills ALL
    gen
        mtspr ctr, %1
        bctrl.
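
/* The indirect call goes through ctr rather than lr so that
 * bctrl itself can deposit the return address in lr.
 */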

pat lfr $1==4 /* Load function result, word */
    yields r3

pat lfr $1==8 /* Load function result, double-word */
    yields r4 r3

pat ret $1==0 /* Return from procedure */
    gen
        /* Restore saved registers. */
        return
        /* Epilog: restore lr and fp. */
        lwz r0, {IND_RC_W, fp, 4}
        mtspr lr, r0
        lwz r0, {IND_RC_W, fp, 0}
        /* Free our stack frame. */
        addi sp, fp, {C, 8}
        mr fp, r0
        blr.
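
/* A sketch of the frame this epilog assumes, using the offsets
 * loaded above:
 *   fp+4: saved lr
 *   fp+0: caller's fp
 * "addi sp, fp, 8" frees the locals and both saved words in one
 * step, so nothing needs popping first.
 */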

/* If "ret" coerces STACK to REG3, then top will delete the
 * extra "addi sp, sp, 4".
 */

pat ret $1==4 /* Return from procedure, word */
with REG3
    leaving ret 0

pat ret $1==8 /* Return from proc, double-word */
with REG3 INT_W
    gen move %2, r4
    leaving ret 0
with REG3 STACK
    gen lwz r4, {IND_RC_W, sp, 0}
    leaving ret 0

/*
 * These rules for blm/bls are wrong if length is zero.
 * So are several procedures in libem.
 */

pat blm /* Block move constant length */
    leaving
        loc $1
        bls

pat bls /* Block move variable length */
with REG SPFP+REG SPFP+REG
    /* allows sp as %2, %3 */
    /* ( src%3 dst%2 len%1 -- ) */
    uses reusing %1, REG, REG, REG
    gen
        srwi %a, %1, {C, 2}
        mtspr ctr, %a
        addi %b, %3, {C, 0-4}
        addi %c, %2, {C, 0-4}
1:      lwzu %a, {IND_RC_W, %b, 4}
        stwu %a, {IND_RC_W, %c, 4}
        bdnz {LABEL, "1b"}
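
/* For example, "blm 12" becomes "loc 12 bls" and the loop above
 * runs 12>>2 = 3 times; %b and %c start 4 bytes low so each
 * lwzu/stwu pair pre-increments and copies one word.
 */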

pat csa /* Array-lookup switch */
with STACK
    kills ALL
    gen b {LABEL, ".csa"}

pat csb /* Table-lookup switch */
with STACK
    kills ALL
    gen b {LABEL, ".csb"}


/* EM specials */

pat fil /* Set current filename */
    leaving
        lae $1
        ste "hol0+4"

pat lin /* Set current line number */
    leaving
        loc $1
        ste "hol0"

pat lni /* Increment line number */
    leaving ine "hol0"

pat lim /* Load EM trap ignore mask */
    leaving loe ".ignmask"

pat sim /* Store EM trap ignore mask */
    leaving ste ".ignmask"

pat sig /* Set trap handler, yield old */
    leaving
        loe ".trppc"
        exg 4
        ste ".trppc"

pat trp /* Raise EM trap */
with REG3
    kills ALL
    gen bl {LABEL, ".trp"}

pat rtt /* Return from trap */
    leaving ret 0

pat rck $1==4 /* Range check */
    leaving cal ".rck"

/* Our caller's local base, "lxl 0 dch", appears in
 * lang/cem/libcc.ansi/setjmp/setjmp.e, lang/m2/libm2/par_misc.e
 */
pat lxl dch $1==0
    yields {IND_RC_W, fp, FP_OFFSET}

pat dch /* Dynamic chain: LB -> caller's LB */
with REG
    yields {IND_RC_W, %1, FP_OFFSET}

pat lpb /* LB -> argument base */
    leaving adp EM_BSIZE

/* "gto" must preserve the function result for "lfr", so
|
|
* longjmp() can pass the return value to setjmp().
|
|
* - See lang/cem/libcc.ansi/setjmp/setjmp.e
|
|
*
|
|
* Must preserve r3 and r4, so no "uses REG".
|
|
* PowerPC can't add r0 + constant. Use r12.
|
|
*/
|
|
pat gto /* longjmp */
|
|
with STACK
|
|
gen
|
|
move {LABEL, $1}, r12
|
|
move {IND_RC_W, r12, 8}, fp
|
|
move {IND_RC_W, r12, 4}, sp
|
|
move {IND_RC_W, r12, 0}, r12
|
|
mtspr ctr, r12
|
|
bctr.
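
/* So $1 names a three-word buffer holding { pc, sp, fp } at
 * offsets 0, 4, 8, matching the loads above; presumably the
 * jmp_buf that setjmp.e fills in.
 */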

pat lor $1==0 /* Load local base */
    leaving lxl 0

pat lor $1==1 /* Load stack pointer */
with STACK
    yields sp

/* Next few patterns for "lor 1" appear in
 * lang/m2/libm2/par_misc.e
 */
pat lor adp $1==1 && smalls($2) /* sp + constant */
with STACK
    yields {SUM_RC, sp, $2}

/* Subtract stack pointer by doing %1 - (sp - 4)
 * because sp - 4 would point to %1.
 */
pat lor sbs loc adu $1==1 && $2==4 && $4==4
with REG STACK
    uses reusing %1, REG
    gen subf %a, sp, %1
    yields %a
    leaving loc $3+4 adu 4
pat lor sbs $1==1 && $2==4
with REG STACK
    uses reusing %1, REG
    gen subf %a, sp, %1
    yields {SUM_RC, %a, 4}
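
/* A worked example: in "lae x; lor 1; sbs 4", the pattern holds x
 * in %1 instead of on the stack, so the machine sp is 4 higher
 * than the sp that "lor 1" would have pushed. The difference is
 * x - (sp - 4) = (x - sp) + 4, hence the extra 4 added after the
 * subf (folded into "loc $3+4" in the first pattern).
 */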

pat str $1==0 /* Store local base */
with INT_W
    gen move %1, fp
with STACK
    gen
        lwz fp, {IND_RC_W, sp, 0}
        addi sp, sp, {C, 4}

pat str $1==1 /* Store stack pointer */
with INT_W
    kills ALL
    gen move %1, sp
with STACK
    kills ALL
    gen lwz sp, {IND_RC_W, sp, 0}


/* Single-precision floating-point */

pat zrf $1==4 /* Push zero */
    leaving loe ".fs_00000000"

pat adf $1==4 /* Add single */
with FSREG FSREG
    uses reusing %1, reusing %2, FSREG
    gen fadds %a, %2, %1
    yields %a
pat adf stl $1==4 && inreg($2)==reg_float
with FSREG FSREG
    gen fadds {LOCAL, $2}, %2, %1

pat sbf $1==4 /* Subtract single */
with FSREG FSREG
    uses reusing %1, reusing %2, FSREG
    gen fsubs %a, %2, %1
    yields %a
pat sbf stl $1==4 && inreg($2)==reg_float
with FSREG FSREG
    gen fsubs {LOCAL, $2}, %2, %1

pat mlf $1==4 /* Multiply single */
with FSREG FSREG
    uses reusing %1, reusing %2, FSREG
    gen fmuls %a, %2, %1
    yields %a
pat mlf stl $1==4 && inreg($2)==reg_float
with FSREG FSREG
    gen fmuls {LOCAL, $2}, %2, %1

pat dvf $1==4 /* Divide single */
with FSREG FSREG
    uses reusing %1, reusing %2, FSREG
    gen fdivs %a, %2, %1
    yields %a
pat dvf stl $1==4 && inreg($2)==reg_float
with FSREG FSREG
    gen fdivs {LOCAL, $2}, %2, %1

pat ngf $1==4 /* Negate single */
with FSREG
    uses reusing %1, FSREG
    gen fneg %a, %1
    yields %a
pat ngf stl $1==4 && inreg($2)==reg_float
with FSREG
    gen fneg {LOCAL, $2}, %1

/* When a or b is NaN, then a < b, a <= b, a > b, a >= b
 * should all be false. We can't make them false, because
 * - EM's _cmf_ is only for ordered comparisons.
 * - The peephole optimizer assumes (a < b) == !(a >= b).
 *
 * We do make a == b false and a != b true, by checking the
 * eq (equal) bit or un (unordered) bit in cr0.
 */
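
/* Concretely: "cmf teq" tests only the eq bit, which a NaN never
 * sets, so a == b is false; "cmf tne" also accepts the un bit,
 * so a != b is true.
 */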

pat cmf $1==4 /* Compare single */
with FSREG FSREG
    uses REG={COND_FS, %2, %1}
    /* Extract lt, gt, un; put lt in sign bit. */
    gen andisX %a, %a, {C, 0xd000}
    yields %a
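
/* andisX shifts its immediate left 16 bits, so the 0xd000 mask
 * keeps lt (bit 0), gt (bit 1), and un (bit 3) of the copied cr0
 * and clears eq: the result is negative for <, zero for ==, and
 * positive for > or unordered.
 */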

pat cmf teq $1==4 /* Single second == top */
with FSREG FSREG
    uses REG={COND_FS, %2, %1}
    yields {XEQ, %a}

pat cmf tne $1==4 /* Single second != top */
with FSREG FSREG
    uses REG={COND_FS, %2, %1}
    yields {XNE, %a}

pat cmf tgt $1==4 /* Single second > top */
with FSREG FSREG
    uses REG={COND_FS, %2, %1}
    yields {XGT, %a}

pat cmf tge $1==4 /* Single second >= top */
with FSREG FSREG
    uses REG={COND_FS, %2, %1}
    yields {XGE, %a}

pat cmf tlt $1==4 /* Single second < top */
with FSREG FSREG
    uses REG={COND_FS, %2, %1}
    yields {XLT, %a}

pat cmf tle $1==4 /* Single second <= top */
with FSREG FSREG
    uses REG={COND_FS, %2, %1}
    yields {XLE, %a}

proc cmf4zxx example cmf zeq
with FSREG FSREG STACK
    gen
        fcmpo cr0, %2, %1
        bxx* {LABEL, $2}

/* Pop 2 singles, branch if... */
pat cmf zeq $1==4 call cmf4zxx("beq")
pat cmf zne $1==4 call cmf4zxx("bne")
pat cmf zgt $1==4 call cmf4zxx("bgt")
pat cmf zge $1==4 call cmf4zxx("bge")
pat cmf zlt $1==4 call cmf4zxx("blt")
pat cmf zle $1==4 call cmf4zxx("ble")

pat loc loc cff $1==4 && $2==8 /* Convert single to double */
with FSREG
    yields %1.1
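
/* No instruction is needed: every PowerPC FPR stores its value in
 * double format, so a single sitting in an FSREG is already a
 * valid double, and %1.1 just names the 8-byte view of the same
 * register.
 */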

pat loc loc cfi $1==4 && $2==4 /* Single to signed int */
    leaving
        loc 4
        loc 8
        cff
        loc 8
        loc 4
        cfi

pat loc loc cfu $1==4 && $2==4 /* Single to unsigned int */
    leaving
        loc 4
        loc 8
        cff
        loc 8
        loc 4
        cfu

pat loc loc cif $1==4 && $2==4 /* Signed int to single */
    leaving
        loc 4
        loc 8
        cif
        loc 8
        loc 4
        cff

pat loc loc cuf $1==4 && $2==4 /* Unsigned int to single */
    leaving
        loc 4
        loc 8
        cuf
        loc 8
        loc 4
        cff

pat fef $1==4 /* Split fraction, exponent */
    leaving cal ".fef4"

/* Multiply two singles, then split fraction, integer */
pat fif $1==4
    leaving cal ".fif4"


/* Double-precision floating-point */

pat zrf $1==8 /* Push zero */
    leaving lde ".fd_00000000"

pat adf $1==8 /* Add double */
with FREG FREG
    uses reusing %1, reusing %2, FREG
    gen fadd %a, %2, %1
    yields %a
pat adf sdl $1==8 && inreg($2)==reg_float
with FREG FREG
    gen fadd {DLOCAL, $2}, %2, %1

pat sbf $1==8 /* Subtract double */
with FREG FREG
    uses reusing %1, reusing %2, FREG
    gen fsub %a, %2, %1
    yields %a
pat sbf sdl $1==8 && inreg($2)==reg_float
with FREG FREG
    gen fsub {DLOCAL, $2}, %2, %1

pat mlf $1==8 /* Multiply double */
with FREG FREG
    uses reusing %1, reusing %2, FREG
    gen fmul %a, %2, %1
    yields %a
pat mlf sdl $1==8 && inreg($2)==reg_float
with FREG FREG
    gen fmul {DLOCAL, $2}, %2, %1

pat dvf $1==8 /* Divide double */
with FREG FREG
    uses reusing %1, reusing %2, FREG
    gen fdiv %a, %2, %1
    yields %a
pat dvf sdl $1==8 && inreg($2)==reg_float
with FREG FREG
    gen fdiv {DLOCAL, $2}, %2, %1

pat ngf $1==8 /* Negate double */
with FREG
    uses reusing %1, FREG
    gen fneg %a, %1
    yields %a
pat ngf sdl $1==8 && inreg($2)==reg_float
with FREG
    gen fneg {DLOCAL, $2}, %1

/* To compare NaN, see comment above pat cmf $1==4 */

pat cmf $1==8 /* Compare double */
with FREG FREG
    uses REG={COND_FD, %2, %1}
    /* Extract lt, gt, un; put lt in sign bit. */
    gen andisX %a, %a, {C, 0xd000}
    yields %a

pat cmf teq $1==8 /* Double second == top */
with FREG FREG
    uses REG={COND_FD, %2, %1}
    yields {XEQ, %a}

pat cmf tne $1==8 /* Double second != top */
with FREG FREG
    uses REG={COND_FD, %2, %1}
    yields {XNE, %a}

pat cmf tgt $1==8 /* Double second > top */
with FREG FREG
    uses REG={COND_FD, %2, %1}
    yields {XGT, %a}

pat cmf tge $1==8 /* Double second >= top */
with FREG FREG
    uses REG={COND_FD, %2, %1}
    yields {XGE, %a}

pat cmf tlt $1==8 /* Double second < top */
with FREG FREG
    uses REG={COND_FD, %2, %1}
    yields {XLT, %a}

pat cmf tle $1==8 /* Double second <= top */
with FREG FREG
    uses REG={COND_FD, %2, %1}
    yields {XLE, %a}

proc cmf8zxx example cmf zeq
with FREG FREG STACK
    gen
        fcmpo cr0, %2, %1
        bxx* {LABEL, $2}

/* Pop 2 doubles, branch if... */
pat cmf zeq $1==8 call cmf8zxx("beq")
pat cmf zne $1==8 call cmf8zxx("bne")
pat cmf zgt $1==8 call cmf8zxx("bgt")
pat cmf zge $1==8 call cmf8zxx("bge")
pat cmf zlt $1==8 call cmf8zxx("blt")
pat cmf zle $1==8 call cmf8zxx("ble")

/* Convert double to single */
/* reg_float pattern must be first, or it goes unused! */
pat loc loc cff stl $1==8 && $2==4 && inreg($4)==reg_float
with FREG
    gen frsp {LOCAL, $4}, %1
pat loc loc cff $1==8 && $2==4
with FREG
    uses reusing %1, FSREG
    gen frsp %a, %1
    yields %a

pat loc loc cfi $1==8 && $2==4 /* Double to signed int */
with FREG STACK
    uses reusing %1, FREG
    gen
        fctiwz %a, %1
        stfdu %a, {IND_RC_D, sp, 0-8}
        addi sp, sp, {C, 4}
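
/* fctiwz leaves the 32-bit integer in the low word of %a; stfdu
 * pushes all 8 bytes, and the addi discards the high word, so on
 * this big-endian stack the int ends up alone on top.
 */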

pat loc loc cfu $1==8 && $2==4 /* Double to unsigned int */
    leaving cal ".cfu8"

pat loc loc cif $1==4 && $2==8 /* Signed int to double */
    leaving cal ".cif8"

pat loc loc cuf $1==4 && $2==8 /* Unsigned int to double */
    leaving cal ".cuf8"

pat fef $1==8 /* Split fraction, exponent */
    leaving cal ".fef8"

/* Multiply two doubles, then split fraction, integer */
pat fif $1==8
    leaving cal ".fif8"