EM_WSIZE = 4
EM_PSIZE = 4
EM_BSIZE = 8            /* two words saved in call frame */

INT8 = 1                /* Size of values */
INT16 = 2
INT32 = 4
INT64 = 8

FP_OFFSET = 0           /* Offset of saved FP relative to our FP */
PC_OFFSET = 4           /* Offset of saved PC relative to our FP */
SL_OFFSET = 8           /* Offset of static link */

#define COMMENT(n) /* comment {LABEL, n} */

#define nicesize(x) ((x)==INT8 || (x)==INT16 || (x)==INT32 || (x)==INT64)

#define smalls(n) sfit(n, 16)
#define smallu(n) ufit(n, 16)

#define lo(n) ((n) & 0xFFFF)
#define hi(n) (((n)>>16) & 0xFFFF)

/* Use these for instructions that treat the low half as signed --- his()
 * includes a modifier to produce the correct value when the low half gets
 * sign extended.  Er, do make sure you load the low half second.
 */
#define los(n) (lo(n) | (((0-(lo(n)>>15)) & ~0xFFFF)))
#define his(n) ((hi(n) + (lo(n)>>15)) & 0xFFFF)
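/* Worked example (illustrative, not part of the table): take
 * n = 0x00018000, so hi(n) = 0x0001 and lo(n) = 0x8000.  Loading the
 * halves naively,
 *     lis  r, 0x0001        ->  r = 0x00010000
 *     addi r, r, 0x8000     ->  addi sign extends: r += 0xFFFF8000
 * gives 0x00008000, which is wrong.  With the corrected pair,
 * his(n) = (0x0001 + 1) & 0xFFFF = 0x0002 and los(n) = -0x8000:
 *     lis  r, 0x0002        ->  r = 0x00020000
 *     addi r, r, -0x8000    ->  r = 0x00018000
 * which is why the low half must be loaded second.
 */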
PROPERTIES
    GPR             /* general-purpose register */
    REG             /* allocatable GPR */
    REG3            /* coercion to r3 */
    FPR(8)          /* floating-point register */
    FREG(8)         /* allocatable FPR */
    FSREG           /* allocatable single-precision FPR */
    SPR             /* special-purpose register */
    CR              /* condition register */

REGISTERS
    /*
     * When ncg allocates regvars, it seems to start with the last
     * register in the first class.  To encourage ncg to allocate
     * them from r31 down, we list them in one class as
     * r13, r14, ..., r31: GPR, REG regvar(reg_any).
     */
    r0, sp, fp : GPR.
    r3 : GPR, REG, REG3.
    r4, r5, r6, r7, r8, r9, r10, r11, r12 : GPR, REG.
    r13, r14, r15, r16, r17, r18, r19, r20, r21, r22, r23, r24,
        r25, r26, r27, r28, r29, r30, r31 : GPR, REG regvar(reg_any).
    f0 : FPR.
    f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13 : FPR, FREG.
    f14, f15, f16, f17, f18, f19, f20, f21, f22, f23, f24, f25,
        f26, f27, f28, f29, f30, f31 : FPR, FREG regvar(reg_float).
    fs1("f1")=f1, fs2("f2")=f2, fs3("f3")=f3, fs4("f4")=f4,
        fs5("f5")=f5, fs6("f6")=f6, fs7("f7")=f7, fs8("f8")=f8,
        fs9("f9")=f9, fs10("f10")=f10, fs11("f11")=f11,
        fs12("f12")=f12, fs13("f13")=f13 : FSREG.
    /* reglap: reg_float may have subregister of different size */
    fs14("f14")=f14, fs15("f15")=f15, fs16("f16")=f16,
        fs17("f17")=f17, fs18("f18")=f18, fs19("f19")=f19,
        fs20("f20")=f20, fs21("f21")=f21, fs22("f22")=f22,
        fs23("f23")=f23, fs24("f24")=f24, fs25("f25")=f25,
        fs26("f26")=f26, fs27("f27")=f27, fs28("f28")=f28,
        fs29("f29")=f29, fs30("f30")=f30,
        fs31("f31")=f31 : FSREG regvar(reg_float).
    lr, ctr : SPR.
    cr0 : CR.

#define RSCRATCH r0
#define FSCRATCH f0

TOKENS
    /* Primitives */
    C               = { INT val; } 4 val.           /* constant */
    LABEL           = { ADDR adr; } 4 adr.
    LABEL_HI        = { ADDR adr; } 4 "hi16[" adr "]".
    LABEL_HA        = { ADDR adr; } 4 "ha16[" adr "]".
    LABEL_LO        = { ADDR adr; } 4 "lo16[" adr "]".
    LOCAL           = { INT off; } 4 ">>> BUG IN LOCAL".
    DLOCAL          = { INT off; } 8 ">>> BUG IN DLOCAL".

    /* Allows us to use regvar() to refer to registers */
    GPR_EXPR        = { GPR reg; } 4 reg.
    FPR_EXPR        = { FPR reg; } 8 reg.
    FSREG_EXPR      = { FSREG reg; } 4 reg.

    /* Constants on the stack */
    CONST_N8000         = { INT val; } 4 val.
    CONST_N7FFF_N0001   = { INT val; } 4 val.
    CONST_0000_7FFF     = { INT val; } 4 val.
    CONST_8000          = { INT val; } 4 val.
    CONST_8001_FFFF     = { INT val; } 4 val.
    CONST_HI_ZR         = { INT val; } 4 val.
    CONST_HI_LO         = { INT val; } 4 val.

    /* Expression partial results */
    SEX_B           = { GPR reg; } 4.               /* sign extension */
    SEX_H           = { GPR reg; } 4.
    SUM_RIS         = { GPR reg; INT offhi; } 4.    /* reg + (offhi << 16) */
    SUM_RC          = { GPR reg; INT off; } 4.      /* reg + off */
    SUM_RL          = { GPR reg; ADDR adr; } 4.     /* reg + lo16[adr] */
    SUM_RR          = { GPR reg1; GPR reg2; } 4.    /* reg1 + reg2 */
    SUB_RR          = { GPR reg1; GPR reg2; } 4.    /* reg1 - reg2 */
    NEG_R           = { GPR reg; } 4.               /* -reg */
    MUL_RR          = { GPR reg1; GPR reg2; } 4.    /* reg1 * reg2 */
    DIV_RR          = { GPR reg1; GPR reg2; } 4.    /* reg1 / reg2 signed */
    DIV_RR_U        = { GPR reg1; GPR reg2; } 4.    /* reg1 / reg2 unsigned */

    IND_RC_B        = { GPR reg; INT off; } 4 off "(" reg ")".
    IND_RL_B        = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")".
    IND_RR_B        = { GPR reg1; GPR reg2; } 4.
    IND_RC_H        = { GPR reg; INT off; } 4 off "(" reg ")".
    IND_RL_H        = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")".
    IND_RR_H        = { GPR reg1; GPR reg2; } 4.
    IND_RC_H_S      = { GPR reg; INT off; } 4 off "(" reg ")".
    IND_RL_H_S      = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")".
    IND_RR_H_S      = { GPR reg1; GPR reg2; } 4.
    IND_RC_W        = { GPR reg; INT off; } 4 off "(" reg ")".
    IND_RL_W        = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")".
    IND_RR_W        = { GPR reg1; GPR reg2; } 4.
    IND_RC_D        = { GPR reg; INT off; } 8 off "(" reg ")".
    IND_RL_D        = { GPR reg; ADDR adr; } 8 "lo16[" adr "](" reg ")".
    IND_RR_D        = { GPR reg1; GPR reg2; } 8.

    NOT_R           = { GPR reg; } 4.               /* ~reg */
    AND_RIS         = { GPR reg; INT valhi; } 4.
    AND_RC          = { GPR reg; INT val; } 4.
    AND_RR          = { GPR reg1; GPR reg2; } 4.
    ANDC_RR         = { GPR reg1; GPR reg2; } 4.    /* reg1 & ~reg2 */
    OR_RIS          = { GPR reg; INT valhi; } 4.
    OR_RC           = { GPR reg; INT val; } 4.
    OR_RR           = { GPR reg1; GPR reg2; } 4.
    ORC_RR          = { GPR reg1; GPR reg2; } 4.    /* reg1 | ~reg2 */
    XOR_RIS         = { GPR reg; INT valhi; } 4.
    XOR_RC          = { GPR reg; INT val; } 4.
    XOR_RR          = { GPR reg1; GPR reg2; } 4.
    NAND_RR         = { GPR reg1; GPR reg2; } 4.    /* ~(reg1 & reg2) */
    NOR_RR          = { GPR reg1; GPR reg2; } 4.    /* ~(reg1 | reg2) */
    EQV_RR          = { GPR reg1; GPR reg2; } 4.    /* ~(reg1 ^ reg2) */

    COND_RC         = { GPR reg; INT val; } 4.
    COND_RR         = { GPR reg1; GPR reg2; } 4.
    CONDL_RC        = { GPR reg; INT val; } 4.
    CONDL_RR        = { GPR reg1; GPR reg2; } 4.
    COND_FS         = { FSREG reg1; FSREG reg2; } 4.
    COND_FD         = { FREG reg1; FREG reg2; } 4.
    XEQ             = { GPR reg; } 4.
    XNE             = { GPR reg; } 4.
    XGT             = { GPR reg; } 4.
    XGE             = { GPR reg; } 4.
    XLT             = { GPR reg; } 4.
    XLE             = { GPR reg; } 4.

SETS
    /* signed 16-bit integer */
    CONST2          = CONST_N8000 + CONST_N7FFF_N0001 + CONST_0000_7FFF.
    /* integer that, when negated, fits signed 16-bit */
    CONST2_WHEN_NEG = CONST_N7FFF_N0001 + CONST_0000_7FFF + CONST_8000.
    /* unsigned 16-bit integer */
    UCONST2         = CONST_0000_7FFF + CONST_8000 + CONST_8001_FFFF.
    /* any constant on stack */
    CONST_STACK     = CONST_N8000 + CONST_N7FFF_N0001 + CONST_0000_7FFF +
        CONST_8000 + CONST_8001_FFFF + CONST_HI_ZR + CONST_HI_LO.
    CONST           = C + CONST_STACK.

    IND_ALL_B       = IND_RC_B + IND_RL_B + IND_RR_B.
    IND_ALL_H       = IND_RC_H + IND_RL_H + IND_RR_H +
        IND_RC_H_S + IND_RL_H_S + IND_RR_H_S.
    IND_ALL_W       = IND_RC_W + IND_RL_W + IND_RR_W.
    IND_ALL_D       = IND_RC_D + IND_RL_D + IND_RR_D.
    /* anything killed by sti (store indirect) */
    MEMORY          = IND_ALL_B + IND_ALL_H + IND_ALL_W + IND_ALL_D.

    /* any integer from stack that we can easily move to GPR */
    INT_W           = REG + CONST_STACK + SEX_B + SEX_H +
        SUM_RIS + SUM_RC + SUM_RL + SUM_RR + SUB_RR + NEG_R +
        MUL_RR + DIV_RR + DIV_RR_U +
        IND_ALL_B + IND_ALL_H + IND_ALL_W +
        NOT_R + AND_RIS + AND_RC + AND_RR + ANDC_RR +
        OR_RIS + OR_RC + OR_RR + ORC_RR +
        XOR_RIS + XOR_RC + XOR_RR + NAND_RR + NOR_RR + EQV_RR +
        XEQ + XNE + XGT + XGE + XLT + XLE.

    FLOAT_D         = FREG + IND_ALL_D.
    FLOAT_W         = FSREG + IND_ALL_W.
INSTRUCTIONS
    /* We give time as cycles of total latency from Freescale
     * Semiconductor, MPC7450 RISC Microprocessor Family Reference
     * Manual, Rev. 5, section 6.6.
     *
     * We have only 4-byte alignment for doubles; 8-byte alignment is
     * optimal.  We guess the misalignment penalty by adding 1 cycle to
     * the cost of loading or storing a double:
     *   lfd lfdu lfdx: 4 -> 5
     *   stfd stfdu stfdx: 3 -> 4
     */
    cost(4, 1)      /* space, time */

    add GPR:wo, GPR:ro, GPR:ro.
    addX "add." GPR:wo, GPR:ro, GPR:ro.
    addi GPR:wo, GPR:ro, CONST+LABEL_LO:ro.
    li GPR:wo, CONST:ro.
    addis GPR:wo, GPR:ro, CONST+LABEL_HI+LABEL_HA:ro.
    lis GPR:wo, CONST+LABEL_HI+LABEL_HA:ro.
    and GPR:wo, GPR:ro, GPR:ro.
    andc GPR:wo, GPR:ro, GPR:ro.
    andiX "andi." GPR:wo:cc, GPR:ro, CONST:ro.
    andisX "andis." GPR:wo:cc, GPR:ro, CONST:ro.
    b LABEL:ro.
    bc CONST:ro, CONST:ro, LABEL:ro.
    bdnz LABEL:ro.
    beq LABEL:ro.
    bne LABEL:ro.
    bgt LABEL:ro.
    bge LABEL:ro.
    blt LABEL:ro.
    ble LABEL:ro.
    bxx LABEL:ro.   /* dummy */
    bcctr CONST:ro, CONST:ro, CONST:ro.
    bctr.
    bcctrl CONST:ro, CONST:ro, CONST:ro.
    bctrl.
    bclr CONST:ro, CONST:ro, CONST:ro.
    blr.
    bl LABEL:ro.
    cmp CR:wo, CONST:ro, GPR:ro, GPR:ro kills :cc.
    cmpw GPR:ro, GPR:ro kills :cc.
    cmpi CR:wo, CONST:ro, GPR:ro, CONST:ro kills :cc.
    cmpwi GPR:ro, CONST:ro kills :cc.
    cmpl CR:wo, CONST:ro, GPR:ro, GPR:ro kills :cc.
    cmplw GPR:ro, GPR:ro kills :cc.
    cmpli CR:wo, CONST:ro, GPR:ro, CONST:ro kills :cc.
    cmplwi GPR:ro, CONST:ro kills :cc.
    divw GPR:wo, GPR:ro, GPR:ro cost(4, 23).
    divwu GPR:wo, GPR:ro, GPR:ro cost(4, 23).
    eqv GPR:wo, GPR:ro, GPR:ro.
    extsb GPR:wo, GPR:ro.
    extsh GPR:wo, GPR:ro.
    fadd FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 5).
    fadds FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5).
    fcmpo CR:wo, FREG:ro, FREG:ro cost(4, 5).
    fcmpo CR:wo, FSREG:ro, FSREG:ro cost(4, 5).
    fctiwz FREG:wo, FREG:ro.
    fdiv FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 35).
    fdivs FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 21).
    fmr FPR:wo, FPR:ro cost(4, 5).
    fmr FSREG:wo, FSREG:ro cost(4, 5).
    fmul FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 5).
    fmuls FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5).
    fneg FREG+DLOCAL:wo, FREG:ro cost(4, 5).
    fneg FSREG+LOCAL:wo, FSREG:ro cost(4, 5).
    frsp FSREG+LOCAL:wo, FREG:ro cost(4, 5).
    fsub FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 5).
    fsubs FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5).
    lbz GPR:wo, IND_RC_B+IND_RL_B:ro cost(4, 3).
    lbzx GPR:wo, GPR:ro, GPR:ro cost(4, 3).
    lfd FPR+DLOCAL:wo, IND_RC_D+IND_RL_D:ro cost(4, 5).
    lfdu FPR:wo, IND_RC_D:ro cost(4, 5).
    lfdx FPR:wo, GPR:ro, GPR:ro cost(4, 5).
    lfs FSREG+LOCAL:wo, IND_RC_W+IND_RL_W:ro cost(4, 4).
    lfsu FSREG:wo, IND_RC_W:rw cost(4, 4).
    lfsx FSREG:wo, GPR:ro, GPR:ro cost(4, 4).
    lha GPR:wo, IND_RC_H_S+IND_RL_H_S:ro cost(4, 3).
    lhax GPR:wo, GPR:ro, GPR:ro cost(4, 3).
    lhz GPR:wo, IND_RC_H+IND_RL_H:ro cost(4, 3).
    lhzx GPR:wo, GPR:ro, GPR:ro cost(4, 3).
    lwzu GPR:wo, IND_RC_W:rw cost(4, 3).
    lwzx GPR:wo, GPR:ro, GPR:ro cost(4, 3).
    lwz GPR+LOCAL:wo, IND_RC_W+IND_RL_W:ro cost(4, 3).
    mfcr GPR:wo cost(4, 2).
    mfspr GPR:wo, SPR:ro cost(4, 3).
    mtspr SPR:wo, GPR:ro cost(4, 2).
    mullw GPR:wo, GPR:ro, GPR:ro cost(4, 4).
    nand GPR:wo, GPR:ro, GPR:ro.
    neg GPR:wo, GPR:ro.
    nor GPR:wo, GPR:ro, GPR:ro.
    or GPR:wo, GPR:ro, GPR:ro.
    mr GPR:wo, GPR:ro.
    orX "or." GPR:wo:cc, GPR:ro, GPR:ro.
    orX_readonly "or." GPR:ro:cc, GPR:ro, GPR:ro.
    orc GPR:wo, GPR:ro, GPR:ro.
    ori GPR:wo, GPR:ro, CONST+LABEL_LO:ro.
    oris GPR:wo, GPR:ro, CONST:ro.
    rlwinm GPR:wo, GPR:ro, CONST:ro, CONST:ro, CONST:ro.
    extlwi GPR:wo, GPR:ro, CONST:ro, CONST:ro.
    extrwi GPR:wo, GPR:ro, CONST:ro, CONST:ro.
    rotlwi GPR+LOCAL:wo, GPR:ro, CONST:ro.
    rotrwi GPR+LOCAL:wo, GPR:ro, CONST:ro.
    slwi GPR+LOCAL:wo, GPR:ro, CONST:ro.
    srwi GPR+LOCAL:wo, GPR:ro, CONST:ro.
    rlwnm GPR:wo, GPR:ro, GPR:ro, CONST:ro, CONST:ro.
    rotlw GPR+LOCAL:wo, GPR:ro, GPR:ro.
    slw GPR+LOCAL:wo, GPR:ro, GPR:ro.
    subf GPR:wo, GPR:ro, GPR:ro.
    sraw GPR+LOCAL:wo, GPR:ro, GPR:ro cost(4, 2).
    srawi GPR+LOCAL:wo, GPR:ro, CONST:ro cost(4, 2).
    srw GPR+LOCAL:wo, GPR:ro, GPR:ro.
    stb GPR:ro, IND_RC_B+IND_RL_B:rw cost(4, 3).
    stbx GPR:ro, GPR:ro, GPR:ro cost(4, 3).
    stfd FPR:ro, IND_RC_D+IND_RL_D:rw cost(4, 4).
    stfdu FPR:ro, IND_RC_D:rw cost(4, 4).
    stfdx FPR:ro, GPR:ro, GPR:ro cost(4, 4).
    stfs FSREG:ro, IND_RC_W+IND_RL_W:rw cost(4, 3).
    stfsu FSREG:ro, IND_RC_W:rw cost(4, 3).
    stfsx FSREG:ro, GPR:ro, GPR:ro cost(4, 3).
    sth GPR:ro, IND_RC_H+IND_RL_H:rw cost(4, 3).
    sthx GPR:ro, GPR:ro, GPR:ro cost(4, 3).
    stw GPR:ro, IND_RC_W+IND_RL_W:rw cost(4, 3).
    stwx GPR:ro, GPR:ro, GPR:ro cost(4, 3).
    stwu GPR:ro, IND_RC_W:rw cost(4, 3).
    xor GPR:wo, GPR:ro, GPR:ro.
    xori GPR:wo, GPR:ro, CONST:ro.
    xoris GPR:wo, GPR:ro, CONST:ro.

    bug ">>> BUG" LABEL:ro cost(0, 0).
    comment "!" LABEL:ro cost(0, 0).
MOVES
    from GPR to GPR
        gen mr %2, %1
    from FSREG to FSREG
        gen fmr %2, %1
    from FPR to FPR
        gen fmr %2, %1

    /* Constants */
    from CONST smalls(%val) to GPR
        gen
            COMMENT("move CONST->GPR smalls")
            li %2, %1
    from CONST lo(%val)==0 to GPR
        gen
            COMMENT("move CONST->GPR shifted")
            lis %2, {C, hi(%1.val)}
    from CONST to GPR
        gen
            COMMENT("move CONST->GPR")
            lis %2, {C, hi(%1.val)}
            ori %2, %2, {C, lo(%1.val)}
            /* Can't use addi %2, %2, {C, los(%1.val)}
             * because %2 might be R0. */
    from LABEL to GPR
        gen
            COMMENT("move LABEL->GPR")
            lis %2, {LABEL_HI, %1.adr}
            ori %2, %2, {LABEL_LO, %1.adr}
    from LABEL_HA to GPR
        gen lis %2, %1

    /* Sign extension */
    from SEX_B to GPR
        gen extsb %2, %1.reg
    from SEX_H to GPR
        gen extsh %2, %1.reg

    /* Register + something */
    from SUM_RIS to GPR
        gen addis %2, %1.reg, {C, %1.offhi}
    from SUM_RC to GPR
        gen addi %2, %1.reg, {C, %1.off}
    from SUM_RL to GPR
        gen addi %2, %1.reg, {LABEL_LO, %1.adr}
    from SUM_RR to GPR
        gen add %2, %1.reg1, %1.reg2

    /* Other arithmetic */
    from SUB_RR to GPR
        /* reg1 - reg2 -> subtract reg2 from reg1 */
        gen subf %2, %1.reg2, %1.reg1
    from NEG_R to GPR
        gen neg %2, %1.reg
    from MUL_RR to GPR
        gen mullw %2, %1.reg1, %1.reg2
    from DIV_RR to GPR
        gen divw %2, %1.reg1, %1.reg2
    from DIV_RR_U to GPR
        gen divwu %2, %1.reg1, %1.reg2

    /* Read byte */
    from IND_RC_B+IND_RL_B to GPR
        gen lbz %2, %1
    from IND_RR_B to GPR
        gen lbzx %2, %1.reg1, %1.reg2
    /* Write byte */
    from GPR to IND_RC_B+IND_RL_B
        gen stb %1, %2
    from GPR to IND_RR_B
        gen stbx %1, %2.reg1, %2.reg2

    /* Read halfword (short) */
    from IND_RC_H+IND_RL_H to GPR
        gen lhz %2, %1
    from IND_RR_H to GPR
        gen lhzx %2, %1.reg1, %1.reg2
    from IND_RC_H_S+IND_RL_H_S to GPR
        gen lha %2, %1
    from IND_RR_H_S to GPR
        gen lhax %2, %1.reg1, %1.reg2
    /* Write halfword */
    from GPR to IND_RC_H+IND_RL_H
        gen sth %1, %2
    from GPR to IND_RR_H
        gen sthx %1, %2.reg1, %2.reg2

    /* Read word */
    from IND_RC_W+IND_RL_W to GPR
        gen lwz %2, %1
    from IND_RR_W to GPR
        gen lwzx %2, %1.reg1, %1.reg2
    from IND_RC_W+IND_RL_W to FSREG
        gen lfs %2, %1
    from IND_RR_W to FSREG
        gen lfsx %2, %1.reg1, %1.reg2
    /* Write word */
    from GPR to IND_RC_W+IND_RL_W
        gen stw %1, %2
    from GPR to IND_RR_W
        gen stwx %1, %2.reg1, %2.reg2
    from FSREG to IND_RC_W+IND_RL_W
        gen stfs %1, %2
    from FSREG to IND_RR_W
        gen stfsx %1, %2.reg1, %2.reg2

    /* Read double */
    from IND_RC_D+IND_RL_D to FPR
        gen lfd %2, %1
    from IND_RR_D to FPR
        gen lfdx %2, %1.reg1, %1.reg2
    /* Write double */
    from FPR to IND_RC_D+IND_RL_D
        gen stfd %1, %2
    from FPR to IND_RR_D
        gen stfdx %1, %2.reg1, %2.reg2

    /* Logicals */
    from NOT_R to GPR
        gen nor %2, %1.reg, %1.reg
    from AND_RIS to GPR
        gen andisX %2, %1.reg, {C, %1.valhi}
    from AND_RC to GPR
        gen andiX %2, %1.reg, {C, %1.val}
    from AND_RR to GPR
        gen and %2, %1.reg1, %1.reg2
    from ANDC_RR to GPR
        gen andc %2, %1.reg1, %1.reg2
    from OR_RIS to GPR
        gen oris %2, %1.reg, {C, %1.valhi}
    from OR_RC to GPR
        gen ori %2, %1.reg, {C, %1.val}
    from OR_RR to GPR
        gen or %2, %1.reg1, %1.reg2
    from ORC_RR to GPR
        gen orc %2, %1.reg1, %1.reg2
    from XOR_RIS to GPR
        gen xoris %2, %1.reg, {C, %1.valhi}
    from XOR_RC to GPR
        gen xori %2, %1.reg, {C, %1.val}
    from XOR_RR to GPR
        gen xor %2, %1.reg1, %1.reg2
    from NAND_RR to GPR
        gen nand %2, %1.reg1, %1.reg2
    from NOR_RR to GPR
        gen nor %2, %1.reg1, %1.reg2
    from EQV_RR to GPR
        gen eqv %2, %1.reg1, %1.reg2

    /* Conditions */
    /* Compare values, then copy cr0 to GPR. */
    from COND_RC to GPR
        gen
            cmpwi %1.reg, {C, %1.val}
            mfcr %2
    from COND_RR to GPR
        gen
            cmpw %1.reg1, %1.reg2
            mfcr %2
    from CONDL_RC to GPR
        gen
            cmplwi %1.reg, {C, %1.val}
            mfcr %2
    from CONDL_RR to GPR
        gen
            cmplw %1.reg1, %1.reg2
            mfcr %2
    from COND_FS to GPR
        gen
            fcmpo cr0, %1.reg1, %1.reg2
            mfcr %2
    from COND_FD to GPR
        gen
            fcmpo cr0, %1.reg1, %1.reg2
            mfcr %2

    /* Given a copy of cr0 in %1.reg, extract a condition bit
     * (lt, gt, eq) and perhaps flip it.
     */
    from XEQ to GPR
        gen extrwi %2, %1.reg, {C, 1}, {C, 2}
    from XNE to GPR
        gen
            extrwi %2, %1.reg, {C, 1}, {C, 2}
            xori %2, %2, {C, 1}
    from XGT to GPR
        gen extrwi %2, %1.reg, {C, 1}, {C, 1}
    from XGE to GPR
        gen
            extrwi %2, %1.reg, {C, 1}, {C, 0}
            xori %2, %2, {C, 1}
    from XLT to GPR
        gen extrwi %2, %1.reg, {C, 1}, {C, 0}
    from XLE to GPR
        gen
            extrwi %2, %1.reg, {C, 1}, {C, 1}
            xori %2, %2, {C, 1}
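    /* For reference (a sketch of the cr0 layout this relies on):
     * after mfcr, cr0 occupies the four most significant bits of the
     * GPR, in order lt, gt, eq, so (bits 0..3 in big-endian
     * numbering).  So
     *     extrwi %2, %1, 1, 2
     * right justifies the eq bit, yielding 1 when equal (XEQ), and
     *     extrwi %2, %1, 1, 0 ; xori %2, %2, 1
     * right justifies the lt bit and flips it, yielding "not lt",
     * i.e. XGE.
     */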
    /* GPR_EXPR exists solely to allow us to use regvar() (which
     * can only be used in an expression) as a register constant.
     * We can then use our moves to GPR to set register variables.
     * We define no moves to LOCAL, so we avoid confusion between
     * GPR and FSREG in LOCAL. */
    from INT_W to GPR_EXPR
        gen move %1, %2.reg
    from FLOAT_D to FPR_EXPR
        gen move %1, %2.reg
    from FLOAT_W to FSREG_EXPR
        gen move %1, %2.reg

TESTS
    /* Given orX %1, %1, %1, ncgg says, "Instruction destroys %1,
     * not allowed here".  We use orX_readonly to trick ncgg.
     *
     * Using "or." and not "mr." because mach/powerpc/top/table
     * was optimizing "or." and not "mr.".
     */
    to test GPR
        gen orX_readonly %1, %1, %1

STACKINGRULES
    from REG to STACK
        gen
            COMMENT("stack REG")
            stwu %1, {IND_RC_W, sp, 0-4}
    from INT_W-REG to STACK
        gen
            COMMENT("stack INT_W-REG")
            move %1, RSCRATCH
            stwu RSCRATCH, {IND_RC_W, sp, 0-4}
    from FLOAT_D-FREG to STACK
        gen
            COMMENT("stack FLOAT_D-FREG")
            move %1, FSCRATCH
            stfdu FSCRATCH, {IND_RC_D, sp, 0-8}
    from FREG to STACK
        gen
            COMMENT("stack FREG")
            stfdu %1, {IND_RC_D, sp, 0-8}
    from FSREG to STACK
        gen
            COMMENT("stack FSREG")
            stfsu %1, {IND_RC_W, sp, 0-4}
    /*
     * We never stack LOCAL or DLOCAL tokens, because we only use
     * them for register variables, so ncg pushes the register,
     * not the token.  These rules only prevent an error in ncgg.
     */
    from LOCAL to STACK
        gen bug {LABEL, "STACKING LOCAL"}
    from DLOCAL to STACK
        gen bug {LABEL, "STACKING DLOCAL"}

COERCIONS
    from STACK
        uses REG
        gen
            COMMENT("coerce STACK->REG")
            lwz %a, {IND_RC_W, sp, 0}
            addi sp, sp, {C, 4}
        yields %a
    from STACK
        uses FREG
        gen
            COMMENT("coerce STACK->FREG")
            lfd %a, {IND_RC_D, sp, 0}
            addi sp, sp, {C, 8}
        yields %a
    from STACK
        uses FSREG
        gen
            COMMENT("coerce STACK->FSREG")
            lfs %a, {IND_RC_W, sp, 0}
            addi sp, sp, {C, 4}
        yields %a

    /* "uses REG=%1" may find and reuse a register containing the
     * same token.  For contrast, "uses REG gen move %1, %a" would
     * pick a different register before doing the move.
     */
    from INT_W
        uses REG=%1
        yields %a
    /*
     * There is no coercion from IND_ALL_D to REG REG, because
     * coercions can't allocate registers for intermediate values.
     *
     * A coercion to split IND_RC_D into two IND_RC_W, without
     * allocating an intermediate register, would yield
     *   {IND_RC_W, %1.reg, %1.off+4}
     * but %1.off+4 might overflow a signed 16-bit integer.
     */
    from FLOAT_D
        uses FREG=%1
        yields %a
    from FLOAT_W
        uses FSREG=%1
        yields %a
PATTERNS

/* Constants */

pat loc $1==(0-0x8000)              /* Load constant */
    yields {CONST_N8000, $1}
pat loc $1>=(0-0x7FFF) && $1<=(0-1)
    yields {CONST_N7FFF_N0001, $1}
pat loc $1>=0 && $1<=0x7FFF
    yields {CONST_0000_7FFF, $1}
pat loc $1==0x8000
    yields {CONST_8000, $1}
pat loc $1>=0x8001 && $1<=0xFFFF
    yields {CONST_8001_FFFF, $1}
pat loc lo($1)==0
    yields {CONST_HI_ZR, $1}
pat loc
    yields {CONST_HI_LO, $1}

/* Stack shuffles */

/* The peephole optimizer does: loc $1 ass 4 -> asp $1
 * To optimize multiplication, it uses: dup 8 asp 4
 */

pat asp $1==4                       /* Adjust stack by constant */
    with exact INT_W+FLOAT_W
        /* drop %1 */
    with STACK
        gen addi sp, sp, {C, 4}
pat asp smalls($1)
    with STACK
        gen addi sp, sp, {C, $1}
pat asp lo($1)==0
    with STACK
        gen addis sp, sp, {C, hi($1)}
pat asp
    with STACK
        gen
            addis sp, sp, {C, his($1)}
            addi sp, sp, {C, los($1)}

pat ass $1==4                       /* Adjust stack by variable */
    with REG STACK
        gen add sp, sp, %1

/* To duplicate a token, we coerce the token into a register,
 * then duplicate the register.  This decreases code size.
 */
pat dup $1==4                       /* Duplicate word on top of stack */
    with REG+FSREG
        yields %1 %1
pat dup $1==8                       /* Duplicate double-word */
    with REG+FSREG REG+FSREG
        yields %2 %1 %2 %1
    with FREG
        yields %1 %1
pat dup                             /* Duplicate other size */
    leaving
        loc $1
        dus 4
pat dus $1==4                       /* Duplicate variable size */
    with REG STACK
        /* ( a size%1 -- a a ) */
        uses REG, REG
        gen
            srwi %a, %1, {C, 2}
            mtspr ctr, %a
            add %b, sp, %1
            1:
            lwzu %a, {IND_RC_W, %b, 0-4}
            stwu %a, {IND_RC_W, sp, 0-4}
            bdnz {LABEL, "1b"}
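/* The dus loop above is a conventional ctr-driven copy.  As a C
 * sketch (illustrative; size is the word-multiple byte count that
 * arrives in %1):
 *
 *     uint32_t *p = (uint32_t *)((char *)sp + size);
 *     uint32_t *q = sp;
 *     for (unsigned n = size / 4; n != 0; n--)
 *             *--q = *--p;
 *
 * srwi/mtspr set up ctr with the word count, each lwzu/stwu
 * pre-decrements its base register by 4, and bdnz decrements ctr and
 * branches until it reaches zero.
 */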
pat exg $1==4                       /* Exchange top two words */
    with INT_W+FLOAT_W INT_W+FLOAT_W
        yields %1 %2
pat exg defined($1)                 /* Exchange other size */
    leaving
        loc $1
        cal ".exg"
pat exg !defined($1)
    leaving cal ".exg"

pat ste loe $1==$2                  /* Store then load external */
    leaving
        dup 4
        ste $1

/* Type conversions */

pat loc loc ciu                     /* signed -> unsigned */
    leaving
        loc $1
        loc $2
        cuu
pat loc loc cui                     /* unsigned -> signed */
    leaving
        loc $1
        loc $2
        cuu
pat loc loc cuu $1<=4 && $2<=4      /* unsigned -> unsigned */
    /* nop */
pat loc loc cii $1<=4 && $2<=$1
    /* signed -> signed of smaller or same size,
     * no sign extension */
pat loc loc cii $1==1 && $2<=4      /* sign-extend char */
    with REG
        yields {SEX_B, %1}
pat loc loc cii $1==2 && $2<=4      /* sign-extend short */
    with REG
        yields {SEX_H, %1}

/* Local variables */

pat lal smalls($1)                  /* Load address of local */
    yields {SUM_RC, fp, $1}
pat lal                             /* Load address of local */
    uses REG={SUM_RIS, fp, his($1)}
    yields {SUM_RC, %a, los($1)}

/* Load word from local */
pat lol inreg($1)==reg_any || inreg($1)==reg_float
    yields {LOCAL, $1}
pat lol
    leaving
        lal $1
        loi 4

/* Load double-word from local */
pat ldl inreg($1)==reg_float
    yields {DLOCAL, $1}
pat ldl
    leaving
        lal $1
        loi 8

/* Store word to local */
pat stl inreg($1)==reg_any
    with exact INT_W
        /* ncg fails to infer that regvar($1) is dead! */
        kills regvar($1)
        gen move %1, {GPR_EXPR, regvar($1)}
    with STACK
        gen
            lwz {LOCAL, $1}, {IND_RC_W, sp, 0}
            addi sp, sp, {C, 4}
pat stl inreg($1)==reg_float
    with exact FSREG+IND_ALL_W
        kills regvar_w($1, reg_float)
        gen move %1, {FSREG_EXPR, regvar_w($1, reg_float)}
    with STACK
        gen
            lfs {LOCAL, $1}, {IND_RC_W, sp, 0}
            addi sp, sp, {C, 4}
pat stl
    leaving
        lal $1
        sti 4

/* Store double-word to local */
pat sdl inreg($1)==reg_float
    with exact FREG+IND_ALL_D
        kills regvar_d($1, reg_float)
        gen move %1, {FPR_EXPR, regvar_d($1, reg_float)}
    with STACK
        gen
            lfd {DLOCAL, $1}, {IND_RC_D, sp, 0}
            addi sp, sp, {C, 8}
pat sdl
    leaving
        lal $1
        sti 8

/* Load indirect from local */
pat lil inreg($1)==reg_any
    yields {IND_RC_W, regvar($1), 0}
pat lil
    leaving
        lol $1
        loi 4
pat sil                             /* Save to indirected local */
    leaving
        lol $1
        sti 4

pat zrl                             /* Zero local */
    leaving
        loc 0
        stl $1
pat inl                             /* Increment local */
    leaving
        lol $1
        loc 1
        adi 4
        stl $1
pat del                             /* Decrement local */
    leaving
        lol $1
        loc 1
        sbi 4
        stl $1

/* Global variables */

pat lpi                             /* Load address of external function */
    leaving lae $1
pat lae                             /* Load address of external */
    uses REG={LABEL_HA, $1}
    yields {SUM_RL, %a, $1}
pat loe                             /* Load word external */
    leaving
        lae $1
        loi INT32
pat ste                             /* Store word external */
    leaving
        lae $1
        sti INT32
pat lde                             /* Load double-word external */
    leaving
        lae $1
        loi INT64
pat sde                             /* Store double-word external */
    leaving
        lae $1
        sti INT64
pat zre                             /* Zero external */
    leaving
        loc 0
        ste $1
pat ine                             /* Increment external */
    leaving
        loe $1
        inc
        ste $1
pat dee                             /* Decrement external */
    leaving
        loe $1
        dec
        ste $1

/* Structures */

pat lof                             /* Load word offsetted */
    leaving
        adp $1
        loi INT32
pat ldf                             /* Load double-word offsetted */
    leaving
        adp $1
        loi INT64
pat stf                             /* Store word offsetted */
    leaving
        adp $1
        sti INT32
pat sdf                             /* Store double-word offsetted */
    leaving
        adp $1
        sti INT64

/* Loads and stores */

pat loi $1==INT8                    /* Load byte indirect */
    with REG
        yields {IND_RC_B, %1, 0}
    with exact SUM_RC
        yields {IND_RC_B, %1.reg, %1.off}
    with exact SUM_RL
        yields {IND_RL_B, %1.reg, %1.adr}
    with exact SUM_RR
        yields {IND_RR_B, %1.reg1, %1.reg2}
pat loi loc loc cii $1==INT16 && $2==INT16 && $3==INT32
    /* Load half-word indirect and sign extend */
    with REG
        yields {IND_RC_H_S, %1, 0}
    with exact SUM_RC
        yields {IND_RC_H_S, %1.reg, %1.off}
    with exact SUM_RL
        yields {IND_RL_H_S, %1.reg, %1.adr}
    with exact SUM_RR
        yields {IND_RR_H_S, %1.reg1, %1.reg2}
pat loi $1==INT16                   /* Load half-word indirect */
    with REG
        yields {IND_RC_H, %1, 0}
    with exact SUM_RC
        yields {IND_RC_H, %1.reg, %1.off}
    with exact SUM_RL
        yields {IND_RL_H, %1.reg, %1.adr}
    with exact SUM_RR
        yields {IND_RR_H, %1.reg1, %1.reg2}
pat loi $1==INT32                   /* Load word indirect */
    with REG
        yields {IND_RC_W, %1, 0}
    with exact SUM_RC
        yields {IND_RC_W, %1.reg, %1.off}
    with exact SUM_RL
        yields {IND_RL_W, %1.reg, %1.adr}
    with exact SUM_RR
        yields {IND_RR_W, %1.reg1, %1.reg2}
pat loi $1==INT64                   /* Load double-word indirect */
    with REG
        yields {IND_RC_D, %1, 0}
    with exact SUM_RC
        yields {IND_RC_D, %1.reg, %1.off}
    with exact SUM_RL
        yields {IND_RL_D, %1.reg, %1.adr}
    with exact SUM_RR
        yields {IND_RR_D, %1.reg1, %1.reg2}
pat loi                             /* Load arbitrary size */
    leaving
        loc $1
        los 4
pat los $1==4                       /* Load arbitrary size */
    with REG3 STACK
        kills ALL
        gen bl {LABEL, ".los4"}

pat sti $1==INT8                    /* Store byte indirect */
    with REG REG
        kills MEMORY
        gen move %2, {IND_RC_B, %1, 0}
    with SUM_RC REG
        kills MEMORY
        gen move %2, {IND_RC_B, %1.reg, %1.off}
    with SUM_RL REG
        kills MEMORY
        gen move %2, {IND_RL_B, %1.reg, %1.adr}
    with SUM_RR REG
        kills MEMORY
        gen move %2, {IND_RR_B, %1.reg1, %1.reg2}
pat sti $1==INT16                   /* Store half-word indirect */
    with REG REG
        kills MEMORY
        gen move %2, {IND_RC_H, %1, 0}
    with SUM_RC REG
        kills MEMORY
        gen move %2, {IND_RC_H, %1.reg, %1.off}
    with SUM_RL REG
        kills MEMORY
        gen move %2, {IND_RL_H, %1.reg, %1.adr}
    with SUM_RR REG
        kills MEMORY
        gen move %2, {IND_RR_H, %1.reg1, %1.reg2}
pat sti $1==INT32                   /* Store word indirect */
    with REG REG+FSREG
        kills MEMORY
        gen move %2, {IND_RC_W, %1, 0}
    with SUM_RC REG+FSREG
        kills MEMORY
        gen move %2, {IND_RC_W, %1.reg, %1.off}
    with SUM_RL REG+FSREG
        kills MEMORY
        gen move %2, {IND_RL_W, %1.reg, %1.adr}
    with SUM_RR REG+FSREG
        kills MEMORY
        gen move %2, {IND_RR_W, %1.reg1, %1.reg2}
pat sti $1==INT64                   /* Store double-word indirect */
    with REG FREG
        kills MEMORY
        gen move %2, {IND_RC_D, %1, 0}
    with SUM_RC FREG
        kills MEMORY
        gen move %2, {IND_RC_D, %1.reg, %1.off}
    with SUM_RL FREG
        kills MEMORY
        gen move %2, {IND_RL_D, %1.reg, %1.adr}
    with SUM_RR FREG
        kills MEMORY
        gen move %2, {IND_RR_D, %1.reg1, %1.reg2}
    with REG REG REG
        kills MEMORY
        gen
            move %2, {IND_RC_W, %1, 0}
            move %3, {IND_RC_W, %1, 4}
pat sti                             /* Store arbitrary size */
    leaving
        loc $1
        sts 4
pat sts $1==4                       /* Store arbitrary size */
    with REG3 STACK
        kills ALL
        gen bl {LABEL, ".sts4"}
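/* Addressing-mode folding example (illustrative): for the EM sequence
 *     lal 8 ; loi 4
 * lal yields {SUM_RC, fp, 8}, and the "with exact SUM_RC" rule of loi
 * rewrites that token to {IND_RC_W, fp, 8}, so moving the result into
 * a register emits the single instruction
 *     lwz rX, 8(fp)
 * rather than an address computation followed by a load.
 */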
/* Arithmetic wrappers */

pat ads $1==4                       /* Add var to pointer */
    leaving adi $1
pat sbs $1==4                       /* Subtract var from pointer */
    leaving sbi $1
pat adp                             /* Add constant to pointer */
    leaving
        loc $1
        adi 4
pat adu                             /* Add unsigned */
    leaving adi $1
pat sbu                             /* Subtract unsigned */
    leaving sbi $1
pat inc                             /* Add 1 */
    leaving
        loc 1
        adi 4
pat dec                             /* Subtract 1 */
    leaving
        loc 1
        sbi 4
pat mlu                             /* Multiply unsigned */
    leaving mli $1
pat slu                             /* Shift left unsigned */
    leaving sli $1

/* Word arithmetic */

pat adi $1==4                       /* Add word (second + top) */
    with REG REG
        yields {SUM_RR, %1, %2}
    with CONST2 REG
        yields {SUM_RC, %2, %1.val}
    with REG CONST2
        yields {SUM_RC, %1, %2.val}
    with CONST_HI_ZR REG
        yields {SUM_RIS, %2, his(%1.val)}
    with REG CONST_HI_ZR
        yields {SUM_RIS, %1, his(%2.val)}
    with CONST_STACK-CONST2-CONST_HI_ZR REG
        uses reusing %2, REG={SUM_RIS, %2, his(%1.val)}
        yields {SUM_RC, %a, los(%1.val)}
    with REG CONST_STACK-CONST2-CONST_HI_ZR
        uses reusing %1, REG={SUM_RIS, %1, his(%2.val)}
        yields {SUM_RC, %a, los(%2.val)}

pat sbi $1==4                       /* Subtract word (second - top) */
    with REG REG
        uses reusing %2, REG
        yields {SUB_RR, %2, %1}
    with CONST2_WHEN_NEG REG
        yields {SUM_RC, %2, 0-%1.val}
    with CONST_HI_ZR REG
        yields {SUM_RIS, %2, his(0-%1.val)}
    with CONST_STACK-CONST2_WHEN_NEG-CONST_HI_ZR REG
        uses reusing %2, REG={SUM_RIS, %2, his(0-%1.val)}
        yields {SUM_RC, %a, los(0-%1.val)}

pat ngi $1==4                       /* Negate word */
    with REG
        yields {NEG_R, %1}

pat mli $1==4                       /* Multiply word (second * top) */
    with REG REG
        yields {MUL_RR, %2, %1}

pat dvi $1==4                       /* Divide word (second / top) */
    with REG REG
        yields {DIV_RR, %2, %1}

pat dvu $1==4                       /* Divide unsigned word (second / top) */
    with REG REG
        yields {DIV_RR_U, %2, %1}

/* To calculate a remainder: a % b = a - (a / b * b) */

pat rmi $1==4                       /* Remainder word (second % top) */
    with REG REG
        uses REG={DIV_RR, %2, %1}, REG
        gen move {MUL_RR, %a, %1}, %b
        yields {SUB_RR, %2, %b}

pat rmu $1==4                       /* Remainder unsigned word (second % top) */
    with REG REG
        uses REG={DIV_RR_U, %2, %1}, REG
        gen move {MUL_RR, %a, %1}, %b
        yields {SUB_RR, %2, %b}
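/* Worked example of the identity for rmi with second = 7, top = 3:
 *     divw  gives 7 / 3 = 2
 *     mullw gives 2 * 3 = 6
 *     subf  gives 7 - 6 = 1, which is 7 % 3.
 * The same shape with divwu serves rmu; classic 32-bit PowerPC has no
 * remainder instruction, so three instructions is the expected cost.
 */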
/* Bitwise logic */

pat and $1==4                       /* AND word */
    with REG NOT_R
        yields {ANDC_RR, %1, %2.reg}
    with NOT_R REG
        yields {ANDC_RR, %2, %1.reg}
    with REG REG
        yields {AND_RR, %1, %2}
    with REG UCONST2
        yields {AND_RC, %1, %2.val}
    with UCONST2 REG
        yields {AND_RC, %2, %1.val}
    with REG CONST_HI_ZR
        yields {AND_RIS, %1, hi(%2.val)}
    with CONST_HI_ZR REG
        yields {AND_RIS, %2, hi(%1.val)}
pat and defined($1)                 /* AND set */
    leaving
        loc $1
        cal ".and"
pat and !defined($1)
    leaving cal ".and"

pat ior $1==4                       /* OR word */
    with REG NOT_R
        yields {ORC_RR, %1, %2.reg}
    with NOT_R REG
        yields {ORC_RR, %2, %1.reg}
    with REG REG
        yields {OR_RR, %1, %2}
    with REG UCONST2
        yields {OR_RC, %1, %2.val}
    with UCONST2 REG
        yields {OR_RC, %2, %1.val}
    with REG CONST_HI_ZR
        yields {OR_RIS, %1, hi(%2.val)}
    with CONST_HI_ZR REG
        yields {OR_RIS, %2, hi(%1.val)}
    with REG CONST_STACK-UCONST2-CONST_HI_ZR
        uses reusing %1, REG={OR_RIS, %1, hi(%2.val)}
        yields {OR_RC, %1, lo(%2.val)}
    with CONST_STACK-UCONST2-CONST_HI_ZR REG
        uses reusing %2, REG={OR_RIS, %2, hi(%1.val)}
        yields {OR_RC, %2, lo(%1.val)}
pat ior defined($1)                 /* OR set */
    leaving
        loc $1
        cal ".ior"
/* OR set (variable), used in lang/m2/libm2/LtoUset.e */
pat ior !defined($1)
    leaving cal ".ior"

pat xor $1==4                       /* XOR word */
    with REG REG
        yields {XOR_RR, %1, %2}
    with REG UCONST2
        yields {XOR_RC, %1, %2.val}
    with UCONST2 REG
        yields {XOR_RC, %2, %1.val}
    with REG CONST_HI_ZR
        yields {XOR_RIS, %1, hi(%2.val)}
    with CONST_HI_ZR REG
        yields {XOR_RIS, %2, hi(%1.val)}
    with REG CONST_STACK-UCONST2-CONST_HI_ZR
        uses reusing %1, REG={XOR_RIS, %1, hi(%2.val)}
        yields {XOR_RC, %1, lo(%2.val)}
    with CONST_STACK-UCONST2-CONST_HI_ZR REG
        uses reusing %2, REG={XOR_RIS, %2, hi(%1.val)}
        yields {XOR_RC, %2, lo(%1.val)}
pat xor defined($1)                 /* XOR set */
    leaving
        loc $1
        cal ".xor"
pat xor !defined($1)
    leaving cal ".xor"

pat com $1==INT32                   /* NOT word */
    with exact AND_RR
        yields {NAND_RR, %1.reg1, %1.reg2}
    with exact OR_RR
        yields {NOR_RR, %1.reg1, %1.reg2}
    with exact XOR_RR
        yields {EQV_RR, %1.reg1, %1.reg2}
    with REG
        yields {NOT_R, %1}
pat com defined($1)                 /* NOT set */
    leaving
        loc $1
        cal ".com"
pat com !defined($1)
    leaving cal ".com"

pat zer $1==4                       /* Push zero */
    leaving loc 0
pat zer defined($1)                 /* Create empty set */
    leaving
        loc $1
        cal ".zer"

/* Shifts and rotations */

pat sli $1==4                       /* Shift left (second << top) */
    with CONST_STACK REG
        uses reusing %2, REG
        gen slwi %a, %2, {C, %1.val & 0x1F}
        yields %a
    with REG REG
        uses reusing %2, REG
        gen slw %a, %2, %1
        yields %a
pat sli stl $1==4 && inreg($2)==reg_any
    with CONST_STACK REG
        gen slwi {LOCAL, $2}, %2, {C, %1.val & 0x1F}
    with REG REG
        gen slw {LOCAL, $2}, %2, %1

pat sri $1==4                       /* Shift right signed (second >> top) */
    with CONST_STACK REG
        uses reusing %2, REG
        gen srawi %a, %2, {C, %1.val & 0x1F}
        yields %a
    with REG REG
        uses reusing %2, REG
        gen sraw %a, %2, %1
        yields %a
pat sri stl $1==4 && inreg($2)==reg_any
    with CONST_STACK REG
        gen srawi {LOCAL, $2}, %2, {C, %1.val & 0x1F}
    with REG REG
        gen sraw {LOCAL, $2}, %2, %1

pat sru $1==4                       /* Shift right unsigned (second >> top) */
    with CONST_STACK REG
        uses reusing %2, REG
        gen srwi %a, %2, {C, %1.val & 0x1F}
        yields %a
    with REG REG
        uses reusing %2, REG
        gen srw %a, %2, %1
        yields %a
pat sru stl $1==4 && inreg($2)==reg_any
    with CONST_STACK REG
        gen srwi {LOCAL, $2}, %2, {C, %1.val & 0x1F}
    with REG REG
        gen srw {LOCAL, $2}, %2, %1

pat rol $1==4                       /* Rotate left word */
    with CONST_STACK REG
        uses reusing %2, REG
        gen rotlwi %a, %2, {C, %1.val & 0x1F}
        yields %a
    with REG REG
        uses reusing %2, REG
        gen rotlw %a, %2, %1
        yields %a
pat rol stl $1==4 && inreg($2)==reg_any
    with CONST_STACK REG
        gen rotlwi {LOCAL, $2}, %2, {C, %1.val & 0x1F}
    with REG REG
        gen rotlw {LOCAL, $2}, %2, %1

/*
 * ror 4 -> ngi 4, rol 4
 * because to rotate right by n bits is to rotate left by
 * (32 - n), which is to rotate left by -n.  PowerPC rotlw
 * handles -n as (-n & 0x1F).
 */
pat ror $1==4                       /* Rotate right word */
    with CONST_STACK REG
        uses reusing %2, REG
        gen rotrwi %a, %2, {C, %1.val & 0x1F}
        yields %a
    with /* anything */
        leaving
            ngi 4
            rol 4
pat ror stl $1==4 && inreg($2)==reg_any
    with CONST_STACK REG
        gen rotrwi {LOCAL, $2}, %2, {C, %1.val & 0x1F}
    with /* anything */
        leaving
            ngi 4
            rol 4
            stl $2
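/* A quick check of that identity with n = 8 and x = 0x12345678:
 * rotating right by 8 gives 0x78123456, and rotating left by
 * 32 - 8 = 24 gives (x << 24) | (x >> 8) = 0x78123456 as well.
 * Since rotlw masks its shift count to 5 bits, the negated count
 * works out the same: (-8) & 0x1F = 24.
 */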
/* Arrays */

pat aar $1==4                       /* Address of array element */
    leaving cal ".aar4"
pat lar $1==4                       /* Load from array */
    with STACK
        kills ALL
        gen
            bl {LABEL, ".aar4"}
            /* pass r3 = size from .aar4 to .los4 */
            bl {LABEL, ".los4"}
pat lae lar $2==4 && nicesize(rom($1, 3))
    leaving
        lae $1
        aar 4
        loi rom($1, 3)
pat sar $1==4                       /* Store to array */
    with STACK
        kills ALL
        gen
            bl {LABEL, ".aar4"}
            /* pass r3 = size from .aar4 to .sts4 */
            bl {LABEL, ".sts4"}
pat lae sar $2==4 && nicesize(rom($1, 3))
    leaving
        lae $1
        aar 4
        sti rom($1, 3)

/* Sets */

pat set defined($1)                 /* Create singleton set */
    leaving
        loc $1
        cal ".set"
/* Create set (variable), used in lang/m2/libm2/LtoUset.e */
pat set !defined($1)
    leaving cal ".set"
pat inn defined($1)                 /* Test for set bit */
    leaving
        loc $1
        cal ".inn"
pat inn !defined($1)
    leaving cal ".inn"

/* Boolean resolutions */

pat teq                             /* top = (top == 0) */
    with REG
        uses reusing %1, REG
        gen
            test %1
            mfcr %a
        yields {XEQ, %a}
pat tne                             /* top = (top != 0) */
    with REG
        uses reusing %1, REG
        gen
            test %1
            mfcr %a
        yields {XNE, %a}
pat tlt                             /* top = (top < 0) */
    with REG
        uses reusing %1, REG
        gen
            test %1
            mfcr %a
        yields {XLT, %a}
pat tle                             /* top = (top <= 0) */
    with REG
        uses reusing %1, REG
        gen
            test %1
            mfcr %a
        yields {XLE, %a}
pat tgt                             /* top = (top > 0) */
    with REG
        uses reusing %1, REG
        gen
            test %1
            mfcr %a
        yields {XGT, %a}
pat tge                             /* top = (top >= 0) */
    with REG
        uses reusing %1, REG
        gen
            test %1
            mfcr %a
        yields {XGE, %a}

pat cmi teq $1==4                   /* Signed second == top */
    with REG CONST2
        uses reusing %1, REG={COND_RC, %1, %2.val}
        yields {XEQ, %a}
    with CONST2 REG
        uses reusing %1, REG={COND_RC, %2, %1.val}
        yields {XEQ, %a}
    with REG REG
        uses reusing %1, REG={COND_RR, %2, %1}
        yields {XEQ, %a}
pat cmi tne $1==4                   /* Signed second != top */
    with REG CONST2
        uses reusing %1, REG={COND_RC, %1, %2.val}
        yields {XNE, %a}
    with CONST2 REG
        uses reusing %1, REG={COND_RC, %2, %1.val}
        yields {XNE, %a}
    with REG REG
        uses reusing %1, REG={COND_RR, %2, %1}
        yields {XNE, %a}
pat cmi tgt $1==4                   /* Signed second > top */
    with REG CONST2
        uses reusing %1, REG={COND_RC, %1, %2.val}
        yields {XLT, %a}
    with CONST2 REG
        uses reusing %1, REG={COND_RC, %2, %1.val}
        yields {XGT, %a}
    with REG REG
        uses reusing %1, REG={COND_RR, %2, %1}
        yields {XGT, %a}
pat cmi tge $1==4                   /* Signed second >= top */
    with REG CONST2
        uses reusing %1, REG={COND_RC, %1, %2.val}
        yields {XLE, %a}
    with CONST2 REG
        uses reusing %1, REG={COND_RC, %2, %1.val}
        yields {XGE, %a}
    with REG REG
        uses reusing %1, REG={COND_RR, %2, %1}
        yields {XGE, %a}
pat cmi tlt $1==4                   /* Signed second < top */
    with REG CONST2
        uses reusing %1, REG={COND_RC, %1, %2.val}
        yields {XGT, %a}
    with CONST2 REG
        uses reusing %1, REG={COND_RC, %2, %1.val}
        yields {XLT, %a}
    with REG REG
        uses reusing %1, REG={COND_RR, %2, %1}
        yields {XLT, %a}
pat cmi tle $1==4                   /* Signed second <= top */
    with REG CONST2
        uses reusing %1, REG={COND_RC, %1, %2.val}
        yields {XGE, %a}
    with CONST2 REG
        uses reusing %1, REG={COND_RC, %2, %1.val}
        yields {XLE, %a}
    with REG REG
        uses reusing %1, REG={COND_RR, %2, %1}
        yields {XLE, %a}

pat cmu teq $1==4                   /* Unsigned second == top */
    with REG UCONST2
        uses reusing %1, REG={CONDL_RC, %1, %2.val}
        yields {XEQ, %a}
    with UCONST2 REG
        uses reusing %1, REG={CONDL_RC, %2, %1.val}
        yields {XEQ, %a}
    with REG REG
        uses reusing %1, REG={CONDL_RR, %2, %1}
        yields {XEQ, %a}
pat cmu tne $1==4                   /* Unsigned second != top */
    with REG UCONST2
        uses reusing %1, REG={CONDL_RC, %1, %2.val}
        yields {XNE, %a}
    with UCONST2 REG
        uses reusing %1, REG={CONDL_RC, %2, %1.val}
        yields {XNE, %a}
    with REG REG
        uses reusing %1, REG={CONDL_RR, %2, %1}
        yields {XNE, %a}
pat cmu tgt $1==4                   /* Unsigned second > top */
    with REG UCONST2
        uses reusing %1, REG={CONDL_RC, %1, %2.val}
        yields {XLT, %a}
    with UCONST2 REG
        uses reusing %1, REG={CONDL_RC, %2, %1.val}
        yields {XGT, %a}
    with REG REG
        uses reusing %1, REG={CONDL_RR, %2, %1}
        yields {XGT, %a}
pat cmu tge $1==4                   /* Unsigned second >= top */
    with REG UCONST2
        uses reusing %1, REG={CONDL_RC, %1, %2.val}
        yields {XLE, %a}
    with UCONST2 REG
        uses reusing %1, REG={CONDL_RC, %2, %1.val}
        yields {XGE, %a}
    with REG REG
        uses reusing %1, REG={CONDL_RR, %2, %1}
        yields {XGE, %a}
pat cmu tlt $1==4                   /* Unsigned second < top */
    with REG UCONST2
        uses reusing %1, REG={CONDL_RC, %1, %2.val}
        yields {XGT, %a}
    with UCONST2 REG
        uses reusing %1, REG={CONDL_RC, %2, %1.val}
        yields {XLT, %a}
    with REG REG
        uses reusing %1, REG={CONDL_RR, %2, %1}
        yields {XLT, %a}
pat cmu tle $1==4                   /* Unsigned second <= top */
    with REG UCONST2
        uses reusing %1, REG={CONDL_RC, %1, %2.val}
        yields {XGE, %a}
    with UCONST2 REG
        uses reusing %1, REG={CONDL_RC, %2, %1.val}
        yields {XLE, %a}
    with REG REG
        uses reusing %1, REG={CONDL_RR, %2, %1}
        yields {XLE, %a}

/* Simple branches */

proc zxx example zeq
    with REG STACK
        gen
            test %1
            bxx* {LABEL, $1}

/* Pop signed int, branch if... */
pat zeq call zxx("beq")             /* top == 0 */
pat zne call zxx("bne")             /* top != 0 */
pat zgt call zxx("bgt")             /* top > 0 */
pat zge call zxx("bge")             /* top >= 0 */
pat zlt call zxx("blt")             /* top < 0 */
pat zle call zxx("ble")             /* top <= 0 */

/* The peephole optimizer rewrites
 *   cmi 4 zeq
 * as beq, and does the same for bne, bgt, and so on.
 */
proc bxx example beq
    with REG CONST2 STACK
        gen
            cmpwi %1, %2
            bxx[2] {LABEL, $1}
    with CONST2 REG STACK
        gen
            cmpwi %2, %1
            bxx[1] {LABEL, $1}
    with REG REG STACK
        gen
            cmpw %2, %1
            bxx[1] {LABEL, $1}

/* Pop two signed ints, branch if... */
pat beq call bxx("beq", "beq")      /* second == top */
pat bne call bxx("bne", "bne")      /* second != top */
pat bgt call bxx("bgt", "blt")      /* second > top */
pat bge call bxx("bge", "ble")      /* second >= top */
pat blt call bxx("blt", "bgt")      /* second < top */
pat ble call bxx("ble", "bge")      /* second <= top */

proc cmu4zxx example cmu zeq
    with REG CONST2 STACK
        gen
            cmplwi %1, %2
            bxx[2] {LABEL, $2}
    with CONST2 REG STACK
        gen
            cmplwi %2, %1
            bxx[1] {LABEL, $2}
    with REG REG STACK
        gen
            cmplw %2, %1
            bxx[1] {LABEL, $2}

/* Pop two unsigned ints, branch if... */
pat cmu zeq $1==4 call cmu4zxx("beq", "beq")
pat cmu zne $1==4 call cmu4zxx("bne", "bne")
pat cmu zgt $1==4 call cmu4zxx("bgt", "blt")
pat cmu zge $1==4 call cmu4zxx("bge", "ble")
pat cmu zlt $1==4 call cmu4zxx("blt", "bgt")
pat cmu zle $1==4 call cmu4zxx("ble", "bge")

/* Comparisons */

/* Each comparison extracts the lt and gt bits from cr0.
 *   extlwi %a, %a, 2, 0
 * puts lt in the sign bit, so lt yields a negative result,
 * gt yields positive.
 *   rlwinm %a, %a, 1, 31, 0
 * puts gt in the sign bit, to reverse the comparison.
 */
pat cmi $1==INT32                   /* Signed tristate compare */
    with REG CONST2
        uses reusing %1, REG={COND_RC, %1, %2.val}
        gen rlwinm %a, %a, {C, 1}, {C, 31}, {C, 0}
        yields %a
    with CONST2 REG
        uses reusing %2, REG={COND_RC, %2, %1.val}
        gen extlwi %a, %a, {C, 2}, {C, 0}
        yields %a
    with REG REG
        uses reusing %1, REG={COND_RR, %2, %1}
        gen extlwi %a, %a, {C, 2}, {C, 0}
        yields %a

pat cmu $1==INT32                   /* Unsigned tristate compare */
    with REG UCONST2
        uses reusing %1, REG={CONDL_RC, %1, %2.val}
        gen rlwinm %a, %a, {C, 1}, {C, 31}, {C, 0}
        yields %a
    with UCONST2 REG
        uses reusing %2, REG={CONDL_RC, %2, %1.val}
        gen extlwi %a, %a, {C, 2}, {C, 0}
        yields %a
    with REG REG
        uses reusing %1, REG={CONDL_RR, %2, %1}
        gen extlwi %a, %a, {C, 2}, {C, 0}
        yields %a
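/* Worked example of the tristate trick (a sketch): comparing
 * second = 5 with top = 9, cmpw sets lt in cr0, and mfcr leaves lt in
 * bit 0, gt in bit 1.  extlwi %a, %a, 2, 0 clears everything except
 * those top two bits, so the result is negative when lt is set, a
 * positive value (gt in bit 1) when gt is set, and zero on equality
 * --- exactly the negative/zero/positive contract of cmi and cmu.
 */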
pat cmp                             /* Compare pointers */
    leaving cmu INT32

pat cms $1==INT32                   /* Compare blocks (word sized) */
    leaving cmi INT32
pat cms defined($1)
    leaving
        loc $1
        cal ".cms"
pat cms !defined($1)
    leaving cal ".cms"

/* Other branching and labelling */

pat lab topeltsize($1)==4 && !fallthrough($1)
    kills ALL
    gen labeldef $1
    yields r3
pat lab topeltsize($1)==4 && fallthrough($1)
    with REG3 STACK
        kills ALL
        gen labeldef $1
        yields r3
pat lab topeltsize($1)!=4
    with STACK
        kills ALL
        gen labeldef $1

pat bra topeltsize($1)==4           /* Unconditional jump with TOS register */
    with REG3 STACK
        gen b {LABEL, $1}
pat bra topeltsize($1)!=4           /* Unconditional jump without TOS register */
    with STACK
        gen b {LABEL, $1}

/* Miscellaneous */

pat cal                             /* Call procedure */
    with STACK
        kills ALL
        gen bl {LABEL, $1}
pat cai                             /* Call procedure indirect */
    with REG STACK
        kills ALL
        gen
            mtspr ctr, %1
            bctrl.
pat lfr $1==INT32                   /* Load function result, word */
    yields r3
pat lfr $1==INT64                   /* Load function result, double-word */
    yields r4 r3
pat ret $1==0                       /* Return from procedure */
    gen
        /* Restore saved registers. */
        return
        /* Epilog: restore lr and fp. */
        lwz r0, {IND_RC_W, fp, 4}
        mtspr lr, r0
        lwz r0, {IND_RC_W, fp, 0}
        /* Free our stack frame. */
        addi sp, fp, {C, 8}
        mr fp, r0
        blr.
pat ret $1==4                       /* Return from procedure, word */
    with REG3
        leaving ret 0
pat ret $1==8                       /* Return from proc, double-word */
    with REG3 REG
        gen move %2, r4
        leaving ret 0
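/* For reference, the frame layout the ret epilogue assumes, pieced
 * together from FP_OFFSET, PC_OFFSET and SL_OFFSET at the top of this
 * table (a sketch; higher addresses upward):
 *
 *     fp+8:  static link, then arguments (EM_BSIZE = 8)
 *     fp+4:  saved PC, i.e. the caller's lr
 *     fp+0:  caller's fp
 *
 * So "lwz r0, 4(fp); mtspr lr, r0" reloads the return address,
 * "lwz r0, 0(fp)" fetches the old frame pointer, and
 * "addi sp, fp, 8" pops everything below the two saved words.
 */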
/*
 * These rules for blm/bls are wrong if length is zero.
 * So are several procedures in libem.
 */
pat blm                             /* Block move constant length */
    leaving
        loc $1
        bls
pat bls                             /* Block move variable length */
    with REG REG REG
        /* ( src%3 dst%2 len%1 -- ) */
        uses reusing %1, REG, REG, REG
        gen
            srwi %a, %1, {C, 2}
            mtspr ctr, %a
            addi %b, %3, {C, 0-4}
            addi %c, %2, {C, 0-4}
            1:
            lwzu %a, {IND_RC_W, %b, 4}
            stwu %a, {IND_RC_W, %c, 4}
            bdnz {LABEL, "1b"}
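/* The bls copy is the same ctr idiom as dus, but walking upward.  As
 * a C sketch (illustrative; per the warning above, len must be a
 * nonzero multiple of the word size):
 *
 *     uint32_t *s = src - 1, *d = dst - 1;
 *     for (unsigned n = len / 4; n != 0; n--)
 *             *++d = *++s;
 *
 * The two addi instructions bias src and dst down by 4 so that the
 * pre-incrementing lwzu/stwu pair can do the rest.
 */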
pat csa                             /* Array-lookup switch */
    with STACK
        kills ALL
        gen b {LABEL, ".csa"}
pat csb                             /* Table-lookup switch */
    with STACK
        kills ALL
        gen b {LABEL, ".csb"}

/* EM specials */

pat fil                             /* Set current filename */
    leaving
        lae $1
        ste "hol0+4"
pat lin                             /* Set current line number */
    leaving
        loc $1
        ste "hol0"
pat lni                             /* Increment line number */
    leaving ine "hol0"

pat lim                             /* Load EM trap ignore mask */
    leaving lde ".ignmask"
pat sim                             /* Store EM trap ignore mask */
    leaving ste ".ignmask"
pat trp                             /* Raise EM trap */
    with REG3
        kills ALL
        gen bl {LABEL, ".trap"}
pat sig                             /* Set trap handler */
    leaving ste ".trppc"
pat rtt                             /* Return from trap */
    leaving ret 0

/*
 * Lexical local base: lxl 0 yields our fp, lxl n yields the
 * fp of the nth statically enclosing procedure.
 */
pat lxl $1==0
    leaving lor 0
pat lxl $1==1
    yields {IND_RC_W, fp, SL_OFFSET}
pat lxl $1==2
    uses REG={IND_RC_W, fp, SL_OFFSET}
    yields {IND_RC_W, %a, SL_OFFSET}
pat lxl $1==3
    uses REG={IND_RC_W, fp, SL_OFFSET}, reusing %a, REG
    gen move {IND_RC_W, %a, SL_OFFSET}, %b
    yields {IND_RC_W, %b, SL_OFFSET}
pat lxl $1>=4 && $1<=0x8000
    uses REG={IND_RC_W, fp, SL_OFFSET}, REG={CONST_0000_7FFF, $1-1}
    gen
        mtspr ctr, %b
        1:
        lwz %a, {IND_RC_W, %a, SL_OFFSET}
        bdnz {LABEL, "1b"}
    yields %a
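/* Example: lxl 2 follows the static chain twice, i.e. in pointer
 * terms (a sketch):
 *
 *     char *p = fp;
 *     for (int i = 0; i < n; i++)
 *             p = *(char **)(p + SL_OFFSET);
 *
 * The $1>=4 case primes %a with one load outside the loop, then runs
 * $1-1 more iterations under ctr, for $1 loads in all.
 */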
pat dch                             /* Dynamic chain: LB -> caller's LB */
    with REG
        yields {IND_RC_W, %1, FP_OFFSET}
pat lpb                             /* LB -> argument base */
    leaving adp EM_BSIZE
pat lxa                             /* Lexical argument base */
    leaving
        lxl $1
        lpb

pat gto                             /* longjmp */
    with STACK
        uses REG
        gen
            move {LABEL, $1}, %a
            move {IND_RC_W, %a, 8}, fp
            move {IND_RC_W, %a, 4}, sp
            move {IND_RC_W, %a, 0}, %a
            mtspr ctr, %a
            bctr.

pat lor $1==0                       /* Load local base */
    uses REG
    gen move fp, %a
    yields %a
pat lor $1==1                       /* Load stack pointer */
    uses REG
    gen move sp, %a
    yields %a
pat str $1==0                       /* Store local base */
    with REG
        gen move %1, fp
pat str $1==1                       /* Store stack pointer */
    with REG
        gen move %1, sp

pat lae rck $2==4                   /* Range check */
    with REG
        kills ALL
        gen
            cmpwi %1, {C, rom($1, 1)}
            blt {LABEL, ".trap_erange"}
            cmpwi %1, {C, rom($1, 2)}
            bgt {LABEL, ".trap_erange"}
        yields %1

/* Single-precision floating-point */

pat zrf $1==INT32                   /* Push zero */
    leaving loe ".fs_00000000"

pat adf $1==4                       /* Add single */
    with FSREG FSREG
        uses reusing %1, FSREG
        gen fadds %a, %2, %1
        yields %a
pat adf stl $1==4 && inreg($2)==reg_float
    with FSREG FSREG
        gen fadds {LOCAL, $2}, %2, %1
pat sbf $1==4                       /* Subtract single */
    with FSREG FSREG
        uses reusing %1, FSREG
        gen fsubs %a, %2, %1
        yields %a
pat sbf stl $1==4 && inreg($2)==reg_float
    with FSREG FSREG
        gen fsubs {LOCAL, $2}, %2, %1
pat mlf $1==4                       /* Multiply single */
    with FSREG FSREG
        uses reusing %1, FSREG
        gen fmuls %a, %2, %1
        yields %a
pat mlf stl $1==4 && inreg($2)==reg_float
    with FSREG FSREG
        gen fmuls {LOCAL, $2}, %2, %1
pat dvf $1==INT32                   /* Divide single */
    with FSREG FSREG
        uses reusing %1, FSREG
        gen fdivs %a, %2, %1
        yields %a
pat dvf stl $1==4 && inreg($2)==reg_float
    with FSREG FSREG
        gen fdivs {LOCAL, $2}, %2, %1
pat ngf $1==INT32                   /* Negate single */
    with FSREG
        uses reusing %1, FSREG
        gen fneg %a, %1
        yields %a
pat ngf stl $1==4 && inreg($2)==reg_float
    with FSREG
        gen fneg {LOCAL, $2}, %1

pat cmf $1==INT32                   /* Compare single */
    with FSREG FSREG
        uses REG={COND_FS, %2, %1}
        gen extlwi %a, %a, {C, 2}, {C, 0}
        yields %a
pat cmf teq $1==4                   /* Single second == top */
    with FSREG FSREG
        uses REG={COND_FS, %2, %1}
        yields {XEQ, %a}
pat cmf tne $1==4                   /* Single second != top */
    with FSREG FSREG
        uses REG={COND_FS, %2, %1}
        yields {XNE, %a}
pat cmf tgt $1==4                   /* Single second > top */
    with FSREG FSREG
        uses REG={COND_FS, %2, %1}
        yields {XGT, %a}
pat cmf tge $1==4                   /* Single second >= top */
    with FSREG FSREG
        uses REG={COND_FS, %2, %1}
        yields {XGE, %a}
pat cmf tlt $1==4                   /* Single second < top */
    with FSREG FSREG
        uses REG={COND_FS, %2, %1}
        yields {XLT, %a}
pat cmf tle $1==4                   /* Single second <= top */
    with FSREG FSREG
        uses REG={COND_FS, %2, %1}
        yields {XLE, %a}

proc cmf4zxx example cmf zeq
    with FSREG FSREG STACK
        uses REG
        gen
            fcmpo cr0, %2, %1
            bxx* {LABEL, $2}

/* Pop 2 singles, branch if... */
pat cmf zeq $1==4 call cmf4zxx("beq")
pat cmf zne $1==4 call cmf4zxx("bne")
pat cmf zgt $1==4 call cmf4zxx("bgt")
pat cmf zge $1==4 call cmf4zxx("bge")
pat cmf zlt $1==4 call cmf4zxx("blt")
pat cmf zle $1==4 call cmf4zxx("ble")

pat loc loc cff $1==INT32 && $2==INT64  /* Convert single to double */
    with FSREG
        yields %1.1

/* Convert single to signed int */
pat loc loc cfi $1==4 && $2==4
    leaving
        loc 4
        loc 8
        cff
        loc 8
        loc 4
        cfi
/* Convert single to unsigned int */
pat loc loc cfu $1==4 && $2==4
    leaving
        loc 4
        loc 8
        cff
        loc 8
        loc 4
        cfu
/* Convert signed int to single */
pat loc loc cif $1==4 && $2==4
    leaving
        loc 4
        loc 8
        cif
        loc 8
        loc 4
        cff
/* Convert unsigned int to single */
pat loc loc cuf $1==4 && $2==4
    leaving
        loc 4
        loc 8
        cuf
        loc 8
        loc 4
        cff

/* Double-precision floating-point */

pat zrf $1==INT64                   /* Push zero */
    leaving lde ".fd_00000000"

pat adf $1==8                       /* Add double */
    with FREG FREG
        uses reusing %1, FREG
        gen fadd %a, %2, %1
        yields %a
pat adf sdl $1==8 && inreg($2)==reg_float
    with FREG FREG
        gen fadd {DLOCAL, $2}, %2, %1
pat sbf $1==8                       /* Subtract double */
    with FREG FREG
        uses reusing %1, FREG
        gen fsub %a, %2, %1
        yields %a
pat sbf sdl $1==8 && inreg($2)==reg_float
    with FREG FREG
        gen fsub {DLOCAL, $2}, %2, %1
pat mlf $1==8                       /* Multiply double */
    with FREG FREG
        uses reusing %1, FREG
        gen fmul %a, %2, %1
        yields %a
pat mlf sdl $1==8 && inreg($2)==reg_float
    with FREG FREG
        gen fmul {DLOCAL, $2}, %2, %1
pat dvf $1==8                       /* Divide double */
    with FREG FREG
        uses reusing %1, FREG
        gen fdiv %a, %2, %1
        yields %a
pat dvf sdl $1==8 && inreg($2)==reg_float
    with FREG FREG
        gen fdiv {DLOCAL, $2}, %2, %1
pat ngf $1==8                       /* Negate double */
    with FREG
        uses reusing %1, FREG
        gen fneg %a, %1
        yields %a
pat ngf sdl $1==8 && inreg($2)==reg_float
    with FREG
        gen fneg {DLOCAL, $2}, %1

pat cmf $1==INT64                   /* Compare double */
    with FREG FREG
        uses REG={COND_FD, %2, %1}
        gen extlwi %a, %a, {C, 2}, {C, 0}
        yields %a
pat cmf teq $1==8                   /* Double second == top */
    with FREG FREG
        uses REG={COND_FD, %2, %1}
        yields {XEQ, %a}
pat cmf tne $1==8                   /* Double second != top */
    with FREG FREG
        uses REG={COND_FD, %2, %1}
        yields {XNE, %a}
pat cmf tgt $1==8                   /* Double second > top */
    with FREG FREG
        uses REG={COND_FD, %2, %1}
        yields {XGT, %a}
pat cmf tge $1==8                   /* Double second >= top */
    with FREG FREG
        uses REG={COND_FD, %2, %1}
        yields {XGE, %a}
pat cmf tlt $1==8                   /* Double second < top */
    with FREG FREG
        uses REG={COND_FD, %2, %1}
        yields {XLT, %a}
pat cmf tle $1==8                   /* Double second <= top */
    with FREG FREG
        uses REG={COND_FD, %2, %1}
        yields {XLE, %a}

proc cmf8zxx example cmf zeq
    with FREG FREG STACK
        uses REG
        gen
            fcmpo cr0, %2, %1
            bxx* {LABEL, $2}

/* Pop 2 doubles, branch if... */
pat cmf zeq $1==8 call cmf8zxx("beq")
pat cmf zne $1==8 call cmf8zxx("bne")
pat cmf zgt $1==8 call cmf8zxx("bgt")
pat cmf zge $1==8 call cmf8zxx("bge")
pat cmf zlt $1==8 call cmf8zxx("blt")
pat cmf zle $1==8 call cmf8zxx("ble")

/* Convert double to single */
/* reg_float pattern must be first, or it goes unused! */
pat loc loc cff stl $1==8 && $2==4 && inreg($4)==reg_float
    with FREG
        gen frsp {LOCAL, $4}, %1
pat loc loc cff $1==8 && $2==4
    with FREG
        uses reusing %1, FSREG
        gen frsp %a, %1
        yields %a

/* Convert double to signed int */
pat loc loc cfi $1==8 && $2==4
    with FREG STACK
        uses reusing %1, FREG
        gen
            fctiwz %a, %1
            stfdu %a, {IND_RC_D, sp, 0-8}
            addi sp, sp, {C, 4}
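/* How that sequence works (assuming the big-endian layout of the
 * targets this table serves): fctiwz leaves the converted 32-bit
 * integer in the low word of the FPR, stfdu parks all 8 bytes at
 * sp-8, and the addi of 4 then discards the meaningless high word,
 * leaving the integer result exactly on top of the stack.
 */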
/* Convert double to unsigned int */
pat loc loc cfu $1==8 && $2==4
    leaving cal ".cfu8"
/* Convert signed int to double */
pat loc loc cif $1==4 && $2==8
    leaving cal ".cif8"
/* Convert unsigned int to double */
pat loc loc cuf $1==4 && $2==8
    leaving cal ".cuf8"

pat fef $1==8                       /* Split fraction, exponent */
    leaving cal ".fef8"
/* Multiply two doubles, then split fraction, integer */
pat fif $1==8
    leaving cal ".fif8"