2007-11-02 18:56:58 +00:00
|
|
|
EM_WSIZE = 4
|
|
|
|
EM_PSIZE = 4
|
|
|
|
EM_BSIZE = 8 /* two words saved in call frame */
|
|
|
|
|
|
|
|
INT8 = 1 /* Size of values */
|
|
|
|
INT16 = 2
|
|
|
|
INT32 = 4
|
|
|
|
INT64 = 8
|
|
|
|
|
|
|
|
FP_OFFSET = 0 /* Offset of saved FP relative to our FP */
|
|
|
|
PC_OFFSET = 4 /* Offset of saved PC relative to our FP */
|
2017-02-14 04:22:31 +00:00
|
|
|
SL_OFFSET = 8 /* Offset of static link */
|
2007-11-02 18:56:58 +00:00
|
|
|
|
2017-01-30 20:45:46 +00:00
|
|
|
#define COMMENT(n) /* comment {LABEL, n} */
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
|
|
|
|
#define nicesize(x) ((x)==INT8 || (x)==INT16 || (x)==INT32 || (x)==INT64)
|
|
|
|
|
|
|
|
#define smalls(n) sfit(n, 16)
|
|
|
|
#define smallu(n) ufit(n, 16)
|
|
|
|
|
2016-10-15 03:59:26 +00:00
|
|
|
#define lo(n) ((n) & 0xFFFF)
|
|
|
|
#define hi(n) (((n)>>16) & 0xFFFF)
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
/* Use these for instructions that treat the low half as signed --- his()
|
|
|
|
* includes a modifier to produce the correct value when the low half gets
|
|
|
|
* sign extended. Er, do make sure you load the low half second. */
|
2016-10-15 03:59:26 +00:00
|
|
|
#define los(n) (lo(n) | (((0-(lo(n)>>15)) & ~0xFFFF)))
|
|
|
|
#define his(n) ((hi(n) + (lo(n)>>15)) & 0xFFFF)
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
|
|
|
|
PROPERTIES
|
|
|
|
|
2017-02-18 00:32:27 +00:00
|
|
|
GPR /* general-purpose register */
|
|
|
|
REG /* allocatable GPR */
|
|
|
|
REG3 /* coercion to r3 */
|
|
|
|
|
|
|
|
FPR(8) /* floating-point register */
|
|
|
|
FREG(8) /* allocatable FPR */
|
|
|
|
FSREG /* allocatable single-precision FPR */
|
|
|
|
|
|
|
|
SPR /* special-purpose register */
|
|
|
|
CR /* condition register */
|
2016-12-09 21:36:42 +00:00
|
|
|
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
REGISTERS
|
|
|
|
|
2017-02-18 00:32:27 +00:00
|
|
|
/*
|
|
|
|
* When ncg allocates regvars, it seems to start with the last
|
|
|
|
* register in the first class. To encourage ncg to allocate
|
|
|
|
* them from r31 down, we list them in one class as
|
|
|
|
* r13, r14, ..., r31: GPR, REG regvar(reg_any).
|
|
|
|
*/
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-02-18 00:32:27 +00:00
|
|
|
r0, sp, fp : GPR.
|
|
|
|
r3 : GPR, REG, REG3.
|
2017-02-13 22:44:46 +00:00
|
|
|
|
2017-02-18 00:32:27 +00:00
|
|
|
r4, r5, r6, r7, r8, r9, r10, r11, r12
|
|
|
|
: GPR, REG.
|
2017-02-13 22:44:46 +00:00
|
|
|
|
2017-02-18 00:32:27 +00:00
|
|
|
r13, r14, r15, r16, r17, r18, r19, r20, r21, r22, r23, r24,
|
|
|
|
r25, r26, r27, r28, r29, r30, r31
|
|
|
|
: GPR, REG regvar(reg_any).
|
2013-05-07 23:48:48 +00:00
|
|
|
|
2017-02-18 00:32:27 +00:00
|
|
|
f0 : FPR.
|
2017-02-13 22:44:46 +00:00
|
|
|
|
2017-02-18 00:32:27 +00:00
|
|
|
f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13
|
|
|
|
: FPR, FREG.
|
2017-02-13 22:44:46 +00:00
|
|
|
|
2017-02-18 00:32:27 +00:00
|
|
|
f14, f15, f16, f17, f18, f19, f20, f21, f22, f23, f24, f25,
|
|
|
|
f26, f27, f28, f29, f30, f31
|
|
|
|
: FPR, FREG regvar(reg_float).
|
2017-02-13 22:44:46 +00:00
|
|
|
|
2017-02-18 00:32:27 +00:00
|
|
|
fs1("f1")=f1, fs2("f2")=f2, fs3("f3")=f3, fs4("f4")=f4,
|
|
|
|
fs5("f5")=f5, fs6("f6")=f6, fs7("f7")=f7, fs8("f8")=f8,
|
|
|
|
fs9("f9")=f9, fs10("f10")=f10, fs11("f11")=f11, fs12("f12")=f12,
|
|
|
|
fs13("f13")=f13
|
|
|
|
: FSREG.
|
2017-02-13 22:44:46 +00:00
|
|
|
|
2017-10-14 16:40:04 +00:00
|
|
|
/* reglap: reg_float may have subregister of different size */
|
|
|
|
fs14("f14")=f14, fs15("f15")=f15, fs16("f16")=f16, fs17("f17")=f17,
|
|
|
|
fs18("f18")=f18, fs19("f19")=f19, fs20("f20")=f20, fs21("f21")=f21,
|
|
|
|
fs22("f22")=f22, fs23("f23")=f23, fs24("f24")=f24, fs25("f25")=f25,
|
|
|
|
fs26("f26")=f26, fs27("f27")=f27, fs28("f28")=f28, fs29("f29")=f29,
|
|
|
|
fs30("f30")=f30, fs31("f31")=f31
|
|
|
|
: FSREG regvar(reg_float).
|
|
|
|
|
2017-02-13 22:44:46 +00:00
|
|
|
lr, ctr : SPR.
|
|
|
|
cr0 : CR.
|
|
|
|
|
|
|
|
#define RSCRATCH r0
|
|
|
|
#define FSCRATCH f0
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
|
|
|
|
TOKENS
|
|
|
|
|
|
|
|
/* Primitives */
|
|
|
|
|
2017-02-02 15:48:25 +00:00
|
|
|
CONST = { INT val; } 4 val.
|
2007-11-02 18:56:58 +00:00
|
|
|
LABEL = { ADDR adr; } 4 adr.
|
2017-02-08 17:12:28 +00:00
|
|
|
LABEL_HI = { ADDR adr; } 4 "hi16[" adr "]".
|
|
|
|
LABEL_HA = { ADDR adr; } 4 "ha16[" adr "]".
|
|
|
|
LABEL_LO = { ADDR adr; } 4 "lo16[" adr "]".
|
|
|
|
LOCAL = { INT off; } 4 ">>> BUG IN LOCAL".
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
DLOCAL = { INT off; } 8 ">>> BUG IN DLOCAL".
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
/* Allows us to use regvar() to refer to registers */
|
|
|
|
|
2017-10-17 18:15:33 +00:00
|
|
|
GPR_EXPR = { GPR reg; } 4 reg.
|
|
|
|
FPR_EXPR = { FPR reg; } 8 reg.
|
|
|
|
FSREG_EXPR = { FSREG reg; } 4 reg.
|
2007-11-02 18:56:58 +00:00
|
|
|
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
/* Constants on the stack */
|
|
|
|
|
|
|
|
CONST_N8000 = { INT val; } 4.
|
|
|
|
CONST_N7FFF_N0001 = { INT val; } 4.
|
|
|
|
CONST_0000_7FFF = { INT val; } 4.
|
|
|
|
CONST_8000 = { INT val; } 4.
|
|
|
|
CONST_8001_FFFF = { INT val; } 4.
|
|
|
|
CONST_HZ = { INT val; } 4.
|
|
|
|
CONST_HL = { INT val; } 4.
|
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
/* Expression partial results */
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
|
In PowerPC ncg, allocate register for ha16[label].
Use it to generate code like
lis r12,ha16[__II0]
lis r11,ha16[_f]
lfs f1,lo16[_f](r11)
lfs f2,lo16[__II0](r12)
fadds f13,f2,f1
stfs f13,lo16[_f](r11)
Here ncg has allocated r11 for ha16[_f]. We use r11 in lfs and again
in stfs. Before this change, we needed an extra lis before stfs,
because ncg did not remember that ha16[_f] was in a register.
This example has a gap between ha16[__II0] and lo16[__II0], because
the lo16 is not in the next instruction. This requires my previous
commit 1bf58cf for RELOLIS. There is a gap because ncg emits the lis
as soon as I allocate it. The "lfs f2,lo16[__II0](r12)" happens in a
coercion from IND_RL_W to FSREG. The coercion allocates one FSREG but
may not allocate any other registers. So I must allocate r12 earlier.
I allocate r12 in pat lae, but this causes a gap.
2017-02-08 17:23:06 +00:00
|
|
|
SUM_RIS = { GPR reg; INT offhi; } 4. /* reg + (offhi << 16) */
|
|
|
|
SUM_RC = { GPR reg; INT off; } 4. /* reg + off */
|
|
|
|
SUM_RL = { GPR reg; ADDR adr; } 4. /* reg + lo16[adr] */
|
|
|
|
SUM_RR = { GPR reg1; GPR reg2; } 4. /* reg1 + reg2 */
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
SEX_B = { GPR reg; } 4.
|
|
|
|
SEX_H = { GPR reg; } 4.
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-02-02 15:48:25 +00:00
|
|
|
IND_RC_B = { GPR reg; INT off; } 4 off "(" reg ")".
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
IND_RL_B = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")".
|
2017-02-02 15:48:25 +00:00
|
|
|
IND_RR_B = { GPR reg1; GPR reg2; } 4.
|
|
|
|
IND_RC_H = { GPR reg; INT off; } 4 off "(" reg ")".
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
IND_RL_H = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")".
|
2017-02-02 15:48:25 +00:00
|
|
|
IND_RR_H = { GPR reg1; GPR reg2; } 4.
|
|
|
|
IND_RC_H_S = { GPR reg; INT off; } 4 off "(" reg ")".
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
IND_RL_H_S = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")".
|
2017-02-02 15:48:25 +00:00
|
|
|
IND_RR_H_S = { GPR reg1; GPR reg2; } 4.
|
|
|
|
IND_RC_W = { GPR reg; INT off; } 4 off "(" reg ")".
|
|
|
|
IND_RL_W = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")".
|
|
|
|
IND_RR_W = { GPR reg1; GPR reg2; } 4.
|
|
|
|
IND_RC_D = { GPR reg; INT off; } 8 off "(" reg ")".
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
IND_RL_D = { GPR reg; ADDR adr; } 8 "lo16[" adr "](" reg ")".
|
2017-02-02 15:48:25 +00:00
|
|
|
IND_RR_D = { GPR reg1; GPR reg2; } 8.
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
NOT_R = { GPR reg; } 4.
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
AND_RR = { GPR reg1; GPR reg2; } 4.
|
|
|
|
OR_RR = { GPR reg1; GPR reg2; } 4.
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
OR_RIS = { GPR reg; INT valhi; } 4.
|
|
|
|
OR_RC = { GPR reg; INT val; } 4.
|
2007-11-02 18:56:58 +00:00
|
|
|
XOR_RR = { GPR reg1; GPR reg2; } 4.
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
XOR_RIS = { GPR reg; INT valhi; } 4.
|
|
|
|
XOR_RC = { GPR reg; INT val; } 4.
|
2007-11-02 18:56:58 +00:00
|
|
|
|
2017-01-26 00:08:55 +00:00
|
|
|
COND_RC = { GPR reg; INT val; } 4.
|
|
|
|
COND_RR = { GPR reg1; GPR reg2; } 4.
|
|
|
|
CONDL_RC = { GPR reg; INT val; } 4.
|
|
|
|
CONDL_RR = { GPR reg1; GPR reg2; } 4.
|
|
|
|
COND_FS = { FSREG reg1; FSREG reg2; } 4.
|
|
|
|
COND_FD = { FREG reg1; FREG reg2; } 4.
|
|
|
|
|
|
|
|
XEQ = { GPR reg; } 4.
|
|
|
|
XNE = { GPR reg; } 4.
|
|
|
|
XGT = { GPR reg; } 4.
|
|
|
|
XGE = { GPR reg; } 4.
|
|
|
|
XLT = { GPR reg; } 4.
|
|
|
|
XLE = { GPR reg; } 4.
|
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
SETS
|
|
|
|
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
/* signed 16-bit integer */
|
|
|
|
CONST2 = CONST_N8000 + CONST_N7FFF_N0001 + CONST_0000_7FFF.
|
|
|
|
/* integer that, when negated, fits signed 16-bit */
|
|
|
|
CONST2_WHEN_NEG = CONST_N7FFF_N0001 + CONST_0000_7FFF + CONST_8000.
|
|
|
|
/* unsigned 16-bit integer */
|
|
|
|
UCONST2 = CONST_0000_7FFF + CONST_8000 + CONST_8001_FFFF.
|
|
|
|
/* any constant on stack */
|
2017-02-08 17:12:28 +00:00
|
|
|
CONST_STACK = CONST_N8000 + CONST_N7FFF_N0001 + CONST_0000_7FFF +
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
CONST_8000 + CONST_8001_FFFF + CONST_HZ + CONST_HL.
|
|
|
|
|
In PowerPC ncg, allocate register for ha16[label].
Use it to generate code like
lis r12,ha16[__II0]
lis r11,ha16[_f]
lfs f1,lo16[_f](r11)
lfs f2,lo16[__II0](r12)
fadds f13,f2,f1
stfs f13,lo16[_f](r11)
Here ncg has allocated r11 for ha16[_f]. We use r11 in lfs and again
in stfs. Before this change, we needed an extra lis before stfs,
because ncg did not remember that ha16[_f] was in a register.
This example has a gap between ha16[__II0] and lo16[__II0], because
the lo16 is not in the next instruction. This requires my previous
commit 1bf58cf for RELOLIS. There is a gap because ncg emits the lis
as soon as I allocate it. The "lfs f2,lo16[__II0](r12)" happens in a
coercion from IND_RL_W to FSREG. The coercion allocates one FSREG but
may not allocate any other registers. So I must allocate r12 earlier.
I allocate r12 in pat lae, but this causes a gap.
2017-02-08 17:23:06 +00:00
|
|
|
SUM_ALL = SUM_RC + SUM_RL + SUM_RR.
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
SEX_ALL = SEX_B + SEX_H.
|
2016-12-09 21:36:42 +00:00
|
|
|
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
LOGICAL_ALL = NOT_R + AND_RR + OR_RR + OR_RC + XOR_RR +
|
2007-11-02 18:56:58 +00:00
|
|
|
XOR_RC.
|
|
|
|
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
IND_ALL_B = IND_RC_B + IND_RL_B + IND_RR_B.
|
|
|
|
IND_ALL_H = IND_RC_H + IND_RL_H + IND_RR_H +
|
|
|
|
IND_RC_H_S + IND_RL_H_S + IND_RR_H_S.
|
2017-02-02 15:48:25 +00:00
|
|
|
IND_ALL_W = IND_RC_W + IND_RL_W + IND_RR_W.
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
IND_ALL_D = IND_RC_D + IND_RL_D + IND_RR_D.
|
2017-02-08 17:12:28 +00:00
|
|
|
IND_ALL_BHW = IND_ALL_B + IND_ALL_H + IND_ALL_W.
|
2016-10-16 22:13:39 +00:00
|
|
|
|
|
|
|
/* anything killed by sti (store indirect) */
|
|
|
|
MEMORY = IND_ALL_BHW + IND_ALL_D.
|
|
|
|
|
2017-02-08 17:12:28 +00:00
|
|
|
/* any stack token that we can easily move to GPR */
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
ANY_BHW = REG + CONST_STACK + SEX_ALL +
|
2017-02-08 17:12:28 +00:00
|
|
|
SUM_ALL + IND_ALL_BHW + LOGICAL_ALL.
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
|
|
|
|
INSTRUCTIONS
|
|
|
|
|
2016-10-17 18:57:21 +00:00
|
|
|
/* We give time as cycles of total latency from Freescale
|
|
|
|
* Semiconductor, MPC7450 RISC Microprocessor Family Reference
|
|
|
|
* Manual, Rev. 5, section 6.6.
|
|
|
|
*
|
|
|
|
* We have only 4-byte alignment for doubles; 8-byte alignment is
|
|
|
|
* optimal. We guess the misalignment penalty by adding 1 cycle to
|
|
|
|
* the cost of loading or storing a double:
|
|
|
|
* lfd lfdu lfdx: 4 -> 5
|
|
|
|
* stfd stfdu stfdx: 3 -> 4
|
|
|
|
*/
|
|
|
|
cost(4, 1) /* space, time */
|
|
|
|
|
2016-10-07 02:59:27 +00:00
|
|
|
add GPR:wo, GPR:ro, GPR:ro.
|
|
|
|
addX "add." GPR:wo, GPR:ro, GPR:ro.
|
2017-02-02 15:48:25 +00:00
|
|
|
addi GPR:wo, GPR:ro, CONST+LABEL_LO:ro.
|
2017-02-08 17:12:28 +00:00
|
|
|
li GPR:wo, CONST:ro.
|
|
|
|
addis GPR:wo, GPR:ro, CONST+LABEL_HI+LABEL_HA:ro.
|
|
|
|
lis GPR:wo, CONST+LABEL_HI+LABEL_HA:ro.
|
2016-10-07 02:59:27 +00:00
|
|
|
and GPR:wo, GPR:ro, GPR:ro.
|
|
|
|
andc GPR:wo, GPR:ro, GPR:ro.
|
2016-10-17 18:57:21 +00:00
|
|
|
andiX "andi." GPR:wo:cc, GPR:ro, CONST:ro.
|
|
|
|
andisX "andis." GPR:wo:cc, GPR:ro, CONST:ro.
|
2007-11-02 18:56:58 +00:00
|
|
|
b LABEL:ro.
|
|
|
|
bc CONST:ro, CONST:ro, LABEL:ro.
|
2017-02-12 00:30:12 +00:00
|
|
|
bdnz LABEL:ro.
|
2017-01-26 00:08:55 +00:00
|
|
|
beq LABEL:ro.
|
|
|
|
bne LABEL:ro.
|
|
|
|
bgt LABEL:ro.
|
|
|
|
bge LABEL:ro.
|
|
|
|
blt LABEL:ro.
|
|
|
|
ble LABEL:ro.
|
|
|
|
bxx LABEL:ro. /* dummy */
|
2007-11-02 18:56:58 +00:00
|
|
|
bcctr CONST:ro, CONST:ro, CONST:ro.
|
2017-01-26 00:08:55 +00:00
|
|
|
bctr.
|
2007-11-02 18:56:58 +00:00
|
|
|
bcctrl CONST:ro, CONST:ro, CONST:ro.
|
2017-01-26 00:08:55 +00:00
|
|
|
bctrl.
|
2007-11-02 18:56:58 +00:00
|
|
|
bclr CONST:ro, CONST:ro, CONST:ro.
|
2017-02-17 02:18:39 +00:00
|
|
|
blr.
|
2007-11-02 18:56:58 +00:00
|
|
|
bl LABEL:ro.
|
2016-10-07 02:59:27 +00:00
|
|
|
cmp CR:ro, CONST:ro, GPR:ro, GPR:ro kills :cc.
|
2017-01-26 00:08:55 +00:00
|
|
|
cmpw GPR:ro, GPR:ro kills :cc.
|
2016-10-07 02:59:27 +00:00
|
|
|
cmpi CR:ro, CONST:ro, GPR:ro, CONST:ro kills :cc.
|
2017-01-26 00:08:55 +00:00
|
|
|
cmpwi GPR:ro, CONST:ro kills :cc.
|
2016-10-07 02:59:27 +00:00
|
|
|
cmpl CR:ro, CONST:ro, GPR:ro, GPR:ro kills :cc.
|
2017-01-26 00:08:55 +00:00
|
|
|
cmplw GPR:ro, GPR:ro kills :cc.
|
2016-10-07 02:59:27 +00:00
|
|
|
cmpli CR:ro, CONST:ro, GPR:ro, CONST:ro kills :cc.
|
2017-01-26 00:08:55 +00:00
|
|
|
cmplwi GPR:ro, CONST:ro kills :cc.
|
2016-10-17 18:57:21 +00:00
|
|
|
divw GPR:wo, GPR:ro, GPR:ro cost(4, 23).
|
|
|
|
divwu GPR:wo, GPR:ro, GPR:ro cost(4, 23).
|
2016-10-07 02:59:27 +00:00
|
|
|
eqv GPR:wo, GPR:ro, GPR:ro.
|
|
|
|
extsb GPR:wo, GPR:ro.
|
|
|
|
extsh GPR:wo, GPR:ro.
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
fadd FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 5).
|
2017-10-17 21:53:03 +00:00
|
|
|
fadds FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5).
|
2017-01-26 00:08:55 +00:00
|
|
|
fcmpo CR:wo, FREG:ro, FREG:ro cost(4, 5).
|
|
|
|
fcmpo CR:wo, FSREG:ro, FSREG:ro cost(4, 5).
|
2017-02-12 04:23:47 +00:00
|
|
|
fctiwz FREG:wo, FREG:ro.
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
fdiv FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 35).
|
2017-10-17 21:53:03 +00:00
|
|
|
fdivs FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 21).
|
2017-10-17 18:15:33 +00:00
|
|
|
fmr FPR:wo, FPR:ro cost(4, 5).
|
|
|
|
fmr FSREG:wo, FSREG:ro cost(4, 5).
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
fmul FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 5).
|
2017-10-17 21:53:03 +00:00
|
|
|
fmuls FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5).
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
fneg FREG+DLOCAL:wo, FREG:ro cost(4, 5).
|
2017-10-17 21:53:03 +00:00
|
|
|
fneg FSREG+LOCAL:wo, FSREG:ro cost(4, 5).
|
2016-10-17 18:57:21 +00:00
|
|
|
frsp FSREG:wo, FREG:ro cost(4, 5).
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
fsub FREG+DLOCAL:wo, FREG:ro, FREG:ro cost(4, 5).
|
2017-10-17 21:53:03 +00:00
|
|
|
fsubs FSREG+LOCAL:wo, FSREG:ro, FSREG:ro cost(4, 5).
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
lbz GPR:wo, IND_RC_B+IND_RL_B:ro cost(4, 3).
|
2016-10-17 18:57:21 +00:00
|
|
|
lbzx GPR:wo, GPR:ro, GPR:ro cost(4, 3).
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
lfd FPR+DLOCAL:wo, IND_RC_D+IND_RL_D:ro cost(4, 5).
|
2017-02-08 17:12:28 +00:00
|
|
|
lfdu FPR:wo, IND_RC_D:ro cost(4, 5).
|
2017-10-17 18:15:33 +00:00
|
|
|
lfdx FPR:wo, GPR:ro, GPR:ro cost(4, 5).
|
2017-10-14 16:40:04 +00:00
|
|
|
lfs FSREG+LOCAL:wo, IND_RC_W+IND_RL_W:ro cost(4, 4).
|
2017-02-08 17:12:28 +00:00
|
|
|
lfsu FSREG:wo, IND_RC_W:rw cost(4, 4).
|
2017-10-17 18:15:33 +00:00
|
|
|
lfsx FSREG:wo, GPR:ro, GPR:ro cost(4, 4).
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
lha GPR:wo, IND_RC_H_S+IND_RL_H_S:ro cost(4, 3).
|
2016-10-17 18:57:21 +00:00
|
|
|
lhax GPR:wo, GPR:ro, GPR:ro cost(4, 3).
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
lhz GPR:wo, IND_RC_H+IND_RL_H:ro cost(4, 3).
|
2016-10-17 18:57:21 +00:00
|
|
|
lhzx GPR:wo, GPR:ro, GPR:ro cost(4, 3).
|
2017-10-19 16:44:46 +00:00
|
|
|
lwzu GPR:wo, IND_RC_W:rw cost(4, 3).
|
2016-10-17 18:57:21 +00:00
|
|
|
lwzx GPR:wo, GPR:ro, GPR:ro cost(4, 3).
|
2017-10-17 18:15:33 +00:00
|
|
|
lwz GPR+LOCAL:wo, IND_RC_W+IND_RL_W:ro cost(4, 3).
|
2016-10-07 02:59:27 +00:00
|
|
|
nand GPR:wo, GPR:ro, GPR:ro.
|
|
|
|
neg GPR:wo, GPR:ro.
|
|
|
|
nor GPR:wo, GPR:ro, GPR:ro.
|
2016-10-17 18:57:21 +00:00
|
|
|
mfcr GPR:wo cost(4,2).
|
|
|
|
mullw GPR:wo, GPR:ro, GPR:ro cost(4, 4).
|
|
|
|
mfspr GPR:wo, SPR:ro cost(4, 3).
|
|
|
|
mtspr SPR:wo, GPR:ro cost(4, 2).
|
2016-10-07 02:59:27 +00:00
|
|
|
or GPR:wo, GPR:ro, GPR:ro.
|
2017-02-10 16:45:50 +00:00
|
|
|
mr GPR:wo, GPR:ro.
|
|
|
|
orX "or." GPR:wo:cc, GPR:ro, GPR:ro.
|
|
|
|
orX_readonly "or." GPR:ro:cc, GPR:ro, GPR:ro.
|
2016-10-07 02:59:27 +00:00
|
|
|
orc GPR:wo, GPR:ro, GPR:ro.
|
2017-02-08 17:12:28 +00:00
|
|
|
ori GPR:wo, GPR:ro, CONST+LABEL_LO:ro.
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
oris GPR:wo, GPR:ro, CONST:ro.
|
2016-10-07 02:59:27 +00:00
|
|
|
rlwinm GPR:wo, GPR:ro, CONST:ro, CONST:ro, CONST:ro.
|
2017-01-26 00:08:55 +00:00
|
|
|
extlwi GPR:wo, GPR:ro, CONST:ro, CONST:ro.
|
|
|
|
extrwi GPR:wo, GPR:ro, CONST:ro, CONST:ro.
|
2017-02-12 00:30:12 +00:00
|
|
|
srwi GPR:wo, GPR:ro, CONST:ro.
|
2016-10-07 02:59:27 +00:00
|
|
|
slw GPR:wo, GPR:ro, GPR:ro.
|
|
|
|
subf GPR:wo, GPR:ro, GPR:ro.
|
2016-10-17 18:57:21 +00:00
|
|
|
sraw GPR:wo, GPR:ro, GPR:ro cost(4, 2).
|
|
|
|
srawi GPR:wo, GPR:ro, CONST:ro cost(4, 2).
|
2016-10-07 02:59:27 +00:00
|
|
|
srw GPR:wo, GPR:ro, GPR:ro.
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
stb GPR:ro, IND_RC_B+IND_RL_B:rw cost(4, 3).
|
2016-10-17 18:57:21 +00:00
|
|
|
stbx GPR:ro, GPR:ro, GPR:ro cost(4, 3).
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
stfd FPR:ro, IND_RC_D+IND_RL_D:rw cost(4, 4).
|
2017-10-17 18:15:33 +00:00
|
|
|
stfdu FPR:ro, IND_RC_D:rw cost(4, 4).
|
2016-10-17 18:57:21 +00:00
|
|
|
stfdx FPR:ro, GPR:ro, GPR:ro cost(4, 4).
|
2017-02-02 15:48:25 +00:00
|
|
|
stfs FSREG:ro, IND_RC_W+IND_RL_W:rw cost(4, 3).
|
2017-10-17 18:15:33 +00:00
|
|
|
stfsu FSREG:ro, IND_RC_W:rw cost(4, 3).
|
2016-10-17 18:57:21 +00:00
|
|
|
stfsx FSREG:ro, GPR:ro, GPR:ro cost(4, 3).
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
sth GPR:ro, IND_RC_H+IND_RL_H:rw cost(4, 3).
|
2016-10-17 18:57:21 +00:00
|
|
|
sthx GPR:ro, GPR:ro, GPR:ro cost(4, 3).
|
2017-02-02 15:48:25 +00:00
|
|
|
stw GPR:ro, IND_RC_W+IND_RL_W:rw cost(4, 3).
|
2016-10-17 18:57:21 +00:00
|
|
|
stwx GPR:ro, GPR:ro, GPR:ro cost(4, 3).
|
2017-10-17 18:15:33 +00:00
|
|
|
stwu GPR:ro, IND_RC_W:rw cost(4, 3).
|
2016-10-07 02:59:27 +00:00
|
|
|
xor GPR:wo, GPR:ro, GPR:ro.
|
|
|
|
xori GPR:wo, GPR:ro, CONST:ro.
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
xoris GPR:wo, GPR:ro, CONST:ro.
|
2016-09-27 20:46:11 +00:00
|
|
|
|
2017-10-15 19:22:52 +00:00
|
|
|
bug ">>> BUG" LABEL:ro cost(0, 0).
|
2017-01-14 23:15:01 +00:00
|
|
|
comment "!" LABEL:ro cost(0, 0).
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
|
|
|
|
MOVES
|
|
|
|
|
|
|
|
from GPR to GPR
|
2017-02-10 16:45:50 +00:00
|
|
|
gen mr %2, %1
|
2007-11-02 18:56:58 +00:00
|
|
|
|
2017-10-14 16:40:04 +00:00
|
|
|
from FSREG to FSREG
|
|
|
|
gen fmr %2, %1
|
|
|
|
|
2017-10-17 18:15:33 +00:00
|
|
|
from FPR to FPR
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
gen fmr %2, %1
|
|
|
|
|
2017-02-08 17:12:28 +00:00
|
|
|
/* Constants */
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-02-08 17:12:28 +00:00
|
|
|
from CONST + CONST_STACK smalls(%val) to GPR
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
2017-02-08 17:12:28 +00:00
|
|
|
COMMENT("move CONST->GPR smalls")
|
|
|
|
li %2, {CONST, %1.val}
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-02-08 17:12:28 +00:00
|
|
|
from CONST + CONST_STACK lo(%val)==0 to GPR
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
2017-02-08 17:12:28 +00:00
|
|
|
COMMENT("move CONST->GPR shifted")
|
|
|
|
lis %2, {CONST, hi(%1.val)}
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
|
2017-02-08 17:12:28 +00:00
|
|
|
from CONST + CONST_STACK to GPR
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
2017-02-08 17:12:28 +00:00
|
|
|
COMMENT("move CONST->GPR")
|
|
|
|
lis %2, {CONST, hi(%1.val)}
|
2007-11-02 18:56:58 +00:00
|
|
|
ori %2, %2, {CONST, lo(%1.val)}
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
/* Can't use addi %2, %2, {CONST, los(%1.val)}
|
|
|
|
* because %2 might be R0. */
|
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
from LABEL to GPR
|
|
|
|
gen
|
|
|
|
COMMENT("move LABEL->GPR")
|
2017-02-08 17:12:28 +00:00
|
|
|
lis %2, {LABEL_HI, %1.adr}
|
|
|
|
ori %2, %2, {LABEL_LO, %1.adr}
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-02-02 15:48:25 +00:00
|
|
|
from LABEL_HA to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen lis %2, %1
|
2017-02-02 15:48:25 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
/* Sign extension */
|
|
|
|
|
|
|
|
from SEX_B to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen extsb %2, %1.reg
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
from SEX_H to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen extsh %2, %1.reg
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
/* Register + something */
|
|
|
|
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
from SUM_RIS to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen addis %2, %1.reg, {CONST, %1.offhi}
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
from SUM_RC to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen addi %2, %1.reg, {CONST, %1.off}
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
|
In PowerPC ncg, allocate register for ha16[label].
Use it to generate code like
lis r12,ha16[__II0]
lis r11,ha16[_f]
lfs f1,lo16[_f](r11)
lfs f2,lo16[__II0](r12)
fadds f13,f2,f1
stfs f13,lo16[_f](r11)
Here ncg has allocated r11 for ha16[_f]. We use r11 in lfs and again
in stfs. Before this change, we needed an extra lis before stfs,
because ncg did not remember that ha16[_f] was in a register.
This example has a gap between ha16[__II0] and lo16[__II0], because
the lo16 is not in the next instruction. This requires my previous
commit 1bf58cf for RELOLIS. There is a gap because ncg emits the lis
as soon as I allocate it. The "lfs f2,lo16[__II0](r12)" happens in a
coercion from IND_RL_W to FSREG. The coercion allocates one FSREG but
may not allocate any other registers. So I must allocate r12 earlier.
I allocate r12 in pat lae, but this causes a gap.
2017-02-08 17:23:06 +00:00
|
|
|
from SUM_RL to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen addi %2, %1.reg, {LABEL_LO, %1.adr}
|
In PowerPC ncg, allocate register for ha16[label].
Use it to generate code like
lis r12,ha16[__II0]
lis r11,ha16[_f]
lfs f1,lo16[_f](r11)
lfs f2,lo16[__II0](r12)
fadds f13,f2,f1
stfs f13,lo16[_f](r11)
Here ncg has allocated r11 for ha16[_f]. We use r11 in lfs and again
in stfs. Before this change, we needed an extra lis before stfs,
because ncg did not remember that ha16[_f] was in a register.
This example has a gap between ha16[__II0] and lo16[__II0], because
the lo16 is not in the next instruction. This requires my previous
commit 1bf58cf for RELOLIS. There is a gap because ncg emits the lis
as soon as I allocate it. The "lfs f2,lo16[__II0](r12)" happens in a
coercion from IND_RL_W to FSREG. The coercion allocates one FSREG but
may not allocate any other registers. So I must allocate r12 earlier.
I allocate r12 in pat lae, but this causes a gap.
2017-02-08 17:23:06 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
from SUM_RR to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen add %2, %1.reg1, %1.reg2
|
2016-10-07 02:59:27 +00:00
|
|
|
|
2016-10-16 22:13:39 +00:00
|
|
|
/* Read byte */
|
2007-11-02 18:56:58 +00:00
|
|
|
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
from IND_RC_B+IND_RL_B to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen lbz %2, %1
|
2016-10-16 20:02:25 +00:00
|
|
|
|
2016-10-16 22:13:39 +00:00
|
|
|
from IND_RR_B to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen lbzx %2, %1.reg1, %1.reg2
|
2016-10-16 22:13:39 +00:00
|
|
|
|
|
|
|
/* Write byte */
|
|
|
|
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
from GPR to IND_RC_B+IND_RL_B
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen stb %1, %2
|
2016-10-16 20:02:25 +00:00
|
|
|
|
2016-10-16 22:13:39 +00:00
|
|
|
from GPR to IND_RR_B
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen stbx %1, %2.reg1, %2.reg2
|
2016-10-16 22:13:39 +00:00
|
|
|
|
|
|
|
/* Read halfword (short) */
|
2007-11-02 18:56:58 +00:00
|
|
|
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
from IND_RC_H+IND_RL_H to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen lhz %2, %1
|
2016-10-16 20:02:25 +00:00
|
|
|
|
2016-10-16 22:13:39 +00:00
|
|
|
from IND_RR_H to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen lhzx %2, %1.reg1, %1.reg2
|
2016-10-16 22:13:39 +00:00
|
|
|
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
from IND_RC_H_S+IND_RL_H_S to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen lha %2, %1
|
2016-10-16 20:02:25 +00:00
|
|
|
|
2016-10-16 22:13:39 +00:00
|
|
|
from IND_RR_H_S to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen lhax %2, %1.reg1, %1.reg2
|
2016-10-16 22:13:39 +00:00
|
|
|
|
|
|
|
/* Write halfword */
|
|
|
|
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
from GPR to IND_RC_H+IND_RL_H
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen sth %1, %2
|
2016-10-16 20:02:25 +00:00
|
|
|
|
2016-10-16 22:13:39 +00:00
|
|
|
from GPR to IND_RR_H
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen sthx %1, %2.reg1, %2.reg2
|
2016-10-16 22:13:39 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
/* Read word */
|
|
|
|
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
from IND_RC_W+IND_RL_W to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen lwz %2, %1
|
2017-02-02 15:48:25 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
from IND_RR_W to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen lwzx %2, %1.reg1, %1.reg2
|
2016-10-16 20:02:25 +00:00
|
|
|
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
from IND_RC_W+IND_RL_W to FSREG
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen lfs %2, %1
|
2017-02-02 15:48:25 +00:00
|
|
|
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
from IND_RR_W to FSREG
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen lfsx %2, %1.reg1, %1.reg2
|
2016-10-16 20:33:24 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
/* Write word */
|
|
|
|
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
from GPR to IND_RC_W+IND_RL_W
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen stw %1, %2
|
In PowerPC ncg, allocate register for ha16[label].
Use it to generate code like
lis r12,ha16[__II0]
lis r11,ha16[_f]
lfs f1,lo16[_f](r11)
lfs f2,lo16[__II0](r12)
fadds f13,f2,f1
stfs f13,lo16[_f](r11)
Here ncg has allocated r11 for ha16[_f]. We use r11 in lfs and again
in stfs. Before this change, we needed an extra lis before stfs,
because ncg did not remember that ha16[_f] was in a register.
This example has a gap between ha16[__II0] and lo16[__II0], because
the lo16 is not in the next instruction. This requires my previous
commit 1bf58cf for RELOLIS. There is a gap because ncg emits the lis
as soon as I allocate it. The "lfs f2,lo16[__II0](r12)" happens in a
coercion from IND_RL_W to FSREG. The coercion allocates one FSREG but
may not allocate any other registers. So I must allocate r12 earlier.
I allocate r12 in pat lae, but this causes a gap.
2017-02-08 17:23:06 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
from GPR to IND_RR_W
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen stwx %1, %2.reg1, %2.reg2
|
2016-10-16 20:02:25 +00:00
|
|
|
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
from FSREG to IND_RC_W+IND_RL_W
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen stfs %1, %2
|
In PowerPC ncg, allocate register for ha16[label].
Use it to generate code like
lis r12,ha16[__II0]
lis r11,ha16[_f]
lfs f1,lo16[_f](r11)
lfs f2,lo16[__II0](r12)
fadds f13,f2,f1
stfs f13,lo16[_f](r11)
Here ncg has allocated r11 for ha16[_f]. We use r11 in lfs and again
in stfs. Before this change, we needed an extra lis before stfs,
because ncg did not remember that ha16[_f] was in a register.
This example has a gap between ha16[__II0] and lo16[__II0], because
the lo16 is not in the next instruction. This requires my previous
commit 1bf58cf for RELOLIS. There is a gap because ncg emits the lis
as soon as I allocate it. The "lfs f2,lo16[__II0](r12)" happens in a
coercion from IND_RL_W to FSREG. The coercion allocates one FSREG but
may not allocate any other registers. So I must allocate r12 earlier.
I allocate r12 in pat lae, but this causes a gap.
2017-02-08 17:23:06 +00:00
|
|
|
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
from FSREG to IND_RR_W
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen stfsx %1, %2.reg1, %2.reg2
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
/* Read double */
|
|
|
|
|
2017-10-17 18:15:33 +00:00
|
|
|
from IND_RC_D+IND_RL_D to FPR
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
gen lfd %2, %1
|
2016-10-16 20:02:25 +00:00
|
|
|
|
2017-10-17 18:15:33 +00:00
|
|
|
from IND_RR_D to FPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen lfdx %2, %1.reg1, %1.reg2
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
/* Write double */
|
|
|
|
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
from FPR to IND_RC_D+IND_RL_D
|
|
|
|
gen stfd %1, %2
|
2007-11-02 18:56:58 +00:00
|
|
|
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
from FPR to IND_RR_D
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen stfdx %1, %2.reg1, %2.reg2
|
2016-10-16 20:33:24 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
/* Logicals */
|
|
|
|
|
|
|
|
from NOT_R to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen nor %2, %1.reg, %1.reg
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
from AND_RR to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen and %2, %1.reg1, %1.reg2
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
from OR_RR to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen or %2, %1.reg1, %1.reg2
|
2007-11-02 18:56:58 +00:00
|
|
|
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
from OR_RIS to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen oris %2, %1.reg, {CONST, %1.valhi}
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
from OR_RC to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen ori %2, %1.reg, {CONST, %1.val}
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
from XOR_RR to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen xor %2, %1.reg1, %1.reg2
|
2007-11-02 18:56:58 +00:00
|
|
|
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
from XOR_RIS to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen xoris %2, %1.reg, {CONST, %1.valhi}
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
from XOR_RC to GPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen xori %2, %1.reg, {CONST, %1.val}
|
2007-11-02 18:56:58 +00:00
|
|
|
|
2017-01-26 00:08:55 +00:00
|
|
|
/* Conditions */
|
|
|
|
|
|
|
|
/* Compare values, then copy cr0 to GPR. */
|
|
|
|
|
|
|
|
from COND_RC to GPR
|
|
|
|
gen
|
|
|
|
cmpwi %1.reg, {CONST, %1.val}
|
|
|
|
mfcr %2
|
|
|
|
|
|
|
|
from COND_RR to GPR
|
|
|
|
gen
|
|
|
|
cmpw %1.reg1, %1.reg2
|
|
|
|
mfcr %2
|
|
|
|
|
|
|
|
from CONDL_RC to GPR
|
|
|
|
gen
|
|
|
|
cmplwi %1.reg, {CONST, %1.val}
|
|
|
|
mfcr %2
|
|
|
|
|
|
|
|
from CONDL_RR to GPR
|
|
|
|
gen
|
|
|
|
cmplw %1.reg1, %1.reg2
|
|
|
|
mfcr %2
|
|
|
|
|
|
|
|
from COND_FS to GPR
|
|
|
|
gen
|
2017-02-13 22:44:46 +00:00
|
|
|
fcmpo cr0, %1.reg1, %1.reg2
|
2017-01-26 00:08:55 +00:00
|
|
|
mfcr %2
|
|
|
|
|
|
|
|
from COND_FD to GPR
|
|
|
|
gen
|
2017-02-13 22:44:46 +00:00
|
|
|
fcmpo cr0, %1.reg1, %1.reg2
|
2017-01-26 00:08:55 +00:00
|
|
|
mfcr %2
|
|
|
|
|
|
|
|
/* Given a copy of cr0 in %1.reg, extract a condition bit
|
|
|
|
* (lt, gt, eq) and perhaps flip it.
|
|
|
|
*/
|
|
|
|
|
|
|
|
from XEQ to GPR
|
|
|
|
gen
|
|
|
|
extrwi %2, %1.reg, {CONST, 1}, {CONST, 2}
|
|
|
|
|
|
|
|
from XNE to GPR
|
|
|
|
gen
|
|
|
|
extrwi %2, %1.reg, {CONST, 1}, {CONST, 2}
|
|
|
|
xori %2, %2, {CONST, 1}
|
|
|
|
|
|
|
|
from XGT to GPR
|
|
|
|
gen
|
|
|
|
extrwi %2, %1.reg, {CONST, 1}, {CONST, 1}
|
|
|
|
|
|
|
|
from XGE to GPR
|
|
|
|
gen
|
|
|
|
extrwi %2, %1.reg, {CONST, 1}, {CONST, 0}
|
|
|
|
xori %2, %2, {CONST, 1}
|
|
|
|
|
|
|
|
from XLT to GPR
|
|
|
|
gen
|
|
|
|
extrwi %2, %1.reg, {CONST, 1}, {CONST, 0}
|
|
|
|
|
|
|
|
from XLE to GPR
|
|
|
|
gen
|
|
|
|
extrwi %2, %1.reg, {CONST, 1}, {CONST, 1}
|
|
|
|
xori %2, %2, {CONST, 1}
|
|
|
|
|
2017-10-17 18:15:33 +00:00
|
|
|
/* GPR_EXPR exists solely to allow us to use regvar() (which can only
|
|
|
|
be used in an expression) as a register constant. We can then use
|
|
|
|
our moves to GPR to set register variables. We define no moves to
|
|
|
|
LOCAL, so we avoid confusion between GPR and FSREG in LOCAL. */
|
2007-11-02 18:56:58 +00:00
|
|
|
|
2017-10-17 18:15:33 +00:00
|
|
|
from ANY_BHW to GPR_EXPR
|
|
|
|
gen move %1, %2.reg
|
|
|
|
|
|
|
|
from FPR+IND_ALL_D to FPR_EXPR
|
|
|
|
gen move %1, %2.reg
|
|
|
|
|
|
|
|
from FSREG+IND_ALL_W to FSREG_EXPR
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen move %1, %2.reg
|
2007-11-02 18:56:58 +00:00
|
|
|
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
TESTS
|
2016-12-09 21:36:42 +00:00
|
|
|
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
/* Given orX %1, %1, %1, ncgg says, "Instruction destroys %1,
|
|
|
|
* not allowed here". We use orX_readonly to trick ncgg.
|
2017-02-10 16:45:50 +00:00
|
|
|
*
|
|
|
|
* Using "or." and not "mr." because mach/powerpc/top/table
|
|
|
|
* was optimizing "or." and not "mr.".
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
*/
|
2007-11-02 18:56:58 +00:00
|
|
|
to test GPR
|
|
|
|
gen
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
orX_readonly %1, %1, %1
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
|
|
|
|
STACKINGRULES
|
2016-09-27 20:46:11 +00:00
|
|
|
|
2016-10-07 00:47:42 +00:00
|
|
|
from REG to STACK
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
2016-10-07 00:47:42 +00:00
|
|
|
COMMENT("stack REG")
|
2017-02-13 22:44:46 +00:00
|
|
|
stwu %1, {IND_RC_W, sp, 0-4}
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
|
2017-02-10 16:45:50 +00:00
|
|
|
from ANY_BHW-REG to STACK
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
2017-02-10 16:45:50 +00:00
|
|
|
COMMENT("stack ANY_BHW-REG")
|
2016-10-07 00:47:42 +00:00
|
|
|
move %1, RSCRATCH
|
2017-02-13 22:44:46 +00:00
|
|
|
stwu RSCRATCH, {IND_RC_W, sp, 0-4}
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
from IND_ALL_D to STACK
|
|
|
|
gen
|
2016-10-18 00:31:59 +00:00
|
|
|
COMMENT("stack IND_ALL_D")
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
move %1, FSCRATCH
|
2017-02-13 22:44:46 +00:00
|
|
|
stfdu FSCRATCH, {IND_RC_D, sp, 0-8}
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-10-15 19:22:52 +00:00
|
|
|
from FREG to STACK
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
2017-10-17 18:15:33 +00:00
|
|
|
COMMENT("stack FREG")
|
2017-02-13 22:44:46 +00:00
|
|
|
stfdu %1, {IND_RC_D, sp, 0-8}
|
2016-12-09 21:36:42 +00:00
|
|
|
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
from FSREG to STACK
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
COMMENT("stack FSREG")
|
2017-02-13 22:44:46 +00:00
|
|
|
stfsu %1, {IND_RC_W, sp, 0-4}
|
2016-09-27 20:46:11 +00:00
|
|
|
|
2017-10-15 19:22:52 +00:00
|
|
|
/*
|
|
|
|
* We never stack LOCAL or DLOCAL tokens, because we only use
|
|
|
|
* them for register variables, so ncg pushes the register,
|
|
|
|
* not the token. These rules only prevent an error in ncgg.
|
|
|
|
*/
|
|
|
|
from LOCAL to STACK
|
|
|
|
gen bug {LABEL, "STACKING LOCAL"}
|
|
|
|
from DLOCAL to STACK
|
|
|
|
gen bug {LABEL, "STACKING DLOCAL"}
|
|
|
|
|
2016-09-27 20:46:11 +00:00
|
|
|
|
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
COERCIONS
|
|
|
|
|
|
|
|
from STACK
|
|
|
|
uses REG
|
|
|
|
gen
|
|
|
|
COMMENT("coerce STACK->REG")
|
2017-02-13 22:44:46 +00:00
|
|
|
lwz %a, {IND_RC_W, sp, 0}
|
|
|
|
addi sp, sp, {CONST, 4}
|
2007-11-02 18:56:58 +00:00
|
|
|
yields %a
|
2016-10-18 00:31:59 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
from STACK
|
|
|
|
uses FREG
|
|
|
|
gen
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
COMMENT("coerce STACK->FREG")
|
2017-02-13 22:44:46 +00:00
|
|
|
lfd %a, {IND_RC_D, sp, 0}
|
|
|
|
addi sp, sp, {CONST, 8}
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
yields %a
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
from STACK
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
uses FSREG
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
COMMENT("coerce STACK->FSREG")
|
2017-02-13 22:44:46 +00:00
|
|
|
lfs %a, {IND_RC_W, sp, 0}
|
|
|
|
addi sp, sp, {CONST, 4}
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
yields %a
|
2016-10-18 00:31:59 +00:00
|
|
|
|
2017-10-16 16:07:55 +00:00
|
|
|
from ANY_BHW
|
|
|
|
uses REG
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
2017-10-16 16:07:55 +00:00
|
|
|
COMMENT("coerce ANY_BHW->REG")
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
move %1, %a
|
|
|
|
yields %a
|
2016-10-18 00:31:59 +00:00
|
|
|
|
|
|
|
/*
|
2017-10-16 16:07:55 +00:00
|
|
|
* There is no coercion from IND_ALL_D to REG REG, because
|
|
|
|
* coercions can't allocate registers for intermediate values.
|
|
|
|
*
|
|
|
|
* A coercion to split IND_RC_D into two IND_RC_W, without
|
|
|
|
* allocating an intermediate register, would yield
|
|
|
|
* {IND_RC_W, %1.val, %1.off+4}
|
|
|
|
* but %1.off+4 might overflow a signed 16-bit integer.
|
2016-10-18 00:31:59 +00:00
|
|
|
*/
|
|
|
|
|
2017-10-16 16:07:55 +00:00
|
|
|
from FREG+IND_ALL_D
|
2007-11-02 18:56:58 +00:00
|
|
|
uses FREG
|
|
|
|
gen
|
2017-10-16 16:07:55 +00:00
|
|
|
COMMENT("coerce FREG+IND_ALL_D->FREG")
|
|
|
|
move %1, %a
|
|
|
|
yields %a
|
|
|
|
|
|
|
|
from FSREG+IND_ALL_W
|
|
|
|
uses FSREG
|
|
|
|
gen
|
|
|
|
COMMENT("coerce FSREG+IND_ALL_W->FREG")
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
move %1, %a
|
|
|
|
yields %a
|
2016-10-18 00:31:59 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
|
|
|
|
PATTERNS
|
|
|
|
|
|
|
|
/* Intrinsics */
|
|
|
|
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
pat loc $1==(0-0x8000) /* Load constant */
|
|
|
|
yields {CONST_N8000, $1}
|
|
|
|
pat loc $1>=(0-0x7FFF) && $1<=(0-1)
|
|
|
|
yields {CONST_N7FFF_N0001, $1}
|
|
|
|
pat loc $1>=0 && $1<=0x7FFF
|
|
|
|
yields {CONST_0000_7FFF, $1}
|
|
|
|
pat loc $1==0x8000
|
|
|
|
yields {CONST_8000, $1}
|
|
|
|
pat loc $1>=0x8001 && $1<=0xFFFF
|
|
|
|
yields {CONST_8001_FFFF, $1}
|
|
|
|
pat loc lo($1)==0
|
|
|
|
yields {CONST_HZ, $1}
|
|
|
|
pat loc
|
|
|
|
yields {CONST_HL, $1}
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
pat dup $1==INT32 /* Duplicate word on top of stack */
|
2016-10-07 00:47:42 +00:00
|
|
|
with REG
|
2007-11-02 18:56:58 +00:00
|
|
|
yields %1 %1
|
2016-09-30 15:50:50 +00:00
|
|
|
with FSREG
|
|
|
|
yields %1 %1
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat dup $1==INT64 /* Duplicate double-word on top of stack */
|
2016-10-07 00:47:42 +00:00
|
|
|
with REG REG
|
2007-11-02 18:56:58 +00:00
|
|
|
yields %2 %1 %2 %1
|
2016-09-30 15:50:50 +00:00
|
|
|
with FREG
|
|
|
|
yields %1 %1
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat exg $1==INT32 /* Exchange top two words on stack */
|
2016-10-07 00:47:42 +00:00
|
|
|
with REG REG
|
2007-11-02 18:56:58 +00:00
|
|
|
yields %1 %2
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat stl lol $1==$2 /* Store then load local */
|
|
|
|
leaving
|
|
|
|
dup 4
|
|
|
|
stl $1
|
2016-09-30 15:50:50 +00:00
|
|
|
|
|
|
|
pat sdl ldl $1==$2 /* Store then load double local */
|
|
|
|
leaving
|
|
|
|
dup 8
|
|
|
|
sdl $1
|
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat lal sti lal loi $1==$3 && $2==$4 /* Store then load local, of a different size */
|
|
|
|
leaving
|
|
|
|
dup INT32
|
|
|
|
lal $1
|
|
|
|
sti $2
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat ste loe $1==$2 /* Store then load external */
|
|
|
|
leaving
|
|
|
|
dup 4
|
|
|
|
ste $1
|
2016-12-09 21:36:42 +00:00
|
|
|
|
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
/* Type conversions */
|
|
|
|
|
|
|
|
pat loc loc ciu /* signed X -> unsigned X */
|
|
|
|
leaving
|
|
|
|
loc $1
|
|
|
|
loc $2
|
|
|
|
cuu
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat loc loc cuu $1==$2 /* unsigned X -> unsigned X */
|
|
|
|
/* nop */
|
|
|
|
|
|
|
|
pat loc loc cii $1==$2 /* signed X -> signed X */
|
|
|
|
/* nop */
|
|
|
|
|
|
|
|
pat loc loc cui $1==$2 /* unsigned X -> signed X */
|
|
|
|
/* nop */
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat loc loc cui $1==INT8 && $2==INT32 /* unsigned char -> signed int */
|
|
|
|
/* nop */
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat loc loc cui $1==INT16 && $2==INT32 /* unsigned short -> signed int */
|
|
|
|
/* nop */
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat loc loc cii $1==INT8 && $2==INT32 /* signed char -> signed int */
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG
|
2007-11-02 18:56:58 +00:00
|
|
|
yields {SEX_B, %1}
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat loc loc cii $1==2 && $2==4 /* signed char -> signed short */
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG
|
2007-11-02 18:56:58 +00:00
|
|
|
yields {SEX_H, %1}
|
|
|
|
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
/* Local variables */
|
|
|
|
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
pat lal smalls($1) /* Load address of local */
|
2017-02-13 22:44:46 +00:00
|
|
|
yields {SUM_RC, fp, $1}
|
2007-11-02 18:56:58 +00:00
|
|
|
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
pat lal /* Load address of local */
|
2017-02-13 22:44:46 +00:00
|
|
|
uses REG={SUM_RIS, fp, his($1)}
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
yields {SUM_RC, %a, los($1)}
|
|
|
|
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
/* Load word from local */
|
2017-10-14 16:40:04 +00:00
|
|
|
pat lol inreg($1)==reg_any || inreg($1)==reg_float
|
2007-11-02 18:56:58 +00:00
|
|
|
yields {LOCAL, $1}
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
pat lol
|
2007-11-02 18:56:58 +00:00
|
|
|
leaving
|
|
|
|
lal $1
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
loi 4
|
2007-11-02 18:56:58 +00:00
|
|
|
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
/* Load double-word from local */
|
|
|
|
pat ldl inreg($1)==reg_float
|
|
|
|
yields {DLOCAL, $1}
|
|
|
|
pat ldl
|
2007-11-02 18:56:58 +00:00
|
|
|
leaving
|
|
|
|
lal $1
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
loi 8
|
2016-12-09 21:36:42 +00:00
|
|
|
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
/* Store word to local */
|
|
|
|
pat stl inreg($1)==reg_any
|
2017-10-17 18:15:33 +00:00
|
|
|
with exact ANY_BHW
|
|
|
|
/* ncg fails to infer that regvar($1) is dead! */
|
|
|
|
kills regvar($1)
|
|
|
|
gen move %1, {GPR_EXPR, regvar($1)}
|
|
|
|
with STACK
|
|
|
|
gen
|
|
|
|
lwz {LOCAL, $1}, {IND_RC_W, sp, 0}
|
|
|
|
addi sp, sp, {CONST, 4}
|
2017-10-14 16:40:04 +00:00
|
|
|
pat stl inreg($1)==reg_float
|
2017-10-17 18:15:33 +00:00
|
|
|
with exact FSREG+IND_ALL_W
|
|
|
|
kills regvar_w($1, reg_float)
|
|
|
|
gen move %1, {FSREG_EXPR, regvar_w($1, reg_float)}
|
2017-10-14 16:40:04 +00:00
|
|
|
with STACK
|
|
|
|
gen
|
|
|
|
lfs {LOCAL, $1}, {IND_RC_W, sp, 0}
|
|
|
|
addi sp, sp, {CONST, 4}
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
pat stl
|
2007-11-02 18:56:58 +00:00
|
|
|
leaving
|
|
|
|
lal $1
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
sti 4
|
2016-12-09 21:36:42 +00:00
|
|
|
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
/* Store double-word to local */
|
|
|
|
pat sdl inreg($1)==reg_float
|
|
|
|
with exact FREG+IND_ALL_D
|
2017-10-17 18:15:33 +00:00
|
|
|
kills regvar_d($1, reg_float)
|
|
|
|
gen move %1, {FPR_EXPR, regvar_d($1, reg_float)}
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
with STACK
|
|
|
|
gen
|
|
|
|
lfd {DLOCAL, $1}, {IND_RC_D, sp, 0}
|
|
|
|
addi sp, sp, {CONST, 8}
|
|
|
|
pat sdl
|
2007-11-02 18:56:58 +00:00
|
|
|
leaving
|
|
|
|
lal $1
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
sti 8
|
2016-12-09 21:36:42 +00:00
|
|
|
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
/* Load indirect from local */
|
|
|
|
pat lil inreg($1)==reg_any
|
2017-02-08 17:12:28 +00:00
|
|
|
yields {IND_RC_W, regvar($1), 0}
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
pat lil
|
2007-11-02 18:56:58 +00:00
|
|
|
leaving
|
|
|
|
lol $1
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
loi 4
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat sil /* Save to indirected local */
|
|
|
|
leaving
|
|
|
|
lol $1
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
sti 4
|
2016-09-30 15:50:50 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat zrl /* Zero local */
|
|
|
|
leaving
|
|
|
|
loc 0
|
|
|
|
stl $1
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat inl /* Increment local */
|
|
|
|
leaving
|
|
|
|
lol $1
|
|
|
|
loc 1
|
|
|
|
adi 4
|
|
|
|
stl $1
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat del /* Decrement local */
|
|
|
|
leaving
|
|
|
|
lol $1
|
|
|
|
loc 1
|
|
|
|
sbi 4
|
|
|
|
stl $1
|
|
|
|
|
|
|
|
|
|
|
|
/* Global variables */
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat lpi /* Load address of external function */
|
|
|
|
leaving
|
|
|
|
lae $1
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat lae /* Load address of external */
|
In PowerPC ncg, allocate register for ha16[label].
Use it to generate code like
lis r12,ha16[__II0]
lis r11,ha16[_f]
lfs f1,lo16[_f](r11)
lfs f2,lo16[__II0](r12)
fadds f13,f2,f1
stfs f13,lo16[_f](r11)
Here ncg has allocated r11 for ha16[_f]. We use r11 in lfs and again
in stfs. Before this change, we needed an extra lis before stfs,
because ncg did not remember that ha16[_f] was in a register.
This example has a gap between ha16[__II0] and lo16[__II0], because
the lo16 is not in the next instruction. This requires my previous
commit 1bf58cf for RELOLIS. There is a gap because ncg emits the lis
as soon as I allocate it. The "lfs f2,lo16[__II0](r12)" happens in a
coercion from IND_RL_W to FSREG. The coercion allocates one FSREG but
may not allocate any other registers. So I must allocate r12 earlier.
I allocate r12 in pat lae, but this causes a gap.
2017-02-08 17:23:06 +00:00
|
|
|
uses REG={LABEL_HA, $1}
|
|
|
|
yields {SUM_RL, %a, $1}
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat loe /* Load word external */
|
|
|
|
leaving
|
|
|
|
lae $1
|
|
|
|
loi INT32
|
|
|
|
|
|
|
|
pat ste /* Store word external */
|
|
|
|
leaving
|
|
|
|
lae $1
|
|
|
|
sti INT32
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat lde /* Load double-word external */
|
|
|
|
leaving
|
|
|
|
lae $1
|
|
|
|
loi INT64
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat sde /* Store double-word external */
|
|
|
|
leaving
|
|
|
|
lae $1
|
|
|
|
sti INT64
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat zre /* Zero external */
|
|
|
|
leaving
|
|
|
|
loc 0
|
|
|
|
ste $1
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat ine /* Increment external */
|
In PowerPC ncg, allocate register for ha16[label].
Use it to generate code like
lis r12,ha16[__II0]
lis r11,ha16[_f]
lfs f1,lo16[_f](r11)
lfs f2,lo16[__II0](r12)
fadds f13,f2,f1
stfs f13,lo16[_f](r11)
Here ncg has allocated r11 for ha16[_f]. We use r11 in lfs and again
in stfs. Before this change, we needed an extra lis before stfs,
because ncg did not remember that ha16[_f] was in a register.
This example has a gap between ha16[__II0] and lo16[__II0], because
the lo16 is not in the next instruction. This requires my previous
commit 1bf58cf for RELOLIS. There is a gap because ncg emits the lis
as soon as I allocate it. The "lfs f2,lo16[__II0](r12)" happens in a
coercion from IND_RL_W to FSREG. The coercion allocates one FSREG but
may not allocate any other registers. So I must allocate r12 earlier.
I allocate r12 in pat lae, but this causes a gap.
2017-02-08 17:23:06 +00:00
|
|
|
leaving
|
|
|
|
loe $1
|
|
|
|
inc
|
|
|
|
ste $1
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat dee /* Decrement external */
|
In PowerPC ncg, allocate register for ha16[label].
Use it to generate code like
lis r12,ha16[__II0]
lis r11,ha16[_f]
lfs f1,lo16[_f](r11)
lfs f2,lo16[__II0](r12)
fadds f13,f2,f1
stfs f13,lo16[_f](r11)
Here ncg has allocated r11 for ha16[_f]. We use r11 in lfs and again
in stfs. Before this change, we needed an extra lis before stfs,
because ncg did not remember that ha16[_f] was in a register.
This example has a gap between ha16[__II0] and lo16[__II0], because
the lo16 is not in the next instruction. This requires my previous
commit 1bf58cf for RELOLIS. There is a gap because ncg emits the lis
as soon as I allocate it. The "lfs f2,lo16[__II0](r12)" happens in a
coercion from IND_RL_W to FSREG. The coercion allocates one FSREG but
may not allocate any other registers. So I must allocate r12 earlier.
I allocate r12 in pat lae, but this causes a gap.
2017-02-08 17:23:06 +00:00
|
|
|
leaving
|
|
|
|
loe $1
|
|
|
|
dec
|
|
|
|
ste $1
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
/* Structures */
|
|
|
|
|
|
|
|
pat lof /* Load word offsetted */
|
|
|
|
leaving
|
|
|
|
adp $1
|
|
|
|
loi INT32
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat ldf /* Load double-word offsetted */
|
|
|
|
leaving
|
|
|
|
adp $1
|
|
|
|
loi INT64
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat stf /* Store word offsetted */
|
|
|
|
leaving
|
|
|
|
adp $1
|
|
|
|
sti INT32
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat sdf /* Store double-word offsetted */
|
|
|
|
leaving
|
|
|
|
adp $1
|
|
|
|
sti INT64
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
/* Loads and stores */
|
|
|
|
|
|
|
|
pat loi $1==INT8 /* Load byte indirect */
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG
|
2016-10-16 22:13:39 +00:00
|
|
|
yields {IND_RC_B, %1, 0}
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with exact SUM_RC
|
2016-10-16 22:13:39 +00:00
|
|
|
yields {IND_RC_B, %1.reg, %1.off}
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
with exact SUM_RL
|
|
|
|
yields {IND_RL_B, %1.reg, %1.adr}
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with exact SUM_RR
|
|
|
|
yields {IND_RR_B, %1.reg1, %1.reg2}
|
2016-10-16 22:13:39 +00:00
|
|
|
|
|
|
|
pat loi loc loc cii $1==INT16 && $2==INT16 && $3==INT32
|
|
|
|
/* Load half-word indirect and sign extend */
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG
|
2016-10-16 22:13:39 +00:00
|
|
|
yields {IND_RC_H_S, %1, 0}
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with exact SUM_RC
|
2016-10-16 22:13:39 +00:00
|
|
|
yields {IND_RC_H_S, %1.reg, %1.off}
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
with exact SUM_RL
|
|
|
|
yields {IND_RL_H_S, %1.reg, %1.adr}
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with exact SUM_RR
|
|
|
|
yields {IND_RR_H_S, %1.reg1, %1.reg2}
|
2016-10-16 22:13:39 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat loi $1==INT16 /* Load half-word indirect */
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG
|
2016-10-16 22:13:39 +00:00
|
|
|
yields {IND_RC_H, %1, 0}
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with exact SUM_RC
|
2016-10-16 22:13:39 +00:00
|
|
|
yields {IND_RC_H, %1.reg, %1.off}
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
with exact SUM_RL
|
|
|
|
yields {IND_RL_H, %1.reg, %1.adr}
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with exact SUM_RR
|
|
|
|
yields {IND_RR_H, %1.reg1, %1.reg2}
|
2016-10-16 22:13:39 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat loi $1==INT32 /* Load word indirect */
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG
|
2007-11-02 18:56:58 +00:00
|
|
|
yields {IND_RC_W, %1, 0}
|
2017-02-02 15:48:25 +00:00
|
|
|
with exact SUM_RC
|
2007-11-02 18:56:58 +00:00
|
|
|
yields {IND_RC_W, %1.reg, %1.off}
|
In PowerPC ncg, allocate register for ha16[label].
Use it to generate code like
lis r12,ha16[__II0]
lis r11,ha16[_f]
lfs f1,lo16[_f](r11)
lfs f2,lo16[__II0](r12)
fadds f13,f2,f1
stfs f13,lo16[_f](r11)
Here ncg has allocated r11 for ha16[_f]. We use r11 in lfs and again
in stfs. Before this change, we needed an extra lis before stfs,
because ncg did not remember that ha16[_f] was in a register.
This example has a gap between ha16[__II0] and lo16[__II0], because
the lo16 is not in the next instruction. This requires my previous
commit 1bf58cf for RELOLIS. There is a gap because ncg emits the lis
as soon as I allocate it. The "lfs f2,lo16[__II0](r12)" happens in a
coercion from IND_RL_W to FSREG. The coercion allocates one FSREG but
may not allocate any other registers. So I must allocate r12 earlier.
I allocate r12 in pat lae, but this causes a gap.
2017-02-08 17:23:06 +00:00
|
|
|
with exact SUM_RL
|
|
|
|
yields {IND_RL_W, %1.reg, %1.adr}
|
2017-02-02 15:48:25 +00:00
|
|
|
with exact SUM_RR
|
2007-11-02 18:56:58 +00:00
|
|
|
yields {IND_RR_W, %1.reg1, %1.reg2}
|
|
|
|
|
|
|
|
pat loi $1==INT64 /* Load double-word indirect */
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG
|
2007-11-02 18:56:58 +00:00
|
|
|
yields {IND_RC_D, %1, 0}
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with exact SUM_RC
|
2007-11-02 18:56:58 +00:00
|
|
|
yields {IND_RC_D, %1.reg, %1.off}
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
with exact SUM_RL
|
|
|
|
yields {IND_RL_D, %1.reg, %1.adr}
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with exact SUM_RR
|
2007-11-02 18:56:58 +00:00
|
|
|
yields {IND_RR_D, %1.reg1, %1.reg2}
|
|
|
|
|
|
|
|
pat loi /* Load arbitrary size */
|
|
|
|
leaving
|
|
|
|
loc $1
|
Use .los4 in lar 4 and .sts4 in sar 4.
Our libem had two implementations of loading a block from a stack, one
for lar 4 and one for los 4. Now lar 4 and los 4 share the code in
.los4. Likewise, sar 4 and sts 4 share the code in .sts4.
Rename .los to .los4 and .sts to .sts4, because they implement los 4
and sts 4. Remove the special case for loading or storing 4 bytes,
because we can do it with 1 iteration of the loop. Remove the lines
to "align size" where the size must already be a multiple of 4.
Fix the upper bound check in .aar4.
Change .aar4, .lar4, .los4, .sar4, .sts4 to pass all operands on the
real stack, except that .los4 and .sts4 take the size in register r3.
Have .aar4 set r3 to the size of the array element. So lar 4 is just
.aar4 then .los4, and sar 4 is just .aar4 then .sts4.
ncg no longer calls .lar4 and .sar4 in libem, because it inlines the
code; but I keep .lar4 and .sar4 in libem, because mcg references
them. They might or might not work in mcg.
2017-02-13 20:22:00 +00:00
|
|
|
los 4
|
2016-12-09 21:36:42 +00:00
|
|
|
|
Use .los4 in lar 4 and .sts4 in sar 4.
Our libem had two implementations of loading a block from a stack, one
for lar 4 and one for los 4. Now lar 4 and los 4 share the code in
.los4. Likewise, sar 4 and sts 4 share the code in .sts4.
Rename .los to .los4 and .sts to .sts4, because they implement los 4
and sts 4. Remove the special case for loading or storing 4 bytes,
because we can do it with 1 iteration of the loop. Remove the lines
to "align size" where the size must already be a multiple of 4.
Fix the upper bound check in .aar4.
Change .aar4, .lar4, .los4, .sar4, .sts4 to pass all operands on the
real stack, except that .los4 and .sts4 take the size in register r3.
Have .aar4 set r3 to the size of the array element. So lar 4 is just
.aar4 then .los4, and sar 4 is just .aar4 then .sts4.
ncg no longer calls .lar4 and .sar4 in libem, because it inlines the
code; but I keep .lar4 and .sar4 in libem, because mcg references
them. They might or might not work in mcg.
2017-02-13 20:22:00 +00:00
|
|
|
pat los $1==4 /* Load arbitrary size */
|
2017-02-18 00:32:27 +00:00
|
|
|
with REG3 STACK
|
2007-11-02 18:56:58 +00:00
|
|
|
kills ALL
|
|
|
|
gen
|
Use .los4 in lar 4 and .sts4 in sar 4.
Our libem had two implementations of loading a block from a stack, one
for lar 4 and one for los 4. Now lar 4 and los 4 share the code in
.los4. Likewise, sar 4 and sts 4 share the code in .sts4.
Rename .los to .los4 and .sts to .sts4, because they implement los 4
and sts 4. Remove the special case for loading or storing 4 bytes,
because we can do it with 1 iteration of the loop. Remove the lines
to "align size" where the size must already be a multiple of 4.
Fix the upper bound check in .aar4.
Change .aar4, .lar4, .los4, .sar4, .sts4 to pass all operands on the
real stack, except that .los4 and .sts4 take the size in register r3.
Have .aar4 set r3 to the size of the array element. So lar 4 is just
.aar4 then .los4, and sar 4 is just .aar4 then .sts4.
ncg no longer calls .lar4 and .sar4 in libem, because it inlines the
code; but I keep .lar4 and .sar4 in libem, because mcg references
them. They might or might not work in mcg.
2017-02-13 20:22:00 +00:00
|
|
|
bl {LABEL, ".los4"}
|
2016-10-16 22:13:39 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat sti $1==INT8 /* Store byte indirect */
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG REG
|
2016-10-16 22:13:39 +00:00
|
|
|
kills MEMORY
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen move %2, {IND_RC_B, %1, 0}
|
|
|
|
with SUM_RC REG
|
2016-10-16 22:13:39 +00:00
|
|
|
kills MEMORY
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen move %2, {IND_RC_B, %1.reg, %1.off}
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
with SUM_RL REG
|
|
|
|
kills MEMORY
|
|
|
|
gen move %2, {IND_RL_B, %1.reg, %1.adr}
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with SUM_RR REG
|
2016-10-16 22:13:39 +00:00
|
|
|
kills MEMORY
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen move %2, {IND_RR_B, %1.reg1, %1.reg2}
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
pat sti $1==INT16 /* Store half-word indirect */
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG REG
|
2016-10-16 22:13:39 +00:00
|
|
|
kills MEMORY
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen move %2, {IND_RC_H, %1, 0}
|
|
|
|
with SUM_RC REG
|
2016-10-16 22:13:39 +00:00
|
|
|
kills MEMORY
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen move %2, {IND_RC_H, %1.reg, %1.off}
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
with SUM_RL REG
|
|
|
|
kills MEMORY
|
|
|
|
gen move %2, {IND_RL_H, %1.reg, %1.adr}
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with SUM_RR REG
|
2016-10-16 22:13:39 +00:00
|
|
|
kills MEMORY
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen move %2, {IND_RR_H, %1.reg1, %1.reg2}
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
pat sti $1==INT32 /* Store word indirect */
|
2017-02-02 15:48:25 +00:00
|
|
|
with REG REG+FSREG
|
2016-10-16 22:13:39 +00:00
|
|
|
kills MEMORY
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen move %2, {IND_RC_W, %1, 0}
|
|
|
|
with SUM_RC REG+FSREG
|
2016-10-16 22:13:39 +00:00
|
|
|
kills MEMORY
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen move %2, {IND_RC_W, %1.reg, %1.off}
|
In PowerPC ncg, allocate register for ha16[label].
Use it to generate code like
lis r12,ha16[__II0]
lis r11,ha16[_f]
lfs f1,lo16[_f](r11)
lfs f2,lo16[__II0](r12)
fadds f13,f2,f1
stfs f13,lo16[_f](r11)
Here ncg has allocated r11 for ha16[_f]. We use r11 in lfs and again
in stfs. Before this change, we needed an extra lis before stfs,
because ncg did not remember that ha16[_f] was in a register.
This example has a gap between ha16[__II0] and lo16[__II0], because
the lo16 is not in the next instruction. This requires my previous
commit 1bf58cf for RELOLIS. There is a gap because ncg emits the lis
as soon as I allocate it. The "lfs f2,lo16[__II0](r12)" happens in a
coercion from IND_RL_W to FSREG. The coercion allocates one FSREG but
may not allocate any other registers. So I must allocate r12 earlier.
I allocate r12 in pat lae, but this causes a gap.
2017-02-08 17:23:06 +00:00
|
|
|
with SUM_RL REG+FSREG
|
|
|
|
kills MEMORY
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen move %2, {IND_RL_W, %1.reg, %1.adr}
|
|
|
|
with SUM_RR REG+FSREG
|
2016-10-16 22:13:39 +00:00
|
|
|
kills MEMORY
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen move %2, {IND_RR_W, %1.reg1, %1.reg2}
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
pat sti $1==INT64 /* Store double-word indirect */
|
2016-10-18 00:31:59 +00:00
|
|
|
with REG FREG
|
2016-10-16 22:13:39 +00:00
|
|
|
kills MEMORY
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen move %2, {IND_RC_D, %1, 0}
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
with SUM_RC FREG
|
2016-10-16 22:13:39 +00:00
|
|
|
kills MEMORY
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
gen move %2, {IND_RC_D, %1.reg, %1.off}
|
Use ha16/lo16 to load or store 1, 2, 8 bytes from labels.
Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with
the rules to use them. These rules emit shorter code. For example,
loading a byte becomes lis, lbz instead of lis, addi, lbz.
While making this, I wrongly set IND_RL_D to size 4. Then ncg made
infinite recursion in codegen() and stackupto(), until it crashed by
stack overflow. I correctly set IND_RL_D to size 8, preventing the
crash.
2017-02-08 17:31:14 +00:00
|
|
|
with SUM_RL FREG
|
|
|
|
kills MEMORY
|
|
|
|
gen move %2, {IND_RL_D, %1.reg, %1.adr}
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with SUM_RR FREG
|
|
|
|
kills MEMORY
|
|
|
|
gen move %2, {IND_RR_D, %1.reg1, %1.reg2}
|
2017-02-13 23:11:27 +00:00
|
|
|
with REG REG REG
|
2016-10-16 22:13:39 +00:00
|
|
|
kills MEMORY
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
2017-02-13 23:11:27 +00:00
|
|
|
move %2, {IND_RC_W, %1, 0}
|
|
|
|
move %3, {IND_RC_W, %1, 4}
|
2016-10-18 00:31:59 +00:00
|
|
|
/*
|
|
|
|
* Next 2 patterns exist because there is no coercion
|
2017-02-13 23:11:27 +00:00
|
|
|
* from IND_ALL_D to REG REG.
|
2016-10-18 00:31:59 +00:00
|
|
|
*/
|
|
|
|
with REG IND_RC_D
|
2016-10-16 22:13:39 +00:00
|
|
|
kills MEMORY
|
2017-02-13 23:11:27 +00:00
|
|
|
uses REG={SUM_RC, %2.reg, %2.off}, REG, REG
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
2017-02-13 23:11:27 +00:00
|
|
|
move {IND_RC_W, %a, 0}, %b
|
|
|
|
move {IND_RC_W, %a, 4}, %c
|
|
|
|
move %b, {IND_RC_W, %1, 0}
|
|
|
|
move %c, {IND_RC_W, %1, 4}
|
2016-10-18 00:31:59 +00:00
|
|
|
with REG IND_RR_D
|
|
|
|
kills MEMORY
|
2017-02-13 23:11:27 +00:00
|
|
|
uses REG={SUM_RR, %2.reg1, %2.reg2}, REG, REG
|
2016-10-18 00:31:59 +00:00
|
|
|
gen
|
2017-02-13 23:11:27 +00:00
|
|
|
move {IND_RC_W, %a, 0}, %b
|
|
|
|
move {IND_RC_W, %a, 4}, %c
|
|
|
|
move %b, {IND_RC_W, %1, 0}
|
|
|
|
move %c, {IND_RC_W, %1, 4}
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
pat sti /* Store arbitrary size */
|
|
|
|
leaving
|
|
|
|
loc $1
|
Use .los4 in lar 4 and .sts4 in sar 4.
Our libem had two implementations of loading a block from a stack, one
for lar 4 and one for los 4. Now lar 4 and los 4 share the code in
.los4. Likewise, sar 4 and sts 4 share the code in .sts4.
Rename .los to .los4 and .sts to .sts4, because they implement los 4
and sts 4. Remove the special case for loading or storing 4 bytes,
because we can do it with 1 iteration of the loop. Remove the lines
to "align size" where the size must already be a multiple of 4.
Fix the upper bound check in .aar4.
Change .aar4, .lar4, .los4, .sar4, .sts4 to pass all operands on the
real stack, except that .los4 and .sts4 take the size in register r3.
Have .aar4 set r3 to the size of the array element. So lar 4 is just
.aar4 then .los4, and sar 4 is just .aar4 then .sts4.
ncg no longer calls .lar4 and .sar4 in libem, because it inlines the
code; but I keep .lar4 and .sar4 in libem, because mcg references
them. They might or might not work in mcg.
2017-02-13 20:22:00 +00:00
|
|
|
sts 4
|
2016-10-16 22:13:39 +00:00
|
|
|
|
Use .los4 in lar 4 and .sts4 in sar 4.
Our libem had two implementations of loading a block from a stack, one
for lar 4 and one for los 4. Now lar 4 and los 4 share the code in
.los4. Likewise, sar 4 and sts 4 share the code in .sts4.
Rename .los to .los4 and .sts to .sts4, because they implement los 4
and sts 4. Remove the special case for loading or storing 4 bytes,
because we can do it with 1 iteration of the loop. Remove the lines
to "align size" where the size must already be a multiple of 4.
Fix the upper bound check in .aar4.
Change .aar4, .lar4, .los4, .sar4, .sts4 to pass all operands on the
real stack, except that .los4 and .sts4 take the size in register r3.
Have .aar4 set r3 to the size of the array element. So lar 4 is just
.aar4 then .los4, and sar 4 is just .aar4 then .sts4.
ncg no longer calls .lar4 and .sar4 in libem, because it inlines the
code; but I keep .lar4 and .sar4 in libem, because mcg references
them. They might or might not work in mcg.
2017-02-13 20:22:00 +00:00
|
|
|
pat sts $1==4 /* Store arbitrary size */
|
2017-02-18 00:32:27 +00:00
|
|
|
with REG3 STACK
|
2007-11-02 18:56:58 +00:00
|
|
|
kills ALL
|
|
|
|
gen
|
Use .los4 in lar 4 and .sts4 in sar 4.
Our libem had two implementations of loading a block from a stack, one
for lar 4 and one for los 4. Now lar 4 and los 4 share the code in
.los4. Likewise, sar 4 and sts 4 share the code in .sts4.
Rename .los to .los4 and .sts to .sts4, because they implement los 4
and sts 4. Remove the special case for loading or storing 4 bytes,
because we can do it with 1 iteration of the loop. Remove the lines
to "align size" where the size must already be a multiple of 4.
Fix the upper bound check in .aar4.
Change .aar4, .lar4, .los4, .sar4, .sts4 to pass all operands on the
real stack, except that .los4 and .sts4 take the size in register r3.
Have .aar4 set r3 to the size of the array element. So lar 4 is just
.aar4 then .los4, and sar 4 is just .aar4 then .sts4.
ncg no longer calls .lar4 and .sar4 in libem, because it inlines the
code; but I keep .lar4 and .sar4 in libem, because mcg references
them. They might or might not work in mcg.
2017-02-13 20:22:00 +00:00
|
|
|
bl {LABEL, ".sts4"}
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
/* Arithmetic wrappers */
|
|
|
|
|
|
|
|
pat ads $1==4 /* Add var to pointer */
|
|
|
|
leaving adi $1
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat sbs $1==4 /* Subtract var from pointer */
|
|
|
|
leaving sbi $1
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat adp /* Add constant to pointer */
|
|
|
|
leaving
|
|
|
|
loc $1
|
|
|
|
adi 4
|
|
|
|
|
|
|
|
pat adu /* Add unsigned */
|
|
|
|
leaving
|
|
|
|
adi $1
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat sbu /* Subtract unsigned */
|
|
|
|
leaving
|
|
|
|
sbi $1
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat inc /* Add 1 */
|
|
|
|
leaving
|
|
|
|
loc 1
|
|
|
|
adi 4
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat dec /* Subtract 1 */
|
|
|
|
leaving
|
|
|
|
loc 1
|
|
|
|
sbi 4
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-02-10 16:45:50 +00:00
|
|
|
pat mlu /* Multiply unsigned */
|
2007-11-02 18:56:58 +00:00
|
|
|
leaving
|
|
|
|
mli $1
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-02-10 16:45:50 +00:00
|
|
|
pat slu /* Shift left unsigned */
|
2007-11-02 18:56:58 +00:00
|
|
|
leaving
|
|
|
|
sli $1
|
|
|
|
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
/* Word arithmetic */
|
|
|
|
|
|
|
|
pat adi $1==4 /* Add word (second + top) */
|
|
|
|
with REG REG
|
|
|
|
yields {SUM_RR, %1, %2}
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
with CONST2 REG
|
2007-11-02 18:56:58 +00:00
|
|
|
yields {SUM_RC, %2, %1.val}
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
with REG CONST2
|
2007-11-02 18:56:58 +00:00
|
|
|
yields {SUM_RC, %1, %2.val}
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
with CONST_HZ REG
|
|
|
|
uses reusing %2, REG={SUM_RIS, %2, his(%1.val)}
|
|
|
|
yields %a
|
|
|
|
with REG CONST_HZ
|
|
|
|
uses reusing %1, REG={SUM_RIS, %1, his(%2.val)}
|
|
|
|
yields %a
|
2017-02-08 17:12:28 +00:00
|
|
|
with CONST_STACK-CONST2-CONST_HZ REG
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
uses reusing %2, REG={SUM_RIS, %2, his(%1.val)}
|
|
|
|
yields {SUM_RC, %a, los(%1.val)}
|
2017-02-08 17:12:28 +00:00
|
|
|
with REG CONST_STACK-CONST2-CONST_HZ
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
uses reusing %1, REG={SUM_RIS, %1, his(%2.val)}
|
|
|
|
yields {SUM_RC, %a, los(%2.val)}
|
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat sbi $1==4 /* Subtract word (second - top) */
|
|
|
|
with REG REG
|
|
|
|
uses reusing %2, REG
|
|
|
|
gen
|
|
|
|
subf %a, %1, %2
|
|
|
|
yields %a
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
with CONST2_WHEN_NEG REG
|
2007-11-02 18:56:58 +00:00
|
|
|
yields {SUM_RC, %2, 0-%1.val}
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
with CONST_HZ REG
|
|
|
|
uses reusing %2, REG={SUM_RIS, %2, his(0-%1.val)}
|
|
|
|
yields %a
|
2017-02-08 17:12:28 +00:00
|
|
|
with CONST_STACK-CONST2_WHEN_NEG-CONST_HZ REG
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
uses reusing %2, REG={SUM_RIS, %2, his(0-%1.val)}
|
|
|
|
yields {SUM_RC, %a, los(0-%1.val)}
|
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat ngi $1==4 /* Negate word */
|
|
|
|
with REG
|
|
|
|
uses reusing %1, REG
|
|
|
|
gen
|
|
|
|
neg %a, %1
|
|
|
|
yields %a
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat mli $1==4 /* Multiply word (second * top) */
|
|
|
|
with REG REG
|
|
|
|
uses reusing %2, REG
|
|
|
|
gen
|
|
|
|
mullw %a, %2, %1
|
|
|
|
yields %a
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat dvi $1==4 /* Divide word (second / top) */
|
|
|
|
with REG REG
|
|
|
|
uses reusing %2, REG
|
|
|
|
gen
|
|
|
|
divw %a, %2, %1
|
|
|
|
yields %a
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat dvu $1==4 /* Divide unsigned word (second / top) */
|
|
|
|
with REG REG
|
|
|
|
uses reusing %2, REG
|
|
|
|
gen
|
|
|
|
divwu %a, %2, %1
|
|
|
|
yields %a
|
|
|
|
|
|
|
|
pat rmi $1==4 /* Remainder word (second % top) */
|
|
|
|
with REG REG
|
|
|
|
uses REG
|
|
|
|
gen
|
|
|
|
divw %a, %2, %1
|
|
|
|
mullw %a, %a, %1
|
|
|
|
subf %a, %a, %2
|
|
|
|
yields %a
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat rmu $1==4 /* Remainder unsigned word (second % top) */
|
|
|
|
with REG REG
|
|
|
|
uses REG
|
|
|
|
gen
|
|
|
|
divwu %a, %2, %1
|
|
|
|
mullw %a, %a, %1
|
|
|
|
subf %a, %a, %2
|
|
|
|
yields %a
|
|
|
|
|
|
|
|
pat and $1==4 /* AND word */
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG NOT_R
|
2007-11-02 18:56:58 +00:00
|
|
|
uses reusing %1, REG
|
|
|
|
gen
|
|
|
|
andc %a, %1, %2.reg
|
|
|
|
yields %a
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with NOT_R REG
|
2007-11-02 18:56:58 +00:00
|
|
|
uses reusing %1, REG
|
|
|
|
gen
|
|
|
|
andc %a, %2, %1.reg
|
|
|
|
yields %a
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG REG
|
2007-11-02 18:56:58 +00:00
|
|
|
yields {AND_RR, %1, %2}
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG UCONST2
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
uses reusing %1, REG
|
|
|
|
gen
|
|
|
|
andiX %a, %1, {CONST, %2.val}
|
|
|
|
yields %a
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with UCONST2 REG
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
uses reusing %2, REG
|
|
|
|
gen
|
|
|
|
andiX %a, %2, {CONST, %1.val}
|
|
|
|
yields %a
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG CONST_HZ
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
uses reusing %1, REG
|
|
|
|
gen
|
|
|
|
andisX %a, %1, {CONST, hi(%2.val)}
|
|
|
|
yields %a
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with CONST_HZ REG
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
uses reusing %2, REG
|
|
|
|
gen
|
|
|
|
andisX %a, %2, {CONST, hi(%1.val)}
|
|
|
|
yields %a
|
|
|
|
|
2016-12-10 17:23:07 +00:00
|
|
|
pat and defined($1) /* AND set */
|
2017-01-17 21:31:38 +00:00
|
|
|
leaving
|
|
|
|
loc $1
|
|
|
|
cal ".and"
|
|
|
|
|
|
|
|
pat and !defined($1)
|
|
|
|
leaving
|
|
|
|
cal ".and"
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat ior $1==4 /* OR word */
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
with REG NOT_R
|
2007-11-02 18:56:58 +00:00
|
|
|
uses reusing %1, REG
|
|
|
|
gen
|
|
|
|
orc %a, %1, %2.reg
|
|
|
|
yields %a
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
with NOT_R REG
|
2007-11-02 18:56:58 +00:00
|
|
|
uses reusing %2, REG
|
|
|
|
gen
|
|
|
|
orc %a, %2, %1.reg
|
|
|
|
yields %a
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
with REG REG
|
2007-11-02 18:56:58 +00:00
|
|
|
yields {OR_RR, %1, %2}
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
with REG UCONST2
|
2007-11-02 18:56:58 +00:00
|
|
|
yields {OR_RC, %1, %2.val}
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
with UCONST2 REG
|
2007-11-02 18:56:58 +00:00
|
|
|
yields {OR_RC, %2, %1.val}
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
with REG CONST_HZ
|
|
|
|
uses reusing %1, REG={OR_RIS, %1, hi(%2.val)}
|
|
|
|
yields %a
|
|
|
|
with CONST_HZ REG
|
|
|
|
uses reusing %2, REG={OR_RIS, %2, hi(%1.val)}
|
|
|
|
yields %a
|
2017-02-08 17:12:28 +00:00
|
|
|
with REG CONST_STACK-UCONST2-CONST_HZ
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
uses reusing %1, REG={OR_RIS, %1, hi(%2.val)}
|
|
|
|
yields {OR_RC, %1, lo(%2.val)}
|
2017-02-08 17:12:28 +00:00
|
|
|
with CONST_STACK-UCONST2-CONST_HZ REG
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
uses reusing %2, REG={OR_RIS, %2, hi(%1.val)}
|
|
|
|
yields {OR_RC, %2, lo(%1.val)}
|
|
|
|
|
2016-12-10 17:23:07 +00:00
|
|
|
pat ior defined($1) /* OR set */
|
2017-01-15 21:28:14 +00:00
|
|
|
leaving
|
|
|
|
loc $1
|
|
|
|
cal ".ior"
|
2016-12-10 17:23:07 +00:00
|
|
|
|
|
|
|
/* OR set (variable), used in lang/m2/libm2/LtoUset.e */
|
|
|
|
pat ior !defined($1)
|
2017-01-15 21:28:14 +00:00
|
|
|
leaving
|
|
|
|
cal ".ior"
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat xor $1==4 /* XOR word */
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
with REG REG
|
2007-11-02 18:56:58 +00:00
|
|
|
yields {XOR_RR, %1, %2}
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
with REG UCONST2
|
2007-11-02 18:56:58 +00:00
|
|
|
yields {XOR_RC, %1, %2.val}
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
with UCONST2 REG
|
2007-11-02 18:56:58 +00:00
|
|
|
yields {XOR_RC, %2, %1.val}
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
with REG CONST_HZ
|
|
|
|
uses reusing %1, REG={XOR_RIS, %1, hi(%2.val)}
|
|
|
|
yields %a
|
|
|
|
with CONST_HZ REG
|
|
|
|
uses reusing %2, REG={XOR_RIS, %2, hi(%1.val)}
|
|
|
|
yields %a
|
2017-02-08 17:12:28 +00:00
|
|
|
with REG CONST_STACK-UCONST2-CONST_HZ
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
uses reusing %1, REG={XOR_RIS, %1, hi(%2.val)}
|
|
|
|
yields {XOR_RC, %1, lo(%2.val)}
|
2017-02-08 17:12:28 +00:00
|
|
|
with CONST_STACK-UCONST2-CONST_HZ REG
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
uses reusing %2, REG={XOR_RIS, %2, hi(%1.val)}
|
|
|
|
yields {XOR_RC, %2, lo(%1.val)}
|
|
|
|
|
2016-12-10 17:23:07 +00:00
|
|
|
pat xor defined($1) /* XOR set */
|
2017-02-11 23:00:56 +00:00
|
|
|
leaving
|
|
|
|
loc $1
|
|
|
|
cal ".xor"
|
|
|
|
|
|
|
|
pat xor !defined($1)
|
|
|
|
leaving
|
|
|
|
cal ".xor"
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat com $1==INT32 /* NOT word */
|
|
|
|
with AND_RR
|
|
|
|
uses REG
|
|
|
|
gen
|
|
|
|
nand %a, %1.reg1, %1.reg2
|
|
|
|
yields %a
|
|
|
|
with OR_RR
|
|
|
|
uses REG
|
|
|
|
gen
|
|
|
|
nor %a, %1.reg1, %1.reg2
|
|
|
|
yields %a
|
|
|
|
with XOR_RR
|
|
|
|
uses REG
|
|
|
|
gen
|
|
|
|
eqv %a, %1.reg1, %1.reg2
|
|
|
|
yields %a
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG
|
2007-11-02 18:56:58 +00:00
|
|
|
yields {NOT_R, %1}
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2016-12-10 17:23:07 +00:00
|
|
|
pat com defined($1) /* NOT set */
|
2017-01-17 21:31:38 +00:00
|
|
|
leaving
|
|
|
|
loc $1
|
|
|
|
cal ".com"
|
|
|
|
|
|
|
|
pat com !defined($1)
|
|
|
|
leaving
|
|
|
|
cal ".com"
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2016-12-10 17:23:07 +00:00
|
|
|
pat zer $1==4 /* Push zero */
|
|
|
|
leaving
|
|
|
|
loc 0
|
|
|
|
|
2017-01-15 21:28:14 +00:00
|
|
|
pat zer defined($1) /* Create empty set */
|
|
|
|
leaving
|
|
|
|
loc $1
|
|
|
|
cal ".zer"
|
2016-12-10 17:23:07 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat sli $1==4 /* Shift left (second << top) */
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with CONST_STACK REG
|
2007-11-02 18:56:58 +00:00
|
|
|
uses reusing %2, REG
|
|
|
|
gen
|
|
|
|
rlwinm %a, %2, {CONST, (%1.val & 0x1F)}, {CONST, 0}, {CONST, 31-(%1.val & 0x1F)}
|
|
|
|
yields %a
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG REG
|
2007-11-02 18:56:58 +00:00
|
|
|
uses reusing %2, REG
|
|
|
|
gen
|
|
|
|
slw %a, %2, %1
|
|
|
|
yields %a
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat sri $1==4 /* Shift right signed (second >> top) */
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with CONST_STACK REG
|
2007-11-02 18:56:58 +00:00
|
|
|
uses reusing %2, REG
|
|
|
|
gen
|
|
|
|
srawi %a, %2, {CONST, %1.val & 0x1F}
|
|
|
|
yields %a
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG REG
|
2007-11-02 18:56:58 +00:00
|
|
|
uses reusing %2, REG
|
|
|
|
gen
|
|
|
|
sraw %a, %2, %1
|
|
|
|
yields %a
|
|
|
|
|
|
|
|
pat sru $1==4 /* Shift right unsigned (second >> top) */
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with CONST_STACK REG
|
2007-11-02 18:56:58 +00:00
|
|
|
uses reusing %2, REG
|
|
|
|
gen
|
|
|
|
rlwinm %a, %2, {CONST, 32-(%1.val & 0x1F)}, {CONST, (%1.val & 0x1F)}, {CONST, 31}
|
|
|
|
yields %a
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG REG
|
2007-11-02 18:56:58 +00:00
|
|
|
uses reusing %2, REG
|
|
|
|
gen
|
|
|
|
srw %a, %2, %1
|
|
|
|
yields %a
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
/* Arrays */
|
|
|
|
|
Use .los4 in lar 4 and .sts4 in sar 4.
Our libem had two implementations of loading a block from a stack, one
for lar 4 and one for los 4. Now lar 4 and los 4 share the code in
.los4. Likewise, sar 4 and sts 4 share the code in .sts4.
Rename .los to .los4 and .sts to .sts4, because they implement los 4
and sts 4. Remove the special case for loading or storing 4 bytes,
because we can do it with 1 iteration of the loop. Remove the lines
to "align size" where the size must already be a multiple of 4.
Fix the upper bound check in .aar4.
Change .aar4, .lar4, .los4, .sar4, .sts4 to pass all operands on the
real stack, except that .los4 and .sts4 take the size in register r3.
Have .aar4 set r3 to the size of the array element. So lar 4 is just
.aar4 then .los4, and sar 4 is just .aar4 then .sts4.
ncg no longer calls .lar4 and .sar4 in libem, because it inlines the
code; but I keep .lar4 and .sar4 in libem, because mcg references
them. They might or might not work in mcg.
2017-02-13 20:22:00 +00:00
|
|
|
pat aar $1==4 /* Address of array element */
|
|
|
|
leaving
|
|
|
|
cal ".aar4"
|
|
|
|
|
|
|
|
pat lar $1==4 /* Load from array */
|
|
|
|
with STACK
|
Add some missing clauses to los, sts, aar, inn, cmi, cmu.
We only implement 'los 4', 'sts 4', 'cmi 4', 'cmu 4', not for sizes
other than 4. Add clause $1==4.
We only implement inn when defined($1).
The rule for aar needs 'kills ALL' because it kills many registers,
like other rules that call libem.
2016-12-10 00:49:50 +00:00
|
|
|
kills ALL
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
|
|
|
bl {LABEL, ".aar4"}
|
Use .los4 in lar 4 and .sts4 in sar 4.
Our libem had two implementations of loading a block from a stack, one
for lar 4 and one for los 4. Now lar 4 and los 4 share the code in
.los4. Likewise, sar 4 and sts 4 share the code in .sts4.
Rename .los to .los4 and .sts to .sts4, because they implement los 4
and sts 4. Remove the special case for loading or storing 4 bytes,
because we can do it with 1 iteration of the loop. Remove the lines
to "align size" where the size must already be a multiple of 4.
Fix the upper bound check in .aar4.
Change .aar4, .lar4, .los4, .sar4, .sts4 to pass all operands on the
real stack, except that .los4 and .sts4 take the size in register r3.
Have .aar4 set r3 to the size of the array element. So lar 4 is just
.aar4 then .los4, and sar 4 is just .aar4 then .sts4.
ncg no longer calls .lar4 and .sar4 in libem, because it inlines the
code; but I keep .lar4 and .sar4 in libem, because mcg references
them. They might or might not work in mcg.
2017-02-13 20:22:00 +00:00
|
|
|
/* pass r3 = size from .aar4 to .los4 */
|
|
|
|
bl {LABEL, ".los4"}
|
2016-12-09 21:36:42 +00:00
|
|
|
|
Use .los4 in lar 4 and .sts4 in sar 4.
Our libem had two implementations of loading a block from a stack, one
for lar 4 and one for los 4. Now lar 4 and los 4 share the code in
.los4. Likewise, sar 4 and sts 4 share the code in .sts4.
Rename .los to .los4 and .sts to .sts4, because they implement los 4
and sts 4. Remove the special case for loading or storing 4 bytes,
because we can do it with 1 iteration of the loop. Remove the lines
to "align size" where the size must already be a multiple of 4.
Fix the upper bound check in .aar4.
Change .aar4, .lar4, .los4, .sar4, .sts4 to pass all operands on the
real stack, except that .los4 and .sts4 take the size in register r3.
Have .aar4 set r3 to the size of the array element. So lar 4 is just
.aar4 then .los4, and sar 4 is just .aar4 then .sts4.
ncg no longer calls .lar4 and .sar4 in libem, because it inlines the
code; but I keep .lar4 and .sar4 in libem, because mcg references
them. They might or might not work in mcg.
2017-02-13 20:22:00 +00:00
|
|
|
pat lae lar $2==4 && nicesize(rom($1, 3))
|
2007-11-02 18:56:58 +00:00
|
|
|
leaving
|
|
|
|
lae $1
|
Use .los4 in lar 4 and .sts4 in sar 4.
Our libem had two implementations of loading a block from a stack, one
for lar 4 and one for los 4. Now lar 4 and los 4 share the code in
.los4. Likewise, sar 4 and sts 4 share the code in .sts4.
Rename .los to .los4 and .sts to .sts4, because they implement los 4
and sts 4. Remove the special case for loading or storing 4 bytes,
because we can do it with 1 iteration of the loop. Remove the lines
to "align size" where the size must already be a multiple of 4.
Fix the upper bound check in .aar4.
Change .aar4, .lar4, .los4, .sar4, .sts4 to pass all operands on the
real stack, except that .los4 and .sts4 take the size in register r3.
Have .aar4 set r3 to the size of the array element. So lar 4 is just
.aar4 then .los4, and sar 4 is just .aar4 then .sts4.
ncg no longer calls .lar4 and .sar4 in libem, because it inlines the
code; but I keep .lar4 and .sar4 in libem, because mcg references
them. They might or might not work in mcg.
2017-02-13 20:22:00 +00:00
|
|
|
aar 4
|
2007-11-02 18:56:58 +00:00
|
|
|
loi rom($1, 3)
|
2016-12-09 21:36:42 +00:00
|
|
|
|
Use .los4 in lar 4 and .sts4 in sar 4.
Our libem had two implementations of loading a block from a stack, one
for lar 4 and one for los 4. Now lar 4 and los 4 share the code in
.los4. Likewise, sar 4 and sts 4 share the code in .sts4.
Rename .los to .los4 and .sts to .sts4, because they implement los 4
and sts 4. Remove the special case for loading or storing 4 bytes,
because we can do it with 1 iteration of the loop. Remove the lines
to "align size" where the size must already be a multiple of 4.
Fix the upper bound check in .aar4.
Change .aar4, .lar4, .los4, .sar4, .sts4 to pass all operands on the
real stack, except that .los4 and .sts4 take the size in register r3.
Have .aar4 set r3 to the size of the array element. So lar 4 is just
.aar4 then .los4, and sar 4 is just .aar4 then .sts4.
ncg no longer calls .lar4 and .sar4 in libem, because it inlines the
code; but I keep .lar4 and .sar4 in libem, because mcg references
them. They might or might not work in mcg.
2017-02-13 20:22:00 +00:00
|
|
|
pat sar $1==4 /* Store to array */
|
|
|
|
with STACK
|
2007-11-02 18:56:58 +00:00
|
|
|
kills ALL
|
|
|
|
gen
|
Use .los4 in lar 4 and .sts4 in sar 4.
Our libem had two implementations of loading a block from a stack, one
for lar 4 and one for los 4. Now lar 4 and los 4 share the code in
.los4. Likewise, sar 4 and sts 4 share the code in .sts4.
Rename .los to .los4 and .sts to .sts4, because they implement los 4
and sts 4. Remove the special case for loading or storing 4 bytes,
because we can do it with 1 iteration of the loop. Remove the lines
to "align size" where the size must already be a multiple of 4.
Fix the upper bound check in .aar4.
Change .aar4, .lar4, .los4, .sar4, .sts4 to pass all operands on the
real stack, except that .los4 and .sts4 take the size in register r3.
Have .aar4 set r3 to the size of the array element. So lar 4 is just
.aar4 then .los4, and sar 4 is just .aar4 then .sts4.
ncg no longer calls .lar4 and .sar4 in libem, because it inlines the
code; but I keep .lar4 and .sar4 in libem, because mcg references
them. They might or might not work in mcg.
2017-02-13 20:22:00 +00:00
|
|
|
bl {LABEL, ".aar4"}
|
|
|
|
/* pass r3 = size from .aar4 to .sts4 */
|
|
|
|
bl {LABEL, ".sts4"}
|
2016-12-09 21:36:42 +00:00
|
|
|
|
Use .los4 in lar 4 and .sts4 in sar 4.
Our libem had two implementations of loading a block from a stack, one
for lar 4 and one for los 4. Now lar 4 and los 4 share the code in
.los4. Likewise, sar 4 and sts 4 share the code in .sts4.
Rename .los to .los4 and .sts to .sts4, because they implement los 4
and sts 4. Remove the special case for loading or storing 4 bytes,
because we can do it with 1 iteration of the loop. Remove the lines
to "align size" where the size must already be a multiple of 4.
Fix the upper bound check in .aar4.
Change .aar4, .lar4, .los4, .sar4, .sts4 to pass all operands on the
real stack, except that .los4 and .sts4 take the size in register r3.
Have .aar4 set r3 to the size of the array element. So lar 4 is just
.aar4 then .los4, and sar 4 is just .aar4 then .sts4.
ncg no longer calls .lar4 and .sar4 in libem, because it inlines the
code; but I keep .lar4 and .sar4 in libem, because mcg references
them. They might or might not work in mcg.
2017-02-13 20:22:00 +00:00
|
|
|
pat lae sar $2==4 && nicesize(rom($1, 3))
|
2007-11-02 18:56:58 +00:00
|
|
|
leaving
|
|
|
|
lae $1
|
Use .los4 in lar 4 and .sts4 in sar 4.
Our libem had two implementations of loading a block from a stack, one
for lar 4 and one for los 4. Now lar 4 and los 4 share the code in
.los4. Likewise, sar 4 and sts 4 share the code in .sts4.
Rename .los to .los4 and .sts to .sts4, because they implement los 4
and sts 4. Remove the special case for loading or storing 4 bytes,
because we can do it with 1 iteration of the loop. Remove the lines
to "align size" where the size must already be a multiple of 4.
Fix the upper bound check in .aar4.
Change .aar4, .lar4, .los4, .sar4, .sts4 to pass all operands on the
real stack, except that .los4 and .sts4 take the size in register r3.
Have .aar4 set r3 to the size of the array element. So lar 4 is just
.aar4 then .los4, and sar 4 is just .aar4 then .sts4.
ncg no longer calls .lar4 and .sar4 in libem, because it inlines the
code; but I keep .lar4 and .sar4 in libem, because mcg references
them. They might or might not work in mcg.
2017-02-13 20:22:00 +00:00
|
|
|
aar 4
|
2007-11-02 18:56:58 +00:00
|
|
|
sti rom($1, 3)
|
|
|
|
|
|
|
|
|
|
|
|
/* Sets */
|
|
|
|
|
2016-12-10 17:23:07 +00:00
|
|
|
pat set defined($1) /* Create singleton set */
|
2017-01-15 21:28:14 +00:00
|
|
|
leaving
|
|
|
|
loc $1
|
|
|
|
cal ".set"
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2016-12-10 17:23:07 +00:00
|
|
|
/* Create set (variable), used in lang/m2/libm2/LtoUset.e */
|
|
|
|
pat set !defined($1)
|
2017-01-15 21:28:14 +00:00
|
|
|
leaving
|
|
|
|
cal ".set"
|
2016-12-09 21:36:42 +00:00
|
|
|
|
Add some missing clauses to los, sts, aar, inn, cmi, cmu.
We only implement 'los 4', 'sts 4', 'cmi 4', 'cmu 4', not for sizes
other than 4. Add clause $1==4.
We only implement inn when defined($1).
The rule for aar needs 'kills ALL' because it kills many registers,
like other rules that call libem.
2016-12-10 00:49:50 +00:00
|
|
|
pat inn defined($1) /* Test for set bit */
|
2017-01-15 21:28:14 +00:00
|
|
|
leaving
|
|
|
|
loc $1
|
|
|
|
cal ".inn"
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-01-17 21:31:38 +00:00
|
|
|
pat inn !defined($1)
|
|
|
|
leaving
|
|
|
|
cal ".inn"
|
2016-12-09 21:36:42 +00:00
|
|
|
|
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
/* Boolean resolutions */
|
|
|
|
|
|
|
|
pat teq /* top = (top == 0) */
|
2017-01-26 00:08:55 +00:00
|
|
|
with REG
|
2007-11-02 18:56:58 +00:00
|
|
|
uses reusing %1, REG
|
|
|
|
gen
|
2017-01-26 00:08:55 +00:00
|
|
|
test %1
|
|
|
|
mfcr %a
|
|
|
|
move {XEQ, %a}, %a
|
2007-11-02 18:56:58 +00:00
|
|
|
yields %a
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat tne /* top = (top != 0) */
|
2017-01-26 00:08:55 +00:00
|
|
|
with REG
|
2007-11-02 18:56:58 +00:00
|
|
|
uses reusing %1, REG
|
|
|
|
gen
|
2017-01-26 00:08:55 +00:00
|
|
|
test %1
|
|
|
|
mfcr %a
|
|
|
|
move {XNE, %a}, %a
|
2007-11-02 18:56:58 +00:00
|
|
|
yields %a
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat tlt /* top = (top < 0) */
|
2017-01-26 00:08:55 +00:00
|
|
|
with REG
|
2007-11-02 18:56:58 +00:00
|
|
|
uses reusing %1, REG
|
|
|
|
gen
|
2017-01-26 00:08:55 +00:00
|
|
|
test %1
|
|
|
|
mfcr %a
|
|
|
|
move {XLT, %a}, %a
|
2007-11-02 18:56:58 +00:00
|
|
|
yields %a
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat tle /* top = (top <= 0) */
|
2017-01-26 00:08:55 +00:00
|
|
|
with REG
|
2007-11-02 18:56:58 +00:00
|
|
|
uses reusing %1, REG
|
|
|
|
gen
|
2017-01-26 00:08:55 +00:00
|
|
|
test %1
|
|
|
|
mfcr %a
|
|
|
|
move {XLE, %a}, %a
|
2007-11-02 18:56:58 +00:00
|
|
|
yields %a
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat tgt /* top = (top > 0) */
|
2017-01-26 00:08:55 +00:00
|
|
|
with REG
|
2007-11-02 18:56:58 +00:00
|
|
|
uses reusing %1, REG
|
|
|
|
gen
|
2017-01-26 00:08:55 +00:00
|
|
|
test %1
|
|
|
|
mfcr %a
|
|
|
|
move {XGT, %a}, %a
|
2007-11-02 18:56:58 +00:00
|
|
|
yields %a
|
|
|
|
|
|
|
|
pat tge /* top = (top >= 0) */
|
2017-01-26 00:08:55 +00:00
|
|
|
with REG
|
2007-11-02 18:56:58 +00:00
|
|
|
uses reusing %1, REG
|
|
|
|
gen
|
2017-01-26 00:08:55 +00:00
|
|
|
test %1
|
|
|
|
mfcr %a
|
|
|
|
move {XGE, %a}, %a
|
2007-11-02 18:56:58 +00:00
|
|
|
yields %a
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-01-26 00:08:55 +00:00
|
|
|
pat cmi teq $1==4 /* Signed second == top */
|
|
|
|
with REG CONST2
|
|
|
|
uses reusing %1, REG={COND_RC, %1, %2.val}
|
|
|
|
gen move {XEQ, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with CONST2 REG
|
|
|
|
uses reusing %1, REG={COND_RC, %2, %1.val}
|
|
|
|
gen move {XEQ, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with REG REG
|
|
|
|
uses reusing %1, REG={COND_RR, %2, %1}
|
|
|
|
gen move {XEQ, %a}, %a
|
|
|
|
yields %a
|
2007-11-02 18:56:58 +00:00
|
|
|
|
2017-01-26 00:08:55 +00:00
|
|
|
pat cmi tne $1==4 /* Signed second != top */
|
|
|
|
with REG CONST2
|
|
|
|
uses reusing %1, REG={COND_RC, %1, %2.val}
|
|
|
|
gen move {XNE, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with CONST2 REG
|
|
|
|
uses reusing %1, REG={COND_RC, %2, %1.val}
|
|
|
|
gen move {XNE, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with REG REG
|
|
|
|
uses reusing %1, REG={COND_RR, %2, %1}
|
|
|
|
gen move {XNE, %a}, %a
|
|
|
|
yields %a
|
2007-11-02 18:56:58 +00:00
|
|
|
|
2017-01-26 00:08:55 +00:00
|
|
|
pat cmi tgt $1==4 /* Signed second > top */
|
|
|
|
with REG CONST2
|
|
|
|
uses reusing %1, REG={COND_RC, %1, %2.val}
|
|
|
|
gen move {XLT, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with CONST2 REG
|
|
|
|
uses reusing %1, REG={COND_RC, %2, %1.val}
|
|
|
|
gen move {XGT, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with REG REG
|
|
|
|
uses reusing %1, REG={COND_RR, %2, %1}
|
|
|
|
gen move {XGT, %a}, %a
|
|
|
|
yields %a
|
2007-11-02 18:56:58 +00:00
|
|
|
|
2017-01-26 00:08:55 +00:00
|
|
|
pat cmi tge $1==4 /* Signed second >= top */
|
|
|
|
with REG CONST2
|
|
|
|
uses reusing %1, REG={COND_RC, %1, %2.val}
|
|
|
|
gen move {XLE, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with CONST2 REG
|
|
|
|
uses reusing %1, REG={COND_RC, %2, %1.val}
|
|
|
|
gen move {XGE, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with REG REG
|
|
|
|
uses reusing %1, REG={COND_RR, %2, %1}
|
|
|
|
gen move {XGE, %a}, %a
|
|
|
|
yields %a
|
2007-11-02 18:56:58 +00:00
|
|
|
|
2017-01-26 00:08:55 +00:00
|
|
|
pat cmi tlt $1==4 /* Signed second < top */
|
|
|
|
with REG CONST2
|
|
|
|
uses reusing %1, REG={COND_RC, %1, %2.val}
|
|
|
|
gen move {XGT, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with CONST2 REG
|
|
|
|
uses reusing %1, REG={COND_RC, %2, %1.val}
|
|
|
|
gen move {XLT, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with REG REG
|
|
|
|
uses reusing %1, REG={COND_RR, %2, %1}
|
|
|
|
gen move {XLT, %a}, %a
|
|
|
|
yields %a
|
2007-11-02 18:56:58 +00:00
|
|
|
|
2017-01-26 00:08:55 +00:00
|
|
|
pat cmi tle $1==4 /* Signed second <= top */
|
|
|
|
with REG CONST2
|
|
|
|
uses reusing %1, REG={COND_RC, %1, %2.val}
|
|
|
|
gen move {XGE, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with CONST2 REG
|
|
|
|
uses reusing %1, REG={COND_RC, %2, %1.val}
|
|
|
|
gen move {XLE, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with REG REG
|
|
|
|
uses reusing %1, REG={COND_RR, %2, %1}
|
|
|
|
gen move {XLE, %a}, %a
|
|
|
|
yields %a
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-01-26 00:08:55 +00:00
|
|
|
pat cmu teq $1==4 /* Unsigned second == top */
|
|
|
|
with REG UCONST2
|
|
|
|
uses reusing %1, REG={CONDL_RC, %1, %2.val}
|
|
|
|
gen move {XEQ, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with UCONST2 REG
|
|
|
|
uses reusing %1, REG={CONDL_RC, %2, %1.val}
|
|
|
|
gen move {XEQ, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with REG REG
|
|
|
|
uses reusing %1, REG={CONDL_RR, %2, %1}
|
|
|
|
gen move {XEQ, %a}, %a
|
|
|
|
yields %a
|
2007-11-02 18:56:58 +00:00
|
|
|
|
2017-01-26 00:08:55 +00:00
|
|
|
pat cmu tne $1==4 /* Unsigned second != top */
|
|
|
|
with REG UCONST2
|
|
|
|
uses reusing %1, REG={CONDL_RC, %1, %2.val}
|
|
|
|
gen move {XNE, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with UCONST2 REG
|
|
|
|
uses reusing %1, REG={CONDL_RC, %2, %1.val}
|
|
|
|
gen move {XNE, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with REG REG
|
|
|
|
uses reusing %1, REG={CONDL_RR, %2, %1}
|
|
|
|
gen move {XNE, %a}, %a
|
|
|
|
yields %a
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-01-26 00:08:55 +00:00
|
|
|
pat cmu tgt $1==4 /* Unsigned second > top */
|
|
|
|
with REG UCONST2
|
|
|
|
uses reusing %1, REG={CONDL_RC, %1, %2.val}
|
|
|
|
gen move {XLT, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with UCONST2 REG
|
|
|
|
uses reusing %1, REG={CONDL_RC, %2, %1.val}
|
|
|
|
gen move {XGT, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with REG REG
|
|
|
|
uses reusing %1, REG={CONDL_RR, %2, %1}
|
|
|
|
gen move {XGT, %a}, %a
|
|
|
|
yields %a
|
2007-11-02 18:56:58 +00:00
|
|
|
|
2017-01-26 00:08:55 +00:00
|
|
|
pat cmu tge $1==4 /* Unsigned second >= top */
|
|
|
|
with REG UCONST2
|
|
|
|
uses reusing %1, REG={CONDL_RC, %1, %2.val}
|
|
|
|
gen move {XLE, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with UCONST2 REG
|
|
|
|
uses reusing %1, REG={CONDL_RC, %2, %1.val}
|
|
|
|
gen move {XGE, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with REG REG
|
|
|
|
uses reusing %1, REG={CONDL_RR, %2, %1}
|
|
|
|
gen move {XGE, %a}, %a
|
|
|
|
yields %a
|
|
|
|
|
|
|
|
pat cmu tlt $1==4 /* Unsigned second < top */
|
|
|
|
with REG UCONST2
|
|
|
|
uses reusing %1, REG={CONDL_RC, %1, %2.val}
|
|
|
|
gen move {XGT, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with UCONST2 REG
|
|
|
|
uses reusing %1, REG={CONDL_RC, %2, %1.val}
|
|
|
|
gen move {XLT, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with REG REG
|
|
|
|
uses reusing %1, REG={CONDL_RR, %2, %1}
|
|
|
|
gen move {XLT, %a}, %a
|
|
|
|
yields %a
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-01-26 00:08:55 +00:00
|
|
|
pat cmu tle $1==4 /* Unsigned second <= top */
|
|
|
|
with REG UCONST2
|
|
|
|
uses reusing %1, REG={CONDL_RC, %1, %2.val}
|
|
|
|
gen move {XGE, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with UCONST2 REG
|
|
|
|
uses reusing %1, REG={CONDL_RC, %2, %1.val}
|
|
|
|
gen move {XLE, %a}, %a
|
|
|
|
yields %a
|
|
|
|
with REG REG
|
|
|
|
uses reusing %1, REG={CONDL_RR, %2, %1}
|
|
|
|
gen move {XLE, %a}, %a
|
|
|
|
yields %a
|
|
|
|
|
|
|
|
|
|
|
|
/* Simple branches */
|
|
|
|
|
|
|
|
proc zxx example zeq
|
|
|
|
with REG STACK
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
2017-01-26 00:08:55 +00:00
|
|
|
test %1
|
|
|
|
bxx* {LABEL, $1}
|
2007-11-02 18:56:58 +00:00
|
|
|
|
2017-01-26 00:08:55 +00:00
|
|
|
/* Pop signed int, branch if... */
|
|
|
|
pat zeq call zxx("beq") /* top == 0 */
|
|
|
|
pat zne call zxx("bne") /* top != 0 */
|
|
|
|
pat zgt call zxx("bgt") /* top > 0 */
|
|
|
|
pat zge call zxx("bge") /* top >= 0 */
|
|
|
|
pat zlt call zxx("blt") /* top < 0 */
|
|
|
|
pat zle call zxx("ble") /* top >= 0 */
|
|
|
|
|
|
|
|
/* The peephole optimizer rewrites
|
|
|
|
* cmi 4 zeq
|
|
|
|
* as beq, and does same for bne, bgt, and so on.
|
|
|
|
*/
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-01-26 00:08:55 +00:00
|
|
|
proc bxx example beq
|
|
|
|
with REG CONST2 STACK
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
2017-01-26 00:08:55 +00:00
|
|
|
cmpwi %1, {CONST, %2.val}
|
|
|
|
bxx[2] {LABEL, $1}
|
|
|
|
with CONST2 REG STACK
|
|
|
|
gen
|
|
|
|
cmpwi %2, {CONST, %1.val}
|
|
|
|
bxx[1] {LABEL, $1}
|
|
|
|
with REG REG STACK
|
|
|
|
gen
|
|
|
|
cmpw %2, %1
|
|
|
|
bxx[1] {LABEL, $1}
|
2007-11-02 18:56:58 +00:00
|
|
|
|
2017-01-26 00:08:55 +00:00
|
|
|
/* Pop two signed ints, branch if... */
|
|
|
|
pat beq call bxx("beq", "beq") /* second == top */
|
|
|
|
pat bne call bxx("bne", "bne") /* second != top */
|
|
|
|
pat bgt call bxx("bgt", "blt") /* second > top */
|
|
|
|
pat bge call bxx("bge", "ble") /* second >= top */
|
|
|
|
pat blt call bxx("blt", "bgt") /* second < top */
|
|
|
|
pat ble call bxx("ble", "bge") /* second >= top */
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-01-26 00:08:55 +00:00
|
|
|
proc cmu4zxx example cmu zeq
|
|
|
|
with REG CONST2 STACK
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
2017-01-26 00:08:55 +00:00
|
|
|
cmplwi %1, {CONST, %2.val}
|
|
|
|
bxx[2] {LABEL, $2}
|
|
|
|
with CONST2 REG STACK
|
|
|
|
gen
|
|
|
|
cmplwi %2, {CONST, %1.val}
|
|
|
|
bxx[1] {LABEL, $2}
|
|
|
|
with REG REG STACK
|
|
|
|
gen
|
|
|
|
cmplw %2, %1
|
|
|
|
bxx[1] {LABEL, $2}
|
2007-11-02 18:56:58 +00:00
|
|
|
|
2017-01-26 00:08:55 +00:00
|
|
|
/* Pop two unsigned ints, branch if... */
|
|
|
|
pat cmu zeq $1==4 call cmu4zxx("beq", "beq")
|
|
|
|
pat cmu zne $1==4 call cmu4zxx("bne", "bne")
|
|
|
|
pat cmu zgt $1==4 call cmu4zxx("bgt", "blt")
|
|
|
|
pat cmu zge $1==4 call cmu4zxx("bge", "ble")
|
|
|
|
pat cmu zlt $1==4 call cmu4zxx("blt", "bgt")
|
|
|
|
pat cmu zle $1==4 call cmu4zxx("ble", "bge")
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
|
2017-01-26 00:08:55 +00:00
|
|
|
/* Comparisons */
|
|
|
|
|
|
|
|
/* Each comparison extracts the lt and gt bits from cr0.
|
|
|
|
* extlwi %a, %a, 2, 0
|
|
|
|
* puts lt in the sign bit, so lt yields a negative result,
|
|
|
|
* gt yields positive.
|
|
|
|
* rlwinm %a, %a, 1, 31, 0
|
|
|
|
* puts gt in the sign bit, to reverse the comparison.
|
|
|
|
*/
|
2007-11-02 18:56:58 +00:00
|
|
|
|
Add some missing clauses to los, sts, aar, inn, cmi, cmu.
We only implement 'los 4', 'sts 4', 'cmi 4', 'cmu 4', not for sizes
other than 4. Add clause $1==4.
We only implement inn when defined($1).
The rule for aar needs 'kills ALL' because it kills many registers,
like other rules that call libem.
2016-12-10 00:49:50 +00:00
|
|
|
pat cmi $1==INT32 /* Signed tristate compare */
|
2017-01-26 00:08:55 +00:00
|
|
|
with REG CONST2
|
|
|
|
uses reusing %1, REG={COND_RC, %1, %2.val}
|
|
|
|
gen rlwinm %a, %a, {CONST, 1}, {CONST, 31}, {CONST, 0}
|
|
|
|
yields %a
|
|
|
|
with CONST2 REG
|
|
|
|
uses reusing %2, REG={COND_RC, %2, %1.val}
|
|
|
|
gen extlwi %a, %a, {CONST, 2}, {CONST, 0}
|
|
|
|
yields %a
|
|
|
|
with REG REG
|
|
|
|
uses reusing %1, REG={COND_RR, %2, %1}
|
|
|
|
gen extlwi %a, %a, {CONST, 2}, {CONST, 0}
|
|
|
|
yields %a
|
2016-12-09 21:36:42 +00:00
|
|
|
|
Add some missing clauses to los, sts, aar, inn, cmi, cmu.
We only implement 'los 4', 'sts 4', 'cmi 4', 'cmu 4', not for sizes
other than 4. Add clause $1==4.
We only implement inn when defined($1).
The rule for aar needs 'kills ALL' because it kills many registers,
like other rules that call libem.
2016-12-10 00:49:50 +00:00
|
|
|
pat cmu $1==INT32 /* Unsigned tristate compare */
|
2017-01-26 00:08:55 +00:00
|
|
|
with REG UCONST2
|
|
|
|
uses reusing %1, REG={CONDL_RC, %1, %2.val}
|
|
|
|
gen rlwinm %a, %a, {CONST, 1}, {CONST, 31}, {CONST, 0}
|
|
|
|
yields %a
|
|
|
|
with UCONST2 REG
|
|
|
|
uses reusing %2, REG={CONDL_RC, %2, %1.val}
|
|
|
|
gen extlwi %a, %a, {CONST, 2}, {CONST, 0}
|
|
|
|
yields %a
|
|
|
|
with REG REG
|
|
|
|
uses reusing %1, REG={CONDL_RR, %2, %1}
|
|
|
|
gen extlwi %a, %a, {CONST, 2}, {CONST, 0}
|
|
|
|
yields %a
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat cmp /* Compare pointers */
|
|
|
|
leaving
|
|
|
|
cmu INT32
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat cms $1==INT32 /* Compare blocks (word sized) */
|
|
|
|
leaving
|
|
|
|
cmi INT32
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2016-12-10 17:23:07 +00:00
|
|
|
pat cms defined($1)
|
2017-02-13 21:52:32 +00:00
|
|
|
leaving
|
|
|
|
loc $1
|
|
|
|
cal ".cms"
|
|
|
|
|
|
|
|
pat cms !defined($1)
|
|
|
|
leaving
|
|
|
|
cal ".cms"
|
2016-12-09 21:36:42 +00:00
|
|
|
|
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
/* Other branching and labelling */
|
|
|
|
|
|
|
|
pat lab topeltsize($1)==4 && !fallthrough($1)
|
2017-01-24 16:26:35 +00:00
|
|
|
kills ALL
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
|
|
|
labeldef $1
|
2017-02-13 22:44:46 +00:00
|
|
|
yields r3
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat lab topeltsize($1)==4 && fallthrough($1)
|
2017-02-18 00:32:27 +00:00
|
|
|
with REG3 STACK
|
2017-01-24 16:26:35 +00:00
|
|
|
kills ALL
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
|
|
|
labeldef $1
|
2017-02-13 22:44:46 +00:00
|
|
|
yields r3
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat lab topeltsize($1)!=4
|
|
|
|
with STACK
|
|
|
|
kills ALL
|
|
|
|
gen
|
|
|
|
labeldef $1
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat bra topeltsize($1)==4 /* Unconditional jump with TOS GPRister */
|
2017-02-18 00:32:27 +00:00
|
|
|
with REG3 STACK
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
|
|
|
b {LABEL, $1}
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat bra topeltsize($1)!=4 /* Unconditional jump without TOS GPRister */
|
|
|
|
with STACK
|
|
|
|
gen
|
|
|
|
b {LABEL, $1}
|
2016-12-09 21:36:42 +00:00
|
|
|
|
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
/* Miscellaneous */
|
|
|
|
|
|
|
|
pat cal /* Call procedure */
|
|
|
|
with STACK
|
|
|
|
kills ALL
|
|
|
|
gen
|
|
|
|
bl {LABEL, $1}
|
|
|
|
|
|
|
|
pat cai /* Call procedure indirect */
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG STACK
|
2007-11-02 18:56:58 +00:00
|
|
|
kills ALL
|
|
|
|
gen
|
2017-02-13 22:44:46 +00:00
|
|
|
mtspr ctr, %1
|
2017-01-26 00:08:55 +00:00
|
|
|
bctrl.
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat lfr $1==INT32 /* Load function result, word */
|
2017-02-13 22:44:46 +00:00
|
|
|
yields r3
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat lfr $1==INT64 /* Load function result, double-word */
|
2017-02-13 22:44:46 +00:00
|
|
|
yields r4 r3
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat ret $1==0 /* Return from procedure */
|
|
|
|
gen
|
2017-02-17 02:18:39 +00:00
|
|
|
/* Restore saved registers. */
|
2007-11-02 18:56:58 +00:00
|
|
|
return
|
2017-02-17 02:18:39 +00:00
|
|
|
/* Epilog: restore lr and fp. */
|
|
|
|
lwz r0, {IND_RC_W, fp, 4}
|
|
|
|
mtspr lr, r0
|
|
|
|
lwz r0, {IND_RC_W, fp, 0}
|
|
|
|
/* Free our stack frame. */
|
|
|
|
addi sp, fp, {CONST, 8}
|
|
|
|
mr fp, r0
|
|
|
|
blr.
|
|
|
|
|
|
|
|
pat ret $1==4 /* Return from procedure, word */
|
2017-02-18 00:32:27 +00:00
|
|
|
with REG3
|
2017-02-17 02:18:39 +00:00
|
|
|
leaving ret 0
|
2007-11-02 18:56:58 +00:00
|
|
|
|
2017-02-17 02:18:39 +00:00
|
|
|
pat ret $1==8 /* Return from proc, double-word */
|
2017-02-18 00:32:27 +00:00
|
|
|
with REG3 REG
|
|
|
|
gen move %2, r4
|
|
|
|
leaving ret 0
|
2007-11-02 18:56:58 +00:00
|
|
|
|
2017-10-18 16:12:42 +00:00
|
|
|
/*
|
|
|
|
* These rules for blm/bls are wrong if length is zero.
|
|
|
|
* So are several procedures in libem.
|
|
|
|
*/
|
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat blm /* Block move constant length */
|
2017-02-12 00:30:12 +00:00
|
|
|
leaving
|
|
|
|
loc $1
|
|
|
|
bls
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat bls /* Block move variable length */
|
2017-02-12 00:30:12 +00:00
|
|
|
with REG REG REG
|
2017-10-18 16:12:42 +00:00
|
|
|
/* ( src%3 dst%2 len%1 -- ) */
|
|
|
|
uses reusing %1, REG, REG, REG
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
2017-10-18 16:12:42 +00:00
|
|
|
srwi %a, %1, {CONST, 2}
|
|
|
|
mtspr ctr, %a
|
|
|
|
addi %b, %3, {CONST, 0-4}
|
|
|
|
addi %c, %2, {CONST, 0-4}
|
|
|
|
1: lwzu %a, {IND_RC_W, %b, 4}
|
|
|
|
stwu %a, {IND_RC_W, %c, 4}
|
2017-02-12 00:30:12 +00:00
|
|
|
bdnz {LABEL, "1b"}
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat csa /* Array-lookup switch */
|
2016-11-19 09:55:41 +00:00
|
|
|
with STACK
|
2017-02-13 21:38:26 +00:00
|
|
|
kills ALL
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
|
|
|
b {LABEL, ".csa"}
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat csb /* Table-lookup switch */
|
2016-11-19 09:55:41 +00:00
|
|
|
with STACK
|
2017-02-13 21:38:26 +00:00
|
|
|
kills ALL
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
|
|
|
b {LABEL, ".csb"}
|
|
|
|
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
/* EM specials */
|
|
|
|
|
|
|
|
pat fil /* Set current filename */
|
|
|
|
leaving
|
|
|
|
lae $1
|
2016-09-30 17:40:36 +00:00
|
|
|
ste "hol0+4"
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat lin /* Set current line number */
|
|
|
|
leaving
|
|
|
|
loc $1
|
2016-09-30 17:40:36 +00:00
|
|
|
ste "hol0"
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
pat lni /* Increment line number */
|
|
|
|
leaving
|
2016-09-30 17:40:36 +00:00
|
|
|
ine "hol0"
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat lim /* Load EM trap ignore mask */
|
|
|
|
leaving
|
|
|
|
lde ".ignmask"
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat sim /* Store EM trap ignore mask */
|
|
|
|
leaving
|
|
|
|
ste ".ignmask"
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat trp /* Raise EM trap */
|
2017-02-18 00:32:27 +00:00
|
|
|
with REG3
|
2017-02-13 21:38:26 +00:00
|
|
|
kills ALL
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
|
|
|
bl {LABEL, ".trap"}
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat sig /* Set trap handler */
|
|
|
|
leaving
|
|
|
|
ste ".trppc"
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat rtt /* Return from trap */
|
|
|
|
leaving
|
|
|
|
ret 0
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-02-14 04:22:31 +00:00
|
|
|
/*
|
|
|
|
* Lexical local base: lxl 0 yields our fp, lxl n yields the
|
|
|
|
* fp of the nth statically enclosing procedure.
|
|
|
|
*/
|
|
|
|
pat lxl $1==0
|
2007-11-02 18:56:58 +00:00
|
|
|
leaving
|
|
|
|
lor 0
|
2017-02-14 04:22:31 +00:00
|
|
|
pat lxl $1==1
|
|
|
|
yields {IND_RC_W, fp, SL_OFFSET}
|
|
|
|
pat lxl $1==2
|
|
|
|
uses REG={IND_RC_W, fp, SL_OFFSET}
|
|
|
|
yields {IND_RC_W, %a, SL_OFFSET}
|
|
|
|
pat lxl $1==3
|
|
|
|
uses REG={IND_RC_W, fp, SL_OFFSET}, reusing %a, REG
|
|
|
|
gen move {IND_RC_W, %a, SL_OFFSET}, %b
|
|
|
|
yields {IND_RC_W, %b, SL_OFFSET}
|
|
|
|
pat lxl $1>=4 && $1<=0x8000
|
|
|
|
uses REG={IND_RC_W, fp, SL_OFFSET},
|
|
|
|
REG={CONST_0000_7FFF, $1-1}
|
|
|
|
gen
|
|
|
|
mtspr ctr, %b
|
2017-10-18 16:12:42 +00:00
|
|
|
1: lwz %a, {IND_RC_W, %a, SL_OFFSET}
|
2017-02-14 04:22:31 +00:00
|
|
|
bdnz {LABEL, "1b"}
|
|
|
|
yields %a
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-02-14 04:22:31 +00:00
|
|
|
pat dch /* Dynamic chain: LB -> caller's LB */
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG
|
2017-02-14 04:22:31 +00:00
|
|
|
yields {IND_RC_W, %1, FP_OFFSET}
|
2007-11-02 18:56:58 +00:00
|
|
|
|
2017-02-14 04:22:31 +00:00
|
|
|
pat lpb /* LB -> argument base */
|
2007-11-02 18:56:58 +00:00
|
|
|
leaving
|
|
|
|
adp EM_BSIZE
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-02-14 04:22:31 +00:00
|
|
|
pat lxa /* Lexical argument base */
|
2007-11-02 18:56:58 +00:00
|
|
|
leaving
|
|
|
|
lxl $1
|
|
|
|
lpb
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat gto /* longjmp */
|
2017-02-13 21:38:26 +00:00
|
|
|
with STACK
|
|
|
|
uses REG
|
|
|
|
gen
|
|
|
|
move {LABEL, $1}, %a
|
2017-02-13 22:44:46 +00:00
|
|
|
move {IND_RC_W, %a, 8}, fp
|
|
|
|
move {IND_RC_W, %a, 4}, sp
|
2017-02-13 21:38:26 +00:00
|
|
|
move {IND_RC_W, %a, 0}, %a
|
2017-02-13 22:44:46 +00:00
|
|
|
mtspr ctr, %a
|
2017-02-13 21:38:26 +00:00
|
|
|
bctr.
|
2007-11-02 18:56:58 +00:00
|
|
|
|
2017-02-14 04:22:31 +00:00
|
|
|
pat lor $1==0 /* Load local base */
|
2007-11-02 18:56:58 +00:00
|
|
|
uses REG
|
|
|
|
gen
|
2017-02-13 22:44:46 +00:00
|
|
|
move fp, %a
|
2007-11-02 18:56:58 +00:00
|
|
|
yields %a
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-02-14 04:22:31 +00:00
|
|
|
pat lor $1==1 /* Load stack pointer */
|
2007-11-02 18:56:58 +00:00
|
|
|
uses REG
|
|
|
|
gen
|
2017-02-13 22:44:46 +00:00
|
|
|
move sp, %a
|
2007-11-02 18:56:58 +00:00
|
|
|
yields %a
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-02-14 04:22:31 +00:00
|
|
|
pat str $1==0 /* Store local base */
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
2017-02-13 22:44:46 +00:00
|
|
|
move %1, fp
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-02-14 04:22:31 +00:00
|
|
|
pat str $1==1 /* Store stack pointer */
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
2017-02-13 22:44:46 +00:00
|
|
|
move %1, sp
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2016-12-09 22:32:42 +00:00
|
|
|
pat loc ass $1==4 && $2==4 /* Drop 4 bytes from stack */
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with exact REG
|
2016-09-28 04:13:35 +00:00
|
|
|
/* nop */
|
|
|
|
with STACK
|
|
|
|
gen
|
2017-02-13 22:44:46 +00:00
|
|
|
addi sp, sp, {CONST, 4}
|
2016-09-28 04:13:35 +00:00
|
|
|
|
2016-12-09 22:32:42 +00:00
|
|
|
pat ass $1==4 /* Adjust stack by variable amount */
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
with CONST2 STACK
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
2017-02-13 22:44:46 +00:00
|
|
|
move {SUM_RC, sp, %1.val}, sp
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
with CONST_HZ STACK
|
|
|
|
gen
|
2017-02-13 22:44:46 +00:00
|
|
|
move {SUM_RC, sp, his(%1.val)}, sp
|
2017-02-08 17:12:28 +00:00
|
|
|
with CONST_STACK-CONST2-CONST_HZ STACK
|
Refactor how powerpc ncg pushes constants.
When loc (load constant) pushes a constant, it now checks the value of
the constant and pushes any of 7 tokens. These tokens allow stack
patterns to recognize 16-bit signed integers (CONST2), 16-bit unsigned
integers (UCONST2), multiples of 0x10000 (CONST_HZ), and other
interesting forms of constants.
Use the new constant tokens in the rules for adi, sbi, and, ior, xor.
Adjust a few other rules to understand the new tokens.
Require that SUM_RC has a signed 16-bit constant, and OR_RC and XOR_RC
each have an unsigned 16-bit constant. The moves from SUM_RC, OR_RC,
XOR_RC to GPR no longer touch the scratch register, because the
constant is not too big.
2016-10-16 17:58:54 +00:00
|
|
|
gen
|
2017-02-13 22:44:46 +00:00
|
|
|
move {SUM_RC, sp, his(%1.val)}, sp
|
|
|
|
move {SUM_RC, sp, los(%1.val)}, sp
|
Trimming mach/powerpc/ncg/table
Remove coercion from LABEL to REG. The coercion never happens because
I have stopped putting LABEL on the stack. Also remove LABEL from set
ANY_BHW. Retain the move from LABEL to REG because pat gto uses it.
Remove li32 instruction, unused after the switch to the hi16, ha16,
lo16 syntax.
Remove COMMENT(...) lines from most moves. In my opinion, they took
too much space, both in the table and in the assembly output. The
stacking rules and coercions keep their COMMENT(...) lines.
In test GPR, don't write to RSCRATCH.
Fold several coercions into a single coercion from ANY_BHW uses REG.
Use REG instead of GPR in stack patterns. REG and GPR act the same,
because every GPR on the stack is a REG, but I want to be clear that I
expect a REG, not r0.
In code rules, sort SUM_RC before SORT_RR, so I can add SUM_RL later.
Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the
peephole optimizer can optimize it. If $2!=$4, then the EM program is
missing a conversion from size $2 to size $4.
Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These
rules would never get used, unless the EM program is missing a
conversion from size 4 to size 1 or 2.
2017-02-08 17:27:16 +00:00
|
|
|
with REG STACK
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
2017-02-13 22:44:46 +00:00
|
|
|
move {SUM_RR, sp, %1}, sp
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat asp /* Adjust stack by constant amount */
|
|
|
|
leaving
|
|
|
|
loc $1
|
2016-12-09 22:32:42 +00:00
|
|
|
ass 4
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-01-15 21:28:14 +00:00
|
|
|
pat lae rck $2==4 /* Range check */
|
2017-01-17 21:31:38 +00:00
|
|
|
with REG
|
2017-02-13 21:38:26 +00:00
|
|
|
kills ALL
|
2017-01-15 21:28:14 +00:00
|
|
|
gen
|
2017-01-26 00:08:55 +00:00
|
|
|
cmpwi %1, {CONST, rom($1, 1)}
|
|
|
|
blt {LABEL, ".trap_erange"}
|
|
|
|
cmpwi %1, {CONST, rom($1, 2)}
|
|
|
|
bgt {LABEL, ".trap_erange"}
|
2017-01-17 21:31:38 +00:00
|
|
|
yields %1
|
2017-01-15 21:28:14 +00:00
|
|
|
|
|
|
|
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
/* Single-precision floating-point */
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat zrf $1==INT32 /* Push zero */
|
|
|
|
leaving
|
|
|
|
loe ".fs_00000000"
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-10-17 21:53:03 +00:00
|
|
|
pat adf $1==4 /* Add single */
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
with FSREG FSREG
|
|
|
|
uses reusing %1, FSREG
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
fadds %a, %2, %1
|
|
|
|
yields %a
|
2017-10-17 21:53:03 +00:00
|
|
|
pat adf stl $1==4 && inreg($2)==reg_float
|
|
|
|
with FSREG FSREG
|
|
|
|
gen fadds {LOCAL, $2}, %2, %1
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-10-17 21:53:03 +00:00
|
|
|
pat sbf $1==4 /* Subtract single */
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
with FSREG FSREG
|
|
|
|
uses reusing %1, FSREG
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
fsubs %a, %2, %1
|
|
|
|
yields %a
|
2017-10-17 21:53:03 +00:00
|
|
|
pat sbf stl $1==4 && inreg($2)==reg_float
|
|
|
|
with FSREG FSREG
|
|
|
|
gen fsubs {LOCAL, $2}, %2, %1
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-10-17 21:53:03 +00:00
|
|
|
pat mlf $1==4 /* Multiply single */
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
with FSREG FSREG
|
|
|
|
uses reusing %1, FSREG
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
fmuls %a, %2, %1
|
|
|
|
yields %a
|
2017-10-17 21:53:03 +00:00
|
|
|
pat mlf stl $1==4 && inreg($2)==reg_float
|
|
|
|
with FSREG FSREG
|
|
|
|
gen fmuls {LOCAL, $2}, %2, %1
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
pat dvf $1==INT32 /* Divide single */
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
with FSREG FSREG
|
|
|
|
uses reusing %1, FSREG
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
fdivs %a, %2, %1
|
|
|
|
yields %a
|
2017-10-17 21:53:03 +00:00
|
|
|
pat dvf stl $1==4 && inreg($2)==reg_float
|
|
|
|
with FSREG FSREG
|
|
|
|
gen fdivs {LOCAL, $2}, %2, %1
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
pat ngf $1==INT32 /* Negate single */
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
with FSREG
|
|
|
|
uses reusing %1, FSREG
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
fneg %a, %1
|
|
|
|
yields %a
|
2017-10-17 21:53:03 +00:00
|
|
|
pat ngf stl $1==4 && inreg($2)==reg_float
|
|
|
|
with FSREG
|
|
|
|
gen fneg {LOCAL, $2}, %1
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
pat cmf $1==INT32 /* Compare single */
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
with FSREG FSREG
|
2017-01-26 00:08:55 +00:00
|
|
|
uses REG={COND_FS, %2, %1}
|
|
|
|
gen extlwi %a, %a, {CONST, 2}, {CONST, 0}
|
|
|
|
yields %a
|
|
|
|
|
|
|
|
pat cmf teq $1==4 /* Single second == top */
|
|
|
|
with FSREG FSREG
|
|
|
|
uses REG={COND_FS, %2, %1}
|
|
|
|
gen move {XEQ, %a}, %a
|
|
|
|
yields %a
|
|
|
|
|
|
|
|
pat cmf tne $1==4 /* Single second == top */
|
|
|
|
with FSREG FSREG
|
|
|
|
uses REG={COND_FS, %2, %1}
|
|
|
|
gen move {XNE, %a}, %a
|
|
|
|
yields %a
|
|
|
|
|
|
|
|
pat cmf tgt $1==4 /* Single second > top */
|
|
|
|
with FSREG FSREG
|
|
|
|
uses REG={COND_FS, %2, %1}
|
|
|
|
gen move {XGT, %a}, %a
|
|
|
|
yields %a
|
|
|
|
|
|
|
|
pat cmf tge $1==4 /* Single second >= top */
|
|
|
|
with FSREG FSREG
|
|
|
|
uses REG={COND_FS, %2, %1}
|
|
|
|
gen move {XGE, %a}, %a
|
|
|
|
yields %a
|
|
|
|
|
|
|
|
pat cmf tlt $1==4 /* Single second < top */
|
|
|
|
with FSREG FSREG
|
|
|
|
uses REG={COND_FS, %2, %1}
|
|
|
|
gen move {XLT, %a}, %a
|
|
|
|
yields %a
|
|
|
|
|
|
|
|
pat cmf tle $1==4 /* Single second <= top */
|
|
|
|
with FSREG FSREG
|
|
|
|
uses REG={COND_FS, %2, %1}
|
|
|
|
gen move {XLE, %a}, %a
|
|
|
|
yields %a
|
|
|
|
|
|
|
|
proc cmf4zxx example cmf zeq
|
2017-02-18 00:29:45 +00:00
|
|
|
with FSREG FSREG STACK
|
2017-01-26 00:08:55 +00:00
|
|
|
uses REG
|
|
|
|
gen
|
2017-02-13 22:44:46 +00:00
|
|
|
fcmpo cr0, %2, %1
|
2017-01-26 00:08:55 +00:00
|
|
|
bxx* {LABEL, $2}
|
|
|
|
|
|
|
|
/* Pop 2 singles, branch if... */
|
|
|
|
pat cmf zeq $1==4 call cmf4zxx("beq")
|
|
|
|
pat cmf zne $1==4 call cmf4zxx("bne")
|
|
|
|
pat cmf zgt $1==4 call cmf4zxx("bgt")
|
|
|
|
pat cmf zge $1==4 call cmf4zxx("bge")
|
|
|
|
pat cmf zlt $1==4 call cmf4zxx("blt")
|
|
|
|
pat cmf zle $1==4 call cmf4zxx("ble")
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat loc loc cff $1==INT32 && $2==INT64 /* Convert single to double */
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
with FSREG
|
|
|
|
yields %1.1
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-02-12 04:23:47 +00:00
|
|
|
/* Convert single to signed int */
|
|
|
|
pat loc loc cfi $1==4 && $2==4
|
|
|
|
leaving
|
|
|
|
loc 4
|
|
|
|
loc 8
|
|
|
|
cff
|
|
|
|
loc 8
|
|
|
|
loc 4
|
|
|
|
cfi
|
|
|
|
|
|
|
|
/* Convert single to unsigned int */
|
|
|
|
pat loc loc cfu $1==4 && $2==4
|
|
|
|
leaving
|
|
|
|
loc 4
|
|
|
|
loc 8
|
|
|
|
cff
|
|
|
|
loc 8
|
|
|
|
loc 4
|
|
|
|
cfu
|
|
|
|
|
|
|
|
/* Convert signed int to single */
|
|
|
|
pat loc loc cif $1==4 && $2==4
|
|
|
|
leaving
|
|
|
|
loc 4
|
|
|
|
loc 8
|
|
|
|
cif
|
|
|
|
loc 8
|
|
|
|
loc 4
|
|
|
|
cff
|
|
|
|
|
|
|
|
/* Convert unsigned int to single */
|
|
|
|
pat loc loc cuf $1==4 && $2==4
|
|
|
|
leaving
|
|
|
|
loc 4
|
|
|
|
loc 8
|
|
|
|
cuf
|
|
|
|
loc 8
|
|
|
|
loc 4
|
|
|
|
cff
|
2016-12-09 21:36:42 +00:00
|
|
|
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
|
|
|
|
/* Double-precision floating-point */
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat zrf $1==INT64 /* Push zero */
|
|
|
|
leaving
|
|
|
|
lde ".fd_00000000"
|
2016-12-09 21:36:42 +00:00
|
|
|
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
pat adf $1==8 /* Add double */
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
with FREG FREG
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
uses reusing %1, FREG
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
fadd %a, %2, %1
|
|
|
|
yields %a
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
pat adf sdl $1==8 && inreg($2)==reg_float
|
|
|
|
with FREG FREG
|
|
|
|
gen fadd {DLOCAL, $2}, %2, %1
|
2016-12-09 21:36:42 +00:00
|
|
|
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
pat sbf $1==8 /* Subtract double */
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
with FREG FREG
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
uses reusing %1, FREG
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
fsub %a, %2, %1
|
|
|
|
yields %a
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
pat sbf sdl $1==8 && inreg($2)==reg_float
|
|
|
|
with FREG FREG
|
|
|
|
gen fsub {DLOCAL, $2}, %2, %1
|
2016-12-09 21:36:42 +00:00
|
|
|
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
pat mlf $1==8 /* Multiply double */
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
with FREG FREG
|
2007-11-02 18:56:58 +00:00
|
|
|
uses reusing %1, FREG
|
|
|
|
gen
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
fmul %a, %2, %1
|
|
|
|
yields %a
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
pat mlf sdl $1==8 && inreg($2)==reg_float
|
|
|
|
with FREG FREG
|
|
|
|
gen fmul {DLOCAL, $2}, %2, %1
|
2007-11-02 18:56:58 +00:00
|
|
|
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
pat dvf $1==8 /* Divide double */
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
with FREG FREG
|
2007-11-02 18:56:58 +00:00
|
|
|
uses reusing %1, FREG
|
|
|
|
gen
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
fdiv %a, %2, %1
|
|
|
|
yields %a
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
pat dvf sdl $1==8 && inreg($2)==reg_float
|
|
|
|
with FREG FREG
|
|
|
|
gen fdiv {DLOCAL, $2}, %2, %1
|
2007-11-02 18:56:58 +00:00
|
|
|
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
pat ngf $1==8 /* Negate double */
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
with FREG
|
2007-11-02 18:56:58 +00:00
|
|
|
uses reusing %1, FREG
|
|
|
|
gen
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
fneg %a, %1
|
|
|
|
yields %a
|
Add floating-point register variables to PowerPC ncg.
Use f14 to f31 as register variables for 8-byte double-precison.
There are no regvars for 4-byte double precision, because all
regvar(reg_float) must have the same size. I expect more programs to
prefer 8-byte double precision.
Teach mach/powerpc/ncg/mach.c to emit stfd and lfd instructions to
save and restore 8-byte regvars. Delay emitting the function prolog
until f_regsave(), so we can use one addi to make stack space for both
local vars and saved registers. Be more careful with types in mach.c;
don't assume that int and long and full are the same.
In ncg table, add f14 to f31 as register variables, and some rules to
use them. Add rules to put the result of fadd, fsub, fmul, fdiv, fneg
in a regvar. Without such rules, the result would go in a scratch
FREG, and we would need fmr to move it to the regvar. Also add a rule
for pat sdl inreg($1)==reg_float with STACK, so we can unstack the
value directly into the regvar, again without a scratch FREG and fmr.
Edit util/ego/descr/powerpc.descr to tell ego about the new float
regvars. This might not be working right; ego usually decides against
using any float regvars, so ack -O1 (not running ego) uses the
regvars, but ack -O4 (running ego) doesn't use the regvars.
Beware that ack -mosxppc runs ego using powerpc.descr but -mlinuxppc
and -mqemuppc run ego without a config file (since 8ef7c31). I am
testing powerpc.descr with a local edit to plat/linuxppc/descr to run
ego with powerpc.descr there, but I did not commit my local edit.
2017-02-16 00:34:07 +00:00
|
|
|
pat ngf sdl $1==8 && inreg($2)==reg_float
|
|
|
|
with FREG
|
|
|
|
gen fneg {DLOCAL, $2}, %1
|
2007-11-02 18:56:58 +00:00
|
|
|
|
|
|
|
pat cmf $1==INT64 /* Compare double */
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
with FREG FREG
|
2017-01-26 00:08:55 +00:00
|
|
|
uses REG={COND_FD, %2, %1}
|
|
|
|
gen extlwi %a, %a, {CONST, 2}, {CONST, 0}
|
|
|
|
yields %a
|
|
|
|
|
|
|
|
pat cmf teq $1==8 /* Double second == top */
|
|
|
|
with FREG FREG
|
|
|
|
uses REG={COND_FD, %2, %1}
|
|
|
|
gen move {XEQ, %a}, %a
|
|
|
|
yields %a
|
|
|
|
|
|
|
|
pat cmf tne $1==8 /* Single second == top */
|
|
|
|
with FREG FREG
|
|
|
|
uses REG={COND_FD, %2, %1}
|
|
|
|
gen move {XNE, %a}, %a
|
|
|
|
yields %a
|
|
|
|
|
|
|
|
pat cmf tgt $1==8 /* Double second > top */
|
|
|
|
with FREG FREG
|
|
|
|
uses REG={COND_FD, %2, %1}
|
|
|
|
gen move {XGT, %a}, %a
|
|
|
|
yields %a
|
|
|
|
|
|
|
|
pat cmf tge $1==8 /* Double second >= top */
|
|
|
|
with FREG FREG
|
|
|
|
uses REG={COND_FD, %2, %1}
|
|
|
|
gen move {XGE, %a}, %a
|
|
|
|
yields %a
|
|
|
|
|
|
|
|
pat cmf tlt $1==8 /* Double second < top */
|
|
|
|
with FREG FREG
|
|
|
|
uses REG={COND_FD, %2, %1}
|
|
|
|
gen move {XLT, %a}, %a
|
|
|
|
yields %a
|
|
|
|
|
|
|
|
pat cmf tle $1==8 /* Double second <= top */
|
|
|
|
with FREG FREG
|
|
|
|
uses REG={COND_FD, %2, %1}
|
|
|
|
gen move {XLE, %a}, %a
|
|
|
|
yields %a
|
|
|
|
|
|
|
|
proc cmf8zxx example cmf zeq
|
|
|
|
with FREG FREG STACK
|
|
|
|
uses REG
|
|
|
|
gen
|
2017-02-13 22:44:46 +00:00
|
|
|
fcmpo cr0, %2, %1
|
2017-01-26 00:08:55 +00:00
|
|
|
bxx* {LABEL, $2}
|
|
|
|
|
|
|
|
/* Pop 2 doubles, branch if... */
|
|
|
|
pat cmf zeq $1==8 call cmf8zxx("beq")
|
|
|
|
pat cmf zne $1==8 call cmf8zxx("bne")
|
|
|
|
pat cmf zgt $1==8 call cmf8zxx("bgt")
|
|
|
|
pat cmf zge $1==8 call cmf8zxx("bge")
|
|
|
|
pat cmf zlt $1==8 call cmf8zxx("blt")
|
|
|
|
pat cmf zle $1==8 call cmf8zxx("ble")
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2007-11-02 18:56:58 +00:00
|
|
|
pat loc loc cff $1==INT64 && $2==INT32 /* Convert double to single */
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
with FREG
|
|
|
|
uses reusing %1, FSREG
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
Enable the Hall check again, and get powerpc to pass it.
Upon enabling the check, mach/powerpc/ncg/table fails to build as ncgg
gives many errors of "Previous rule impossible on empty stack". David
Given reported this problem in 2013:
https://sourceforge.net/p/tack/mailman/message/30814694/
Commit c93cb69 commented out the error in util/ncgg/cgg.y to disable
the Hall check. This commit enables it again. In ncgg, the Hall
check is checking that a rule is possible with an empty fake stack.
It would be possible if ncg can coerce the values from the real stack
to the fake stack. The powerpc table defined coercions from STACK to
{FS, %a} and {FD, %a}, but the Hall check didn't understand the
coercions and rejected each rule "with FS" or "with FD".
This commit removes the FS and FD tokens and adds a new group of FSREG
registers for single-precision floats, while keeping FREG registers
for double precision. The registers overlap, with each FSREG
containing one FREG, because it is the same register in PowerPC
hardware. FS tokens become FSREG registers and FD tokens become FREG
registers. The Hall check understands the coercions from STACK to
FSREG and FREG. The idea to define separate but overlapping registers
comes from the PDP-11 table (mach/pdp/ncg/table).
This commit also removes F0 from the FREG group. This is my attempt
to keep F0 off the fake stack, because one of the stacking rules uses
F0 as a scratch register (FSCRATCH).
2016-09-18 19:08:55 +00:00
|
|
|
frsp %a, %1
|
|
|
|
yields %a
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-02-12 04:23:47 +00:00
|
|
|
/* Convert double to signed int */
|
|
|
|
pat loc loc cfi $1==8 && $2==4
|
|
|
|
with FREG STACK
|
|
|
|
uses reusing %1, FREG
|
2007-11-02 18:56:58 +00:00
|
|
|
gen
|
2017-02-12 04:23:47 +00:00
|
|
|
fctiwz %a, %1
|
2017-02-13 22:44:46 +00:00
|
|
|
stfdu %a, {IND_RC_D, sp, 0-8}
|
|
|
|
addi sp, sp, {CONST, 4}
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-02-12 04:23:47 +00:00
|
|
|
/* Convert double to unsigned int */
|
|
|
|
pat loc loc cfu $1==8 && $2==4
|
2017-02-13 21:38:26 +00:00
|
|
|
leaving
|
|
|
|
cal ".cfu8"
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-10-17 21:00:28 +00:00
|
|
|
/* Convert signed int to double */
|
2017-02-12 04:23:47 +00:00
|
|
|
pat loc loc cif $1==4 && $2==8
|
2017-10-17 21:00:28 +00:00
|
|
|
leaving
|
|
|
|
cal ".cif8"
|
2016-12-09 21:36:42 +00:00
|
|
|
|
2017-10-17 21:00:28 +00:00
|
|
|
/* Convert unsigned int to double */
|
2017-02-12 04:23:47 +00:00
|
|
|
pat loc loc cuf $1==4 && $2==8
|
2017-10-17 21:00:28 +00:00
|
|
|
leaving
|
|
|
|
cal ".cuf8"
|
2016-10-17 04:39:59 +00:00
|
|
|
|
2017-02-12 21:44:37 +00:00
|
|
|
pat fef $1==8 /* Split fraction, exponent */
|
|
|
|
leaving
|
|
|
|
cal ".fef8"
|
2016-10-17 04:39:59 +00:00
|
|
|
|
2017-02-12 21:44:37 +00:00
|
|
|
/* Multiply two doubles, then split fraction, integer */
|
|
|
|
pat fif $1==8
|
|
|
|
leaving
|
|
|
|
cal ".fif8"
|