diff --git a/examples/hilo.p b/examples/hilo.p index b13bbd1a0..be953a09e 100644 --- a/examples/hilo.p +++ b/examples/hilo.p @@ -8,7 +8,8 @@ program hilo(input, output); type - string = array [0..255] of char; + string = packed array [0..255] of char; + charstar = packed array [0..0] of char; var playing : Boolean; @@ -34,18 +35,18 @@ function random(range : integer) : integer; that conflicts with a Pascal keyword. Luckily there's a private function uread() in the ACK Pascal library that we can use instead. } -function uread(fd : integer; var buffer : char; count : integer) : integer; +function uread(fd : integer; var buffer : charstar; count : integer) : integer; extern; function readchar : char; var - c : char; + c : charstar; dummy : integer; begin - c := chr(0); + c[0] := chr(0); dummy := uread(0, c, 1); - readchar := c; + readchar := c[0]; end; procedure readstring(var buffer : string; var length : integer); diff --git a/h/cgg_cg.h b/h/cgg_cg.h index a7802ad33..6cc04b007 100644 --- a/h/cgg_cg.h +++ b/h/cgg_cg.h @@ -39,7 +39,6 @@ #define DO_TOSTACK 23 #define DO_KILLREG 24 #define DO_LABDEF 25 -#define DO_STACKADJUST 26 #ifndef MAXATT #define MAXATT TOKENSIZE @@ -134,7 +133,6 @@ typedef struct exprnode *node_p; #define EX_ISROM 44 #define EX_TOPELTSIZE 45 #define EX_FALLTHROUGH 46 -#define EX_STACKOFFSET 47 typedef struct { /* to stack coercions */ diff --git a/mach/powerpc/as/mach2.c b/mach/powerpc/as/mach2.c index 8d72daef3..96d6690df 100644 --- a/mach/powerpc/as/mach2.c +++ b/mach/powerpc/as/mach2.c @@ -82,6 +82,7 @@ %token OP_TO_RA_SI %token OP_LA +%token OP_LI32 /* Other token types */ diff --git a/mach/powerpc/as/mach3.c b/mach/powerpc/as/mach3.c index 7fc5d87f0..0f0bfdae7 100644 --- a/mach/powerpc/as/mach3.c +++ b/mach/powerpc/as/mach3.c @@ -98,6 +98,7 @@ /* Special instructions */ +0, OP_LI32, 0, "li32", 0, OP_LA, 0, "la", 0, OP_LA, 0, "li", 0, OP_RS_RA_RA_C, 31<<26 | 444<<1, "mr", diff --git a/mach/powerpc/as/mach4.c b/mach/powerpc/as/mach4.c index 
31d85dd99..3f79ca86c 100644 --- a/mach/powerpc/as/mach4.c +++ b/mach/powerpc/as/mach4.c @@ -59,7 +59,7 @@ operation | OP_LEV u7 { emit4($1 | ($2<<5)); } | OP_LIA lia { emit4($1 | $2); } | OP_LIL lil { emit4($1 | $2); } - | OP_LA la /* emitted in subrule */ + | OP_LI32 li32 /* emitted in subrule */ ; c @@ -194,7 +194,7 @@ bda } ; -la +li32 : GPR ',' expr { quad type = $3.typ & S_TYP; diff --git a/mach/powerpc/libem/aar4.s b/mach/powerpc/libem/aar4.s index 33b67e0dc..2c65af643 100644 --- a/mach/powerpc/libem/aar4.s +++ b/mach/powerpc/libem/aar4.s @@ -13,10 +13,14 @@ ! r3 = ptr to descriptor ! r4 = index ! r5 = address of array +! Yields: +! r3 = address of element +! r0 = size of element (used by .lar4, .sar4) +! Preserves r10 for .lar4, .sar4 .define .aar4 .aar4: - la r0, .trap_earray + li32 r0, .trap_earray mtspr ctr, r0 ! load CTR with trap address lwz r0, 0(r3) diff --git a/mach/powerpc/libem/build.lua b/mach/powerpc/libem/build.lua index d17adcd92..56278aa55 100644 --- a/mach/powerpc/libem/build.lua +++ b/mach/powerpc/libem/build.lua @@ -3,7 +3,6 @@ for _, plat in ipairs(vars.plats) do name = "lib_"..plat, srcs = { "./*.s", - "./*.c" }, vars = { plat = plat }, deps = { diff --git a/mach/powerpc/libem/cfu8.s b/mach/powerpc/libem/cfu8.s index 2e082c252..758df8572 100644 --- a/mach/powerpc/libem/cfu8.s +++ b/mach/powerpc/libem/cfu8.s @@ -13,15 +13,15 @@ .define .cfu8 .cfu8: - la r3, .fd_00000000 + li32 r3, .fd_00000000 lfd f0, 0(r3) ! f0 = 0.0 lfd f1, 0(sp) ! value to be converted - la r3, .fd_FFFFFFFF + li32 r3, .fd_FFFFFFFF lfd f3, 0(r3) ! f3 = 0xFFFFFFFF - la r3, .fd_80000000 + li32 r3, .fd_80000000 lfd f4, 0(r3) ! f4 = 0x80000000 fsel f2, f1, f1, f0 diff --git a/mach/powerpc/libem/cif8.s b/mach/powerpc/libem/cif8.s index 2e7b48d17..d2c82e54e 100644 --- a/mach/powerpc/libem/cif8.s +++ b/mach/powerpc/libem/cif8.s @@ -24,7 +24,7 @@ lfd f0, 0(sp) ! load value - la r3, pivot + li32 r3, pivot lfd f1, 0(r3) ! load pivot value fsub f0, f0, f1 ! 
adjust diff --git a/mach/powerpc/libem/cuf8.s b/mach/powerpc/libem/cuf8.s index ea5ec263a..5d5a12988 100644 --- a/mach/powerpc/libem/cuf8.s +++ b/mach/powerpc/libem/cuf8.s @@ -20,7 +20,7 @@ lfd f0, 0(sp) ! load value - la r3, pivot + li32 r3, pivot lfd f1, 0(r3) ! load pivot value fsub f0, f0, f1 ! adjust diff --git a/mach/powerpc/libem/fef8.c b/mach/powerpc/libem/fef8.c deleted file mode 100644 index 244d0fac8..000000000 --- a/mach/powerpc/libem/fef8.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - * $Source$ - * $State$ - * $Revision$ - */ - -/* no headers allowed! */ - -/* Given a double, calculates the mantissa and exponent. - * - * This function is intended to be called internally by the code generator, - * so the calling convention is odd. - */ - -int __fef8(double* fp) -{ - double f = *fp; - int exponent, sign; - - if (f == 0.0) - return 0; - - if (f < 0.0) - { - sign = -1; - f = -f; - } - else - sign = 0; - - exponent = 0; - while (f >= 1.0) - { - f /= 2.0; - exponent++; - } - - while (f < 0.5) - { - f *= 2.0; - exponent--; - } - - *fp = (sign) ? -f : f; - return exponent; -} diff --git a/mach/powerpc/libem/fef8.s b/mach/powerpc/libem/fef8.s new file mode 100644 index 000000000..fc72b04f2 --- /dev/null +++ b/mach/powerpc/libem/fef8.s @@ -0,0 +1,58 @@ +#include "powerpc.h" + +.sect .text + +! Split a double-precision float into fraction and exponent, like +! frexp(3) in C. On entry: +! r3 = float, high word (bits 0..31) +! r4 = float, low word (bits 32..63) +! Yields: +! r3 = fraction, high word (bits 0..31) +! r4 = fraction, low word (bits 32..63) +! r5 = exponent +! Kills: cr0 f0 f1 r6 r7 + +.define .fef8 +.fef8: + ! IEEE double-precision format: + ! sign exponent fraction + ! 0 1..11 12..63 + rlwinm r6, r3, 12, 21, 31 ! r6 = IEEE exponent + addis r7, r0, 0x7ff0 ! r7 = exponent mask + addi r5, r6, -1022 ! r5 = true exponent + cmpi cr0, 0, r6, 2047 + bclr IFTRUE, EQ, 0 ! return if infinity or NaN + cmpi cr0, 0, r6, 0 + bc IFFALSE, EQ, 1f ! 
jump if normalized number + + ! Got denormalized number or zero, probably zero. + rlwinm r6, r3, 0, 12, 31 + addi r5, r0, 0 ! r5 = true exponent = 0 + or. r6, r6, r4 ! r6 = high|low fraction + bclr IFTRUE, EQ, 0 ! return if zero + + ! Got denormalized number, not zero. + stwu r4, -4(sp) + stwu r3, -4(sp) + li32 r6, _2_64 + lfd f0, 0(sp) + lfd f1, 0(r6) + fmul f0, f0, f1 ! multiply it by 2**64 + stfd f0, 0(sp) + lwz r3, 0(sp) + lwz r4, 4(sp) + rlwinm r6, r3, 12, 21, 31 ! r6 = IEEE exponent + addi sp, sp, 8 + addi r5, r6, -1022 - 64 ! r5 = true exponent +1: + ! Put fraction in [0.5, 1) or (-1, -0.5] by setting its + ! exponent to true 0, IEEE 1022. + andc r3, r3, r7 ! clear old exponent + oris r3, r3, 1022 << 4 ! set new exponent + bclr ALWAYS, 0, 0 + +.sect .rom +_2_64: + ! (double) 2**64 + .data4 0x43f00000 + .data4 0x00000000 diff --git a/mach/powerpc/libem/fif8.s b/mach/powerpc/libem/fif8.s index 052c38cf2..a26c77830 100644 --- a/mach/powerpc/libem/fif8.s +++ b/mach/powerpc/libem/fif8.s @@ -1,38 +1,71 @@ -# -! $Source$ -! $State$ -! $Revision$ - #include "powerpc.h" - + .sect .text -! Multiplies two floats, and returns the fraction and integer. +! Multiplies two double-precision floats, then splits the product into +! integer and fraction, like modf(3) in C. On entry: +! f1 = float +! f2 = other float +! Yields: +! f1 = fraction +! f2 = integer +! Kills: cr0 f1 f2 r3 r4 r5 r6 .define .fif8 .fif8: - lfd f0, 8(sp) - lfd f1, 0(sp) - fmul f0, f0, f1 - fabs f1, f0 ! f0 = result - - ! The following chunk does f1 = floor(f1). See page 158 of the book. - - mtfsfi cr7, 3 ! set rounding mode to -inf. - mtfsb0 23 - fctid f2, f1 - fcfid f2, f2 - mcrfs cr7, cr5 - bc IFFALSE, 31, toobig - fmr f1, f2 -toobig: + fmul f1, f1, f2 + stfdu f1, -8(sp) ! push f1 = product + lwz r3, 0(sp) ! r3 = high word + lwz r4, 4(sp) ! r4 = low word - fabs f2, f1 ! f2 = fabs(f1) - fsub f2, f2, f1 - stfd f2, 8(sp) - - fneg f2, f1 - fsel f2, f0, f1, f2 - stfd f2, 0(sp) - + ! 
IEEE double-precision format: + ! sign exponent fraction + ! 0 1..11 12..63 + ! Subtract 1023 from the IEEE exponent. If the result is from + ! 0 to 51, then the IEEE fraction has that many integer bits. + ! (IEEE has an implicit 1 before its fraction. If the IEEE + ! fraction has 0 integer bits, we still have an integer.) + rlwinm r5, r3, 12, 21, 31 ! r5 = IEEE exponent + addic. r5, r5, -1023 ! r5 = nr of integer bits + bc IFTRUE, LT, no_int + cmpi cr0, 0, r5, 21 + bc IFTRUE, LT, small_int + cmpi cr0, 0, r5, 52 + bc IFTRUE, LT, big_int + + ! f1 is an integer without fraction. Jump to calculate + ! fraction f1 = f1 - f2. It will be zero (or perhaps NaN). + fmr f2, f1 + b subtract + +no_int: + ! f1 is a fraction without integer. + fsub f2, f1, f1 ! integer = zero + b done + +small_int: + ! f1 has r5 = 0 to 20 integer bits in the IEEE fraction. + ! High word has 20 - r5 fraction bits. + addi r6, r0, 20 + subf r6, r5, r6 + srw r3, r3, r6 + addi r4, r0, 0 ! clear low word + slw r3, r3, r6 ! clear fraction in high word + b move_int + +big_int: + ! f1 has r5 = 21 to 51 integer bits. + ! Low word has 52 - r5 fraction bits. + addi r6, r0, 52 + subf r6, r5, r6 + srw r4, r4, r6 + slw r4, r4, r6 ! clear fraction in low word +move_int: + stw r3, 0(sp) + stw r4, 4(sp) + lfd f2, 0(sp) ! f2 = integer +subtract: + fsub f1, f1, f2 ! fraction = value - integer +done: + addi sp, sp, 8 ! restore stack pointer bclr ALWAYS, 0, 0 diff --git a/mach/powerpc/libem/lar4.s b/mach/powerpc/libem/lar4.s new file mode 100644 index 000000000..6375979c4 --- /dev/null +++ b/mach/powerpc/libem/lar4.s @@ -0,0 +1,43 @@ +# +#include "powerpc.h" + +.sect .text + +! Load from bounds-checked array. +! +! On entry: +! r3 = ptr to descriptor +! r4 = index +! r5 = address of array + +.define .lar4 +.lar4: + mfspr r10, lr + bl .aar4 + mtspr lr, r10 + ! r3 = ptr to element + ! r0 = size of element + + cmpi cr0, 0, r0, 1 + bc IFFALSE, EQ, 1f + ! Load 1 byte.
+ lbz r4, 0(r3) + stwu r4, -4(sp) + bclr ALWAYS, 0, 0 +1: + cmpi cr0, 0, r0, 2 + bc IFFALSE, EQ, 2f + ! Load 2 bytes. + lhz r4, 0(r3) + stwu r4, -4(sp) + bclr ALWAYS, 0, 0 +2: + ! Load r0 bytes, where r0 must be a positive multiple of 4. + subf sp, r0, sp ! move stack pointer down + or r5, r0, r0 ! index r5 = length r0 +3: + addic. r5, r5, -4 ! r5 -= 4 + lwzx r4, r5, r3 + stwx r4, r5, sp + bc IFTRUE, GT, 3b ! loop if r5 > 0 + bclr ALWAYS, 0, 0 diff --git a/mach/powerpc/libem/sar4.s b/mach/powerpc/libem/sar4.s new file mode 100644 index 000000000..0c1368af1 --- /dev/null +++ b/mach/powerpc/libem/sar4.s @@ -0,0 +1,45 @@ +# +#include "powerpc.h" + +.sect .text + +! Store to bounds-checked array. +! +! On entry: +! r3 = ptr to descriptor +! r4 = index +! r5 = address of array + +.define .sar4 +.sar4: + mfspr r10, lr + bl .aar4 + mtspr lr, r10 + ! r3 = ptr to element + ! r0 = size of element + + cmpi cr0, 0, r0, 1 + bc IFFALSE, EQ, 1f + ! Store 1 byte. + lwz r4, 0(sp) + addi sp, sp, 4 + stb r4, 0(r3) + bclr ALWAYS, 0, 0 +1: + cmpi cr0, 0, r0, 2 + bc IFFALSE, EQ, 2f + ! Store 2 bytes. + lwz r4, 0(sp) + addi sp, sp, 4 + sth r4, 0(r3) + bclr ALWAYS, 0, 0 +2: + ! Store r0 bytes, where r0 must be a positive multiple of 4. + or r5, r0, r0 ! index r5 = length r0 +3: + addic. r5, r5, -4 ! r5 -= 4 + lwzx r4, r5, sp + stwx r4, r5, r3 + bc IFTRUE, GT, 3b ! loop if r5 > 0 + add sp, r0, sp ! 
move stack pointer up + bclr ALWAYS, 0, 0 diff --git a/mach/powerpc/mcg/table b/mach/powerpc/mcg/table index 7023c08b6..61729d308 100644 --- a/mach/powerpc/mcg/table +++ b/mach/powerpc/mcg/table @@ -389,7 +389,7 @@ PATTERNS out:(long)reg = FROMUI.L(in:(int)reg) emit "mr %out.0, %in" - emit "li %out.1, 0" + emit "li32 %out.1, 0" cost 8; out:(ret)reg = FROMF.I(in:(dret)reg) @@ -661,15 +661,15 @@ PATTERNS ALUCC(EOR.I, "xori") out:(int)reg = value:LABEL.I - emit "la %out, $value" + emit "li32 %out, $value" cost 4; out:(int)reg = value:BLOCK.I - emit "la %out, $value" + emit "li32 %out, $value" cost 4; out:(int)reg = value:CONST.I - emit "li %out, $value" + emit "li32 %out, $value" cost 8; @@ -695,7 +695,7 @@ PATTERNS out:(float)reg = in:CONST.F when specific_constant(%in, 0) - emit "la r0, .fd_00000000" + emit "li32 r0, .fd_00000000" emit "lfs %out, 0(r0)" cost 12; diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 742e8250f..ed107aceb 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -18,14 +18,14 @@ PC_OFFSET = 4 /* Offset of saved PC relative to our FP */ #define smalls(n) sfit(n, 16) #define smallu(n) ufit(n, 16) -#define lo(n) (n & 0xFFFF) -#define hi(n) ((n>>16) & 0xFFFF) +#define lo(n) ((n) & 0xFFFF) +#define hi(n) (((n)>>16) & 0xFFFF) /* Use these for instructions that treat the low half as signed --- his() * includes a modifier to produce the correct value when the low half gets * sign extended. Er, do make sure you load the low half second. 
*/ -#define los(n) (n & 0xFFFF) -#define his(n) ((hi(n) - (lo(n)>>15)) & 0xFFFF) +#define los(n) (lo(n) | (((0-(lo(n)>>15)) & ~0xFFFF))) +#define his(n) ((hi(n) + (lo(n)>>15)) & 0xFFFF) #define IFFALSE {CONST, 4} #define IFTRUE {CONST, 12} @@ -42,8 +42,10 @@ PROPERTIES GPR /* any GPR */ REG /* any allocatable GPR */ + REG_PAIR /* speed hack for sti 8 */ FPR /* any FPR */ FREG /* any allocatable FPR */ + FSREG /* any allocatable single-precision FPR */ SPR /* any SPR */ CR /* any CR */ @@ -96,24 +98,35 @@ REGISTERS SP("sp") : GPR, GPRSP. R0("r0") : GPR, GPR0. - F31("f31") : FPR, FREG, FPR31. - F30("f30") : FPR, FREG, FPR30. - F29("f29") : FPR, FREG, FPR29. - F28("f28") : FPR, FREG, FPR28. - F27("f27") : FPR, FREG, FPR27. - F26("f26") : FPR, FREG, FPR26. - F25("f25") : FPR, FREG, FPR25. - F24("f24") : FPR, FREG, FPR24. - F23("f23") : FPR, FREG, FPR23. - F22("f22") : FPR, FREG, FPR22. - F21("f21") : FPR, FREG, FPR21. - F20("f20") : FPR, FREG, FPR20. - F19("f19") : FPR, FREG, FPR19. - F18("f18") : FPR, FREG, FPR18. - F17("f17") : FPR, FREG, FPR17. - F16("f16") : FPR, FREG, FPR16. - F15("f15") : FPR, FREG, FPR15. - F14("f14") : FPR, FREG, FPR14. + /* speed hack for sti 8 */ + PAIR_R9_R10=R9+R10 : REG_PAIR. + PAIR_R7_R8=R7+R8 : REG_PAIR. + PAIR_R5_R6=R5+R6 : REG_PAIR. + PAIR_R3_R4=R3+R4 : REG_PAIR. + + /* + * F14 to F31 are reserved for regvar, if we ever implement + * it. Don't add them to FREG; the register allocator would + * be too slow. + */ + F31("f31") : FPR, FPR31. + F30("f30") : FPR, FPR30. + F29("f29") : FPR, FPR29. + F28("f28") : FPR, FPR28. + F27("f27") : FPR, FPR27. + F26("f26") : FPR, FPR26. + F25("f25") : FPR, FPR25. + F24("f24") : FPR, FPR24. + F23("f23") : FPR, FPR23. + F22("f22") : FPR, FPR22. + F21("f21") : FPR, FPR21. + F20("f20") : FPR, FPR20. + F19("f19") : FPR, FPR19. + F18("f18") : FPR, FPR18. + F17("f17") : FPR, FPR17. + F16("f16") : FPR, FPR16. + F15("f15") : FPR, FPR15. + F14("f14") : FPR, FPR14. F13("f13") : FPR, FREG, FPR13. 
F12("f12") : FPR, FREG, FPR12. F11("f11") : FPR, FREG, FPR11. @@ -127,13 +140,27 @@ REGISTERS F3("f3") : FPR, FREG, FPR3. F2("f2") : FPR, FREG, FPR2. F1("f1") : FPR, FREG, FPR1. - F0("f0") : FPR, FREG, FPR0. + F0("f0") : FPR, FPR0. + + FS13("f13")=F13 : FSREG. + FS12("f12")=F12 : FSREG. + FS11("f11")=F11 : FSREG. + FS10("f10")=F10 : FSREG. + FS9("f9")=F9 : FSREG. + FS8("f8")=F8 : FSREG. + FS7("f7")=F7 : FSREG. + FS6("f6")=F6 : FSREG. + FS5("f5")=F5 : FSREG. + FS4("f4")=F4 : FSREG. + FS3("f3")=F3 : FSREG. + FS2("f2")=F2 : FSREG. + FS1("f1")=F1 : FSREG. LR("lr") : SPR. CTR("ctr") : SPR. C0("cr0") : CR, CR0. -#define SCRATCH R11 +#define RSCRATCH R11 #define FSCRATCH F0 @@ -142,22 +169,30 @@ TOKENS /* Used only in instruction descriptions (to generate the correct syntax). */ GPRINDIRECT = { GPR reg; INT off; } 4 off "(" reg ")". - GPRINDIRECTLO = { GPR reg; ADDR adr; } 4 ">" adr "(" reg ")". /* Warning! Do not use on labels. */ - HILABEL = { ADDR adr; } 4 "<" adr. - LOLABEL = { ADDR adr; } 4 ">" adr. + CONST = { INT val; } 4 val. /* Primitives */ LABEL = { ADDR adr; } 4 adr. - CONST = { INT val; } 4 val. LOCAL = { INT off; } 4. /* Allows us to use regvar() to refer to registers */ GPRE = { GPR reg; } 4 reg. +/* Constants on the stack */ + + CONST_N8000 = { INT val; } 4. + CONST_N7FFF_N0001 = { INT val; } 4. + CONST_0000_7FFF = { INT val; } 4. + CONST_8000 = { INT val; } 4. + CONST_8001_FFFF = { INT val; } 4. + CONST_HZ = { INT val; } 4. + CONST_HL = { INT val; } 4. + /* Expression partial results */ - + + SUM_RIS = { GPR reg; INT offhi; } 4. SUM_RC = { GPR reg; INT off; } 4. SUM_RR = { GPR reg1; GPR reg2; } 4. @@ -172,41 +207,39 @@ TOKENS SEX_H = { GPR reg; } 4. IND_RC_B = { GPR reg; INT off; } 4. + IND_RR_B = { GPR reg1; GPR reg2; } 4. IND_RC_H = { GPR reg; INT off; } 4. + IND_RR_H = { GPR reg1; GPR reg2; } 4. IND_RC_H_S = { GPR reg; INT off; } 4. + IND_RR_H_S = { GPR reg1; GPR reg2; } 4. IND_RC_W = { GPR reg; INT off; } 4. IND_RR_W = { GPR reg1; GPR reg2; } 4. 
- IND_LABEL_W = { ADDR adr; } 4. IND_RC_D = { GPR reg; INT off; } 8. IND_RR_D = { GPR reg1; GPR reg2; } 8. - IND_LABEL_D = { ADDR adr; } 8. NOT_R = { GPR reg; } 4. AND_RR = { GPR reg1; GPR reg2; } 4. - AND_RC = { GPR reg; INT val; } 4. OR_RR = { GPR reg1; GPR reg2; } 4. - OR_RC = { GPR reg; INT val; } 4. + OR_RIS = { GPR reg; INT valhi; } 4. + OR_RC = { GPR reg; INT val; } 4. XOR_RR = { GPR reg1; GPR reg2; } 4. - XOR_RC = { GPR reg; INT val; } 4. - -/* Floats */ - - FD = { FPR reg; } 8 reg. - FS = { FPR reg; } 4 reg. - -/* Comments */ - - LABELI = { ADDR msg; INT num; } 4 msg " " num. - - + XOR_RIS = { GPR reg; INT valhi; } 4. + XOR_RC = { GPR reg; INT val; } 4. SETS - TOKEN = LABEL + CONST + LOCAL. - GPRI = GPR + GPRE. - + /* signed 16-bit integer */ + CONST2 = CONST_N8000 + CONST_N7FFF_N0001 + CONST_0000_7FFF. + /* integer that, when negated, fits signed 16-bit */ + CONST2_WHEN_NEG = CONST_N7FFF_N0001 + CONST_0000_7FFF + CONST_8000. + /* unsigned 16-bit integer */ + UCONST2 = CONST_0000_7FFF + CONST_8000 + CONST_8001_FFFF. + /* any constant on stack */ + CONST_ALL = CONST_N8000 + CONST_N7FFF_N0001 + CONST_0000_7FFF + + CONST_8000 + CONST_8001_FFFF + CONST_HZ + CONST_HL. + SUM_ALL = SUM_RC + SUM_RR. TRISTATE_ALL = TRISTATE_RC_S + TRISTATE_RC_U + TRISTATE_RR_S + @@ -214,112 +247,127 @@ SETS SEX_ALL = SEX_B + SEX_H. - LOGICAL_ALL = NOT_R + AND_RR + AND_RC + OR_RR + OR_RC + XOR_RR + + LOGICAL_ALL = NOT_R + AND_RR + OR_RR + OR_RC + XOR_RR + XOR_RC. - - IND_ALL_W = IND_RC_W + IND_RR_W + IND_LABEL_W. - IND_ALL_D = IND_RC_D + IND_RR_D + IND_LABEL_D. - + /* indirect 4-byte value */ + IND_ALL_W = IND_RC_W + IND_RR_W. + /* indirect 8-byte value */ + IND_ALL_D = IND_RC_D + IND_RR_D. + /* any indirect value that fits in a GPR */ + IND_ALL_BHW = IND_RC_B + IND_RR_B + IND_RC_H + IND_RR_H + + IND_RC_H_S + IND_RR_H_S + IND_ALL_W. + + /* anything killed by sti (store indirect) */ + MEMORY = IND_ALL_BHW + IND_ALL_D. 
+ OP_ALL_W = SUM_ALL + TRISTATE_ALL + SEX_ALL + LOGICAL_ALL + IND_ALL_W. INSTRUCTIONS - add GPRI:wo, GPRI:ro, GPRI:ro. - addX "add." GPRI:wo, GPRI:ro, GPRI:ro. - addi GPRI:wo, GPRI:ro, CONST:ro. - addis GPRI:wo, GPRI:ro, CONST+HILABEL:ro. - and GPRI:wo, GPRI:ro, GPRI:ro. - andc GPRI:wo, GPRI:ro, GPRI:ro. - andiX "andi." GPRI:wo, GPRI:ro, CONST:ro kills :cc. - andisX "andis." GPRI:wo, GPRI:ro, CONST:ro kills :cc. + /* We give time as cycles of total latency from Freescale + * Semiconductor, MPC7450 RISC Microprocessor Family Reference + * Manual, Rev. 5, section 6.6. + * + * We have only 4-byte alignment for doubles; 8-byte alignment is + * optimal. We guess the misalignment penalty by adding 1 cycle to + * the cost of loading or storing a double: + * lfd lfdu lfdx: 4 -> 5 + * stfd stfdu stfdx: 3 -> 4 + */ + cost(4, 1) /* space, time */ + + add GPR:wo, GPR:ro, GPR:ro. + addX "add." GPR:wo, GPR:ro, GPR:ro. + addi GPR:wo, GPR:ro, CONST:ro. + addis GPR:wo, GPR:ro, CONST:ro. + and GPR:wo, GPR:ro, GPR:ro. + andc GPR:wo, GPR:ro, GPR:ro. + andiX "andi." GPR:wo:cc, GPR:ro, CONST:ro. + andisX "andis." GPR:wo:cc, GPR:ro, CONST:ro. b LABEL:ro. bc CONST:ro, CONST:ro, LABEL:ro. bcctr CONST:ro, CONST:ro, CONST:ro. bcctrl CONST:ro, CONST:ro, CONST:ro. bclr CONST:ro, CONST:ro, CONST:ro. bl LABEL:ro. - cmp CR:ro, CONST:ro, GPRI:ro, GPR:ro kills :cc. - cmpi CR:ro, CONST:ro, GPRI:ro, CONST:ro kills :cc. - cmpl CR:ro, CONST:ro, GPRI:ro, GPR:ro kills :cc. - cmpli CR:ro, CONST:ro, GPRI:ro, CONST:ro kills :cc. - divw GPRI:wo, GPRI:ro, GPRI:ro. - divwu GPRI:wo, GPRI:ro, GPRI:ro. - eqv GPRI:wo, GPRI:ro, GPRI:ro. - extsb GPRI:wo, GPRI:ro. - extsh GPRI:wo, GPRI:ro. - fadd FD:wo, FD:ro, FD:ro. - fadds FS:wo, FS:ro, FS:ro. - fcmpo CR:wo, FD:ro, FD:ro. - fdiv FD:wo, FD:ro, FD:ro. - fdivs FS:wo, FS:ro, FS:ro. - fneg FS+FD:wo, FS+FD:ro. - fmul FD:wo, FD:ro, FD:ro. - fmuls FS:wo, FS:ro, FS:ro. - frsp FS:wo, FD:ro. - fsub FD:wo, FD:ro, FD:ro. - fsubs FS:wo, FS:ro, FS:ro. - fmr FS+FD:wo, FS+FD:ro. 
- la GPRI:wo, LABEL:ro. - lbzx GPRI:wo, GPR:ro, GPR:ro. - lbz GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lfd FD:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lfdu FD:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lfdx FD:wo, GPR:ro, GPR:ro. - lfs FS:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lfsu FS:wo, GPRINDIRECT+GPRINDIRECTLO:rw. - lfsx FS:wo, GPR:ro, GPR:ro. - lhzx GPRI:wo, GPR:ro, GPR:ro. - lhax GPRI:wo, GPR:ro, GPR:ro. - lha GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lhz GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lwzu GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lwzx GPRI:wo, GPR:ro, GPR:ro. - lwz GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - nand GPRI:wo, GPRI:ro, GPRI:ro. - neg GPRI:wo, GPRI:ro. - nor GPRI:wo, GPRI:ro, GPRI:ro. - mfcr GPRI:wo. - mullw GPRI:wo, GPRI:ro, GPRI:ro. - mfspr GPRI:wo, SPR:ro. - mtspr SPR:wo, GPRI:ro. - or GPRI:wo, GPRI:ro, GPRI:ro. - orc GPRI:wo, GPRI:ro, GPRI:ro. - ori GPRI:wo, GPRI:ro, CONST+LOLABEL:ro. - orX "or." GPRI:wo, GPRI:ro, GPRI:ro kills :cc. - rlwinm GPRI:wo, GPRI:ro, CONST:ro, CONST:ro, CONST:ro. - slw GPRI:wo, GPRI:ro, GPRI:ro. - subf GPRI:wo, GPRI:ro, GPRI:ro. - sraw GPRI:wo, GPRI:ro, GPRI:ro. - srawi GPRI:wo, GPRI:ro, CONST:ro. - srw GPRI:wo, GPRI:ro, GPRI:ro. - stb GPRI:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - stbx GPRI:ro, GPR:ro, GPR:ro. - stfd FD:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - stfdu FD:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - stfdx FD:ro, GPR:ro, GPR:ro. - stfs FS:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - stfsu FS:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - stfsx FS:ro, GPR:ro, GPR:ro. - sth GPRI:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - sthx GPRI:ro, GPR:ro, GPR:ro. - stw GPRI:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - stwx GPRI:ro, GPR:ro, GPR:ro. - stwu GPRI:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - xor GPRI:wo, GPRI:ro, GPRI:ro. - xori GPRI:wo, GPRI:ro, CONST:ro. + cmp CR:ro, CONST:ro, GPR:ro, GPR:ro kills :cc. + cmpi CR:ro, CONST:ro, GPR:ro, CONST:ro kills :cc. + cmpl CR:ro, CONST:ro, GPR:ro, GPR:ro kills :cc. + cmpli CR:ro, CONST:ro, GPR:ro, CONST:ro kills :cc. 
+ divw GPR:wo, GPR:ro, GPR:ro cost(4, 23). + divwu GPR:wo, GPR:ro, GPR:ro cost(4, 23). + eqv GPR:wo, GPR:ro, GPR:ro. + extsb GPR:wo, GPR:ro. + extsh GPR:wo, GPR:ro. + fadd FREG:wo, FREG:ro, FREG:ro cost(4, 5). + fadds FSREG:wo, FSREG:ro, FSREG:ro cost(4, 5). + fcmpo CR:wo, FPR:ro, FPR:ro cost(4, 5). + fdiv FREG:wo, FREG:ro, FREG:ro cost(4, 35). + fdivs FSREG:wo, FSREG:ro, FSREG:ro cost(4, 21). + fmr FPR:wo, FPR:ro cost(4, 5). + fmr FSREG:wo, FSREG:ro cost(4, 5). + fmul FREG:wo, FREG:ro, FREG:ro cost(4, 5). + fmuls FSREG:wo, FSREG:ro, FSREG:ro cost(4, 5). + fneg FREG:wo, FREG:ro cost(4, 5). + fneg FSREG:wo, FSREG:ro cost(4, 5). + frsp FSREG:wo, FREG:ro cost(4, 5). + fsub FREG:wo, FREG:ro, FREG:ro cost(4, 5). + fsubs FSREG:wo, FSREG:ro, FSREG:ro cost(4, 5). + lbz GPR:wo, GPRINDIRECT:ro cost(4, 3). + lbzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). + lfd FPR:wo, GPRINDIRECT:ro cost(4, 5). + lfdu FPR:wo, GPRINDIRECT:ro cost(4, 5). + lfdx FPR:wo, GPR:ro, GPR:ro cost(4, 5). + lfs FSREG:wo, GPRINDIRECT:ro cost(4, 4). + lfsu FSREG:wo, GPRINDIRECT:rw cost(4, 4). + lfsx FSREG:wo, GPR:ro, GPR:ro cost(4, 4). + lha GPR:wo, GPRINDIRECT:ro cost(4, 3). + lhax GPR:wo, GPR:ro, GPR:ro cost(4, 3). + lhz GPR:wo, GPRINDIRECT:ro cost(4, 3). + lhzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). + li32 GPR:wo, LABEL:ro cost(8, 2). + lwzu GPR:wo, GPRINDIRECT:ro cost(4, 3). + lwzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). + lwz GPR:wo, GPRINDIRECT:ro cost(4, 3). + nand GPR:wo, GPR:ro, GPR:ro. + neg GPR:wo, GPR:ro. + nor GPR:wo, GPR:ro, GPR:ro. + mfcr GPR:wo cost(4,2). + mullw GPR:wo, GPR:ro, GPR:ro cost(4, 4). + mfspr GPR:wo, SPR:ro cost(4, 3). + mtspr SPR:wo, GPR:ro cost(4, 2). + or GPR:wo, GPR:ro, GPR:ro. + orc GPR:wo, GPR:ro, GPR:ro. + ori GPR:wo, GPR:ro, CONST:ro. + oris GPR:wo, GPR:ro, CONST:ro. + orX "or." GPR:wo:cc, GPR:ro, GPR:ro. + rlwinm GPR:wo, GPR:ro, CONST:ro, CONST:ro, CONST:ro. + slw GPR:wo, GPR:ro, GPR:ro. + subf GPR:wo, GPR:ro, GPR:ro. + sraw GPR:wo, GPR:ro, GPR:ro cost(4, 2). 
+ srawi GPR:wo, GPR:ro, CONST:ro cost(4, 2). + srw GPR:wo, GPR:ro, GPR:ro. + stb GPR:ro, GPRINDIRECT:rw cost(4, 3). + stbx GPR:ro, GPR:ro, GPR:ro cost(4, 3). + stfd FPR:ro, GPRINDIRECT:rw cost(4, 4). + stfdu FPR:ro, GPRINDIRECT:rw cost(4, 4). + stfdx FPR:ro, GPR:ro, GPR:ro cost(4, 4). + stfs FSREG:ro, GPRINDIRECT:rw cost(4, 3). + stfsu FSREG:ro, GPRINDIRECT:rw cost(4, 3). + stfsx FSREG:ro, GPR:ro, GPR:ro cost(4, 3). + sth GPR:ro, GPRINDIRECT:rw cost(4, 3). + sthx GPR:ro, GPR:ro, GPR:ro cost(4, 3). + stw GPR:ro, GPRINDIRECT:rw cost(4, 3). + stwx GPR:ro, GPR:ro, GPR:ro cost(4, 3). + stwu GPR+GPRE:ro, GPRINDIRECT:rw cost(4, 3). + xor GPR:wo, GPR:ro, GPR:ro. + xori GPR:wo, GPR:ro, CONST:ro. + xoris GPR:wo, GPR:ro, CONST:ro. - gpr_gpr_gpr GPRI:wo, GPRI:ro, GPRI:ro. - gpr_gpr_si GPRI:wo, GPRI:ro, CONST:ro. - gpr_ro_gprindirect GPRI:ro, GPRINDIRECT:rw. - gpr_ro_gpr_gpr GPRI:ro, GPRI:ro, GPRI:ro. - gpr_wo_gprindirect GPRI:wo, GPRINDIRECT:ro. - gpr_wo_gpr_gpr GPRI:wo, GPRI:ro, GPRI:ro. - - invalid "invalid". - comment "!" LABEL+LABELI:ro. + comment "!" LABEL:ro cost(0, 0). @@ -336,25 +384,27 @@ MOVES from GPR to GPRE gen COMMENT("move GPR->GPRE") - or %2, %1, %1 + or %2.reg, %1, %1 /* Constants */ - from CONST smalls(%val) to GPR + from CONST_ALL smalls(%val) to GPR gen - COMMENT("move CONST->GPRE") - addi %2, R0, {CONST, lo(%1.val)} - - from CONST to GPR + COMMENT("move CONST_ALL->GPR smalls") + addi %2, R0, {CONST, %1.val} + + from CONST_ALL + CONST to GPR gen - COMMENT("move CONST->GPRE") + COMMENT("move CONST_ALL->GPR") addis %2, R0, {CONST, hi(%1.val)} ori %2, %2, {CONST, lo(%1.val)} - + /* Can't use addi %2, %2, {CONST, los(%1.val)} + * because %2 might be R0. 
*/ + from LABEL to GPR gen COMMENT("move LABEL->GPR") - la %2, {LABEL, %1.adr} + li32 %2, {LABEL, %1.adr} /* Sign extension */ @@ -370,226 +420,147 @@ MOVES /* Register + something */ - from SUM_RC smalls(%off) to GPR - gen - COMMENT("move SUM_RC->GPR smalls") - addi %2, %1.reg, {CONST, lo(%1.off)} - + from SUM_RIS to GPR + gen + COMMENT("move SUM_RIS->GPR") + addis %2, %1.reg, {CONST, %1.offhi} + from SUM_RC to GPR - gen - COMMENT("move SUM_RC->GPR large") - addi %2, %1.reg, {CONST, los(%1.off)} - addis %2, %2, {CONST, his(%1.off)} - + gen + COMMENT("move SUM_RC->GPR") + addi %2, %1.reg, {CONST, %1.off} + from SUM_RR to GPR gen COMMENT("move SUM_RR->GPR") add %2, %1.reg1, %1.reg2 - - from SUM_RR to GPR - gen - COMMENT("move SUM_RR->GPRE") - add %2, %1.reg1, %1.reg2 - -/* Read/write byte */ - from IND_RC_B smalls(%off) to GPR - gen - COMMENT("move IND_RC_B->GPR small") - lbz %2, {GPRINDIRECT, %1.reg, %1.off} - +/* Read byte */ + from IND_RC_B to GPR gen - COMMENT("move IND_RC_B->GPR large") - addis SCRATCH, %1.reg, {CONST, his(%1.off)} - lbz %2, {GPRINDIRECT, SCRATCH, los(%1.off)} - - from GPR to IND_RC_B smalls(%off) + COMMENT("move IND_RC_B->GPR") + lbz %2, {GPRINDIRECT, %1.reg, %1.off} + + from IND_RR_B to GPR gen - COMMENT("move GPR->IND_RC_B small") - stb %1, {GPRINDIRECT, %2.reg, %2.off} - + COMMENT("move IND_RR_B->GPR") + lbzx %2, %1.reg1, %1.reg2 + +/* Write byte */ + from GPR to IND_RC_B gen - COMMENT("move GPR->IND_RC_B large") - addis SCRATCH, %2.reg, {CONST, his(%2.off)} - stb %1, {GPRINDIRECT, SCRATCH, los(%2.off)} - -/* Read/write short */ + COMMENT("move GPR->IND_RC_B") + stb %1, {GPRINDIRECT, %2.reg, %2.off} - from IND_RC_H smalls(%off) to GPR + from GPR to IND_RR_B gen - COMMENT("move IND_RC_H->GPR small") - lhz %2, {GPRINDIRECT, %1.reg, %1.off} - + COMMENT("move GPR->IND_RR_B") + stbx %1, %2.reg1, %2.reg2 + +/* Read halfword (short) */ + from IND_RC_H to GPR gen - COMMENT("move IND_RC_H->GPR large") - addis SCRATCH, %1.reg, {CONST, his(%1.off)} - 
lhz %2, {GPRINDIRECT, SCRATCH, los(%1.off)} - - from IND_RC_H_S smalls(%off) to GPR + COMMENT("move IND_RC_H->GPR") + lhz %2, {GPRINDIRECT, %1.reg, %1.off} + + from IND_RR_H to GPR gen - COMMENT("move IND_RC_H_S->GPR small") - lha %2, {GPRINDIRECT, %1.reg, %1.off} - + COMMENT("move IND_RR_H->GPR") + lhzx %2, %1.reg1, %1.reg2 + from IND_RC_H_S to GPR gen - COMMENT("move IND_RC_H_S->GPR large") - addis SCRATCH, %1.reg, {CONST, his(%1.off)} - lha %2, {GPRINDIRECT, SCRATCH, los(%1.off)} - - from GPR to IND_RC_H smalls(%off) + COMMENT("move IND_RC_H_S->GPR") + lha %2, {GPRINDIRECT, %1.reg, %1.off} + + from IND_RR_H_S to GPR gen - COMMENT("move GPR->IND_RC_H small") - sth %1, {GPRINDIRECT, %2.reg, %2.off} - + COMMENT("move IND_RR_H_S->GPR") + lhax %2, %1.reg1, %1.reg2 + +/* Write halfword */ + from GPR to IND_RC_H gen - COMMENT("move GPR->IND_RC_H large") - addis SCRATCH, %2.reg, {CONST, his(%2.off)} - sth %1, {GPRINDIRECT, SCRATCH, los(%2.off)} - + COMMENT("move GPR->IND_RC_H") + sth %1, {GPRINDIRECT, %2.reg, %2.off} + + from GPR to IND_RR_H + gen + COMMENT("move GPR->IND_RR_H") + sthx %1, %2.reg1, %2.reg2 + /* Read word */ - from IND_RC_W smalls(%off) to GPR - gen - COMMENT("move IND_RC_W->GPR small") - lwz %2, {GPRINDIRECT, %1.reg, %1.off} - from IND_RC_W to GPR gen - COMMENT("move IND_RC_W->GPR large") - addis %2, %1.reg, {CONST, his(%1.off)} - lwz %2, {GPRINDIRECT, %2, los(%1.off)} + COMMENT("move IND_RC_W->GPR") + lwz %2, {GPRINDIRECT, %1.reg, %1.off} from IND_RR_W to GPR gen COMMENT("move IND_RR_W->GPR") lwzx %2, %1.reg1, %1.reg2 - - from IND_LABEL_W to GPR - gen - COMMENT("move IND_LABEL_W->GPR") - move {LABEL, %1.adr}, SCRATCH - lwz %2, {GPRINDIRECT, SCRATCH, 0} - - from IND_RC_W smalls(%off) to FS - gen - COMMENT("move IND_RC_W->FS small") - lfs %2, {GPRINDIRECT, %1.reg, %1.off} - - from IND_RC_W to FS - gen - COMMENT("move IND_RC_W->FS large") - addis SCRATCH, %1.reg, {CONST, his(%1.off)} - lfs %2, {GPRINDIRECT, SCRATCH, los(%1.off)} - from IND_RR_W to FS + 
from IND_RC_W to FSREG gen - COMMENT("move IND_RR_W->FS") + COMMENT("move IND_RC_W->FSREG") + lfs %2, {GPRINDIRECT, %1.reg, %1.off} + + from IND_RR_W to FSREG + gen + COMMENT("move IND_RR_W->FSREG") lfsx %2, %1.reg1, %1.reg2 - - from IND_LABEL_W to FS - gen - COMMENT("move IND_LABEL_W->FS") - move {LABEL, %1.adr}, SCRATCH - lfs %2, {GPRINDIRECT, SCRATCH, 0} - + /* Write word */ - from GPR to IND_RC_W smalls(%off) - gen - COMMENT("move GPR->IND_RC_W small") - stw %1, {GPRINDIRECT, %2.reg, %2.off} - from GPR to IND_RC_W gen - COMMENT("move GPR->IND_RC_W large") - addis SCRATCH, %2.reg, {CONST, his(%2.off)} - stw %1, {GPRINDIRECT, SCRATCH, los(%2.off)} + COMMENT("move GPR->IND_RC_W") + stw %1, {GPRINDIRECT, %2.reg, %2.off} from GPR to IND_RR_W gen COMMENT("move GPR->IND_RR_W") stwx %1, %2.reg1, %2.reg2 - - from GPR to IND_LABEL_W - gen - COMMENT("move GPR->IND_LABEL_D") - move {LABEL, %2.adr}, SCRATCH - stw %1, {GPRINDIRECT, SCRATCH, 0} - - from FS to IND_RC_W smalls(%off) - gen - COMMENT("move FS->IND_RC_W small") - stfs %1, {GPRINDIRECT, %2.reg, %2.off} - - from FS to IND_RC_W - gen - COMMENT("move FS->IND_RC_W large") - addis SCRATCH, %2.reg, {CONST, his(%2.off)} - stfs %1, {GPRINDIRECT, SCRATCH, los(%2.off)} - from FS to IND_RR_W + from FSREG to IND_RC_W gen - COMMENT("move FS->IND_RR_W") + COMMENT("move FSREG->IND_RC_W") + stfs %1, {GPRINDIRECT, %2.reg, %2.off} + + from FSREG to IND_RR_W + gen + COMMENT("move FSREG->IND_RR_W") stfsx %1, %2.reg1, %2.reg2 - from FS to IND_LABEL_W - gen - COMMENT("move FS->IND_LABEL_D") - move {LABEL, %2.adr}, SCRATCH - stfs %1, {GPRINDIRECT, SCRATCH, 0} - /* Read double */ - from IND_RC_D smalls(%off) to FD + from IND_RC_D to FPR gen - COMMENT("move IND_RC_D->FD small") + COMMENT("move IND_RC_D->FPR") lfd %2, {GPRINDIRECT, %1.reg, %1.off} - - from IND_RC_D to FD + + from IND_RR_D to FPR gen - COMMENT("move IND_RC_D->FD large") - addis SCRATCH, %1.reg, {CONST, his(%1.off)} - lfd %2, {GPRINDIRECT, SCRATCH, los(%1.off)} - - from 
IND_RR_D to FD - gen - COMMENT("move IND_RR_D->FD") + COMMENT("move IND_RR_D->FPR") lfdx %2, %1.reg1, %1.reg2 - from IND_LABEL_D to FD - gen - COMMENT("move IND_LABEL_D->FD") - move {LABEL, %1.adr}, SCRATCH - lfd %2, {GPRINDIRECT, SCRATCH, 0} - /* Write double */ - from FD to IND_RC_D smalls(%off) + from FPR to IND_RC_D gen - COMMENT("move FD->IND_RC_D small") + COMMENT("move FPR->IND_RC_D") stfd %1, {GPRINDIRECT, %2.reg, %2.off} - - from FD to IND_RC_D - gen - COMMENT("move FD->IND_RC_D large") - addis SCRATCH, %2.reg, {CONST, his(%2.off)} - stfd %1, {GPRINDIRECT, SCRATCH, los(%2.off)} - from FD to IND_RR_D + from FPR to IND_RR_D gen - COMMENT("move FD->IND_RR_W") + COMMENT("move FPR->IND_RR_W") stfdx %1, %2.reg1, %2.reg2 - - from FD to IND_LABEL_D - gen - COMMENT("move FD->IND_LABEL_D") - move {LABEL, %2.adr}, SCRATCH - stfd %1, {GPRINDIRECT, SCRATCH, 0} - + /* Extract condition code field (actually produces (CC&3)<<2) */ from CR0 to GPR @@ -611,8 +582,8 @@ MOVES from TRISTATE_RC_S to CR0 gen COMMENT("move TRISTATE_RC_S->CR0 large") - move {CONST, %1.val}, SCRATCH - cmp %2, {CONST, 0}, %1.reg, SCRATCH + move {CONST, %1.val}, RSCRATCH + cmp %2, {CONST, 0}, %1.reg, RSCRATCH from TRISTATE_RC_U smallu(%val) to CR0 gen @@ -622,34 +593,34 @@ MOVES from TRISTATE_RC_U to CR0 gen COMMENT("move TRISTATE_RC_U->CR0") - move {CONST, %1.val}, SCRATCH - cmpl %2, {CONST, 0}, %1.reg, SCRATCH + move {CONST, %1.val}, RSCRATCH + cmpl %2, {CONST, 0}, %1.reg, RSCRATCH from TRISTATE_FF to CR0 gen COMMENT("move TRISTATE_FF->CR0") - fcmpo %2, {FD, %1.reg1}, {FD, %1.reg2} + fcmpo %2, %1.reg1, %1.reg2 from GPR to CR0 gen COMMENT("move GPR->CR0") - orX SCRATCH, %1, %1 /* alas, can't call test */ + orX RSCRATCH, %1, %1 /* alas, can't call test */ from TRISTATE_RR_S + TRISTATE_RC_S + TRISTATE_FF to GPR gen COMMENT("move TRISTATE_R*_S->GPR") move %1, C0 - move C0, SCRATCH + move C0, RSCRATCH move {LABEL, ".tristate_s_table"}, %2 - lwzx %2, %2, SCRATCH + lwzx %2, %2, RSCRATCH from TRISTATE_RR_U 
+ TRISTATE_RC_U to GPR gen COMMENT("move TRISTATE_R*_U->GPR") move %1, C0 - move C0, SCRATCH + move C0, RSCRATCH move {LABEL, ".tristate_u_table"}, %2 - lwzx %2, %2, SCRATCH + lwzx %2, %2, RSCRATCH /* Logicals */ @@ -663,52 +634,39 @@ MOVES COMMENT("move AND_RR->GPR") and %2, %1.reg1, %1.reg2 - from AND_RC smallu(%val) to GPR - gen - COMMENT("move AND_RC->GPR small") - andiX %2, %1.reg, {CONST, %1.val} - - from AND_RC to GPR - gen - COMMENT("move AND_RC->GPR") - move {CONST, %1.val}, SCRATCH - and %2, %1.reg, SCRATCH - from OR_RR to GPR gen COMMENT("move OR_RR->GPR") or %2, %1.reg1, %1.reg2 - from OR_RC smallu(%val) to GPR + from OR_RIS to GPR gen - COMMENT("move OR_RC->GPR small") - ori %2, %1.reg, {CONST, %1.val} + COMMENT("move OR_RIS->GPR") + oris %2, %1.reg, {CONST, %1.valhi} from OR_RC to GPR gen COMMENT("move OR_RC->GPR") - move {CONST, %1.val}, SCRATCH - or %2, %1.reg, SCRATCH + ori %2, %1.reg, {CONST, %1.val} from XOR_RR to GPR gen COMMENT("move XOR_RR->GPR") xor %2, %1.reg1, %1.reg2 - from XOR_RC smallu(%val) to GPR + from XOR_RIS to GPR gen - COMMENT("move XOR_RC->GPR small") - xori %2, %1.reg, {CONST, %1.val} + COMMENT("move XOR_RIS->GPR") + xoris %2, %1.reg, {CONST, %1.valhi} from XOR_RC to GPR gen COMMENT("move XOR_RC->GPR") - move {CONST, %1.val}, SCRATCH - xor %2, %1.reg, SCRATCH + xori %2, %1.reg, {CONST, %1.val} /* Miscellaneous */ - from OP_ALL_W + LABEL + CONST to GPRE + from OP_ALL_W + LABEL + CONST_ALL to GPRE gen move %1, %2.reg @@ -717,74 +675,76 @@ TESTS to test GPR gen - orX SCRATCH, %1, %1 + orX RSCRATCH, %1, %1 STACKINGRULES - - from GPR to STACK + + from LOCAL to STACK gen - COMMENT("stack GPR") + COMMENT("stack LOCAL") + stwu {GPRE, regvar(%1.off)}, {GPRINDIRECT, SP, 0-4} + + from REG to STACK + gen + COMMENT("stack REG") stwu %1, {GPRINDIRECT, SP, 0-4} - - from CONST to STACK - uses REG + + from REG_PAIR to STACK gen - COMMENT("stack CONST") - move %1, %a - stwu %a, {GPRINDIRECT, SP, 0-4} - - from LABEL to STACK - uses REG + 
COMMENT("stack REG_PAIR") + stwu %1.2, {GPRINDIRECT, SP, 0-4} + stwu %1.1, {GPRINDIRECT, SP, 0-4} + + from CONST_ALL + LABEL to STACK gen - COMMENT("stack LABEL") - move %1, {GPRE, %a} - stwu %a, {GPRINDIRECT, SP, 0-4} - + COMMENT("stack CONST_ALL + LABEL") + move %1, RSCRATCH + stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} + from SEX_B to STACK gen COMMENT("stack SEX_B") - extsb SCRATCH, %1.reg - stwu SCRATCH, {GPRINDIRECT, SP, 0-4} + extsb RSCRATCH, %1.reg + stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} from SEX_H to STACK gen COMMENT("stack SEX_H") - extsh SCRATCH, %1.reg - stwu SCRATCH, {GPRINDIRECT, SP, 0-4} + extsh RSCRATCH, %1.reg + stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} from SUM_ALL + TRISTATE_ALL + LOGICAL_ALL to STACK gen - move %1, {GPRE, SCRATCH} - stwu SCRATCH, {GPRINDIRECT, SP, 0-4} + COMMENT("stack SUM_ALL + TRISTATE_ALL + LOGICAL_ALL") + move %1, RSCRATCH + stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} - from IND_ALL_W to STACK + from IND_ALL_BHW to STACK gen - move %1, SCRATCH - stwu SCRATCH, {GPRINDIRECT, SP, 0-4} + COMMENT("stack IND_ALL_BHW") + move %1, RSCRATCH + stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} from IND_ALL_D to STACK gen - move %1, {FD, FSCRATCH} - stfdu {FD, FSCRATCH}, {GPRINDIRECT, SP, 0-8} + COMMENT("stack IND_ALL_D") + move %1, FSCRATCH + stfdu FSCRATCH, {GPRINDIRECT, SP, 0-8} - from FD to STACK + from FREG to STACK gen - COMMENT("stack FD") + COMMENT("stack FPR") stfdu %1, {GPRINDIRECT, SP, 0-8} - from FS to STACK + from FSREG to STACK gen - COMMENT("stack FS") + COMMENT("stack FSREG") stfsu %1, {GPRINDIRECT, SP, 0-4} - - from TOKEN to STACK - gen - invalid. 
- - - + + + COERCIONS from REG @@ -793,11 +753,11 @@ COERCIONS COMMENT("coerce REG->REG") move %1, %a yields %a - - from CONST + + from CONST_ALL uses REG gen - COMMENT("coerce CONST->REG") + COMMENT("coerce CONST_ALL->REG") move %1, %a yields %a @@ -805,7 +765,7 @@ COERCIONS uses REG gen COMMENT("coerce LABEL->REG") - move %1, {GPRE, %a} + move %1, %a yields %a from STACK @@ -815,7 +775,16 @@ COERCIONS lwz %a, {GPRINDIRECT, SP, 0} addi SP, SP, {CONST, 4} yields %a - + + from STACK + uses REG_PAIR + gen + COMMENT("coerce STACK->REG_PAIR") + lwz %a.1, {GPRINDIRECT, SP, 0} + lwz %a.2, {GPRINDIRECT, SP, 4} + addi SP, SP, {CONST, 8} + yields %a + from SEX_B uses REG gen @@ -833,82 +802,108 @@ COERCIONS from SUM_ALL + TRISTATE_ALL + LOGICAL_ALL uses REG gen - move %1, {GPRE, %a} + move %1, %a yields %a - from FS - uses FREG + from FSREG + uses FSREG gen - fmr {FS, %a}, %1 - yields {FS, %a} + fmr %a, %1 + yields %a - from FD + from FREG uses FREG gen - fmr {FD, %a}, %1 - yields {FD, %a} + fmr %a, %1 + yields %a from STACK uses FREG gen - COMMENT("coerce STACK->FD") - lfd {FD, %a}, {GPRINDIRECT, SP, 0} + COMMENT("coerce STACK->FREG") + lfd %a, {GPRINDIRECT, SP, 0} addi SP, SP, {CONST, 8} - yields {FD, %a} + yields %a from STACK - uses FREG + uses FSREG gen - COMMENT("coerce STACK->FS") - lfs {FS, %a}, {GPRINDIRECT, SP, 0} + COMMENT("coerce STACK->FSREG") + lfs %a, {GPRINDIRECT, SP, 0} addi SP, SP, {CONST, 4} - yields {FS, %a} - - from IND_ALL_W + yields %a + + from IND_ALL_BHW uses REG gen move %1, %a yields %a - + from IND_ALL_W - uses FREG + uses FSREG gen - move %1, {FS, %a} - yields {FS, %a} - + move %1, %a + yields %a + + /* + * from IND_RC_D to REG_PAIR is not possible, because + * %1.off+4 might overflow a signed 16-bit integer in + * move {IND_RC_W, %1.val, %1.off+4}, %a.2 + */ + from IND_ALL_D uses FREG gen - move %1, {FD, %a} - yields {FD, %a} - - + move %1, %a + yields %a + PATTERNS /* Intrinsics */ - pat loc /* Load constant */ - yields {CONST, $1} + pat loc 
$1==(0-0x8000) /* Load constant */ + yields {CONST_N8000, $1} + pat loc $1>=(0-0x7FFF) && $1<=(0-1) + yields {CONST_N7FFF_N0001, $1} + pat loc $1>=0 && $1<=0x7FFF + yields {CONST_0000_7FFF, $1} + pat loc $1==0x8000 + yields {CONST_8000, $1} + pat loc $1>=0x8001 && $1<=0xFFFF + yields {CONST_8001_FFFF, $1} + pat loc lo($1)==0 + yields {CONST_HZ, $1} + pat loc + yields {CONST_HL, $1} pat dup $1==INT32 /* Duplicate word on top of stack */ - with GPR + with REG + yields %1 %1 + with FSREG yields %1 %1 pat dup $1==INT64 /* Duplicate double-word on top of stack */ - with GPR GPR + with REG REG yields %2 %1 %2 %1 + with FREG + yields %1 %1 pat exg $1==INT32 /* Exchange top two words on stack */ - with GPR GPR + with REG REG yields %1 %2 pat stl lol $1==$2 /* Store then load local */ leaving dup 4 stl $1 - + + pat sdl ldl $1==$2 /* Store then load double local */ + leaving + dup 8 + sdl $1 + pat lal sti lal loi $1==$3 && $2==$4 /* Store then load local, of a different size */ leaving dup INT32 @@ -970,9 +965,13 @@ PATTERNS /* Local variables */ - pat lal /* Load address of local */ + pat lal smalls($1) /* Load address of local */ yields {SUM_RC, FP, $1} + pat lal /* Load address of local */ + uses REG={SUM_RIS, FP, his($1)} + yields {SUM_RC, %a, los($1)} + pat lol inreg($1)>0 /* Load from local */ yields {LOCAL, $1} @@ -987,7 +986,7 @@ PATTERNS loi INT32*2 pat stl inreg($1)>0 /* Store to local */ - with CONST + LABEL + GPR + OP_ALL_W + with CONST_ALL + LABEL + GPR + OP_ALL_W kills regvar($1), LOCAL %off==$1 gen move %1, {GPRE, regvar($1)} @@ -1017,12 +1016,7 @@ PATTERNS leaving lol $1 sti INT32 - - pat stl lol $1==$2 /* Save then load (generated by C compiler) */ - leaving - dup 4 - stl $1 - + pat zrl /* Zero local */ leaving loc 0 @@ -1121,55 +1115,29 @@ PATTERNS pat loi $1==INT8 /* Load byte indirect */ with GPR - uses REG - gen - lbz %a, {GPRINDIRECT, %1, 0} - yields %a + yields {IND_RC_B, %1, 0} with SUM_RR - uses reusing %1, REG - gen - lbzx %a, %1.reg1, %1.reg2 - 
yields %a + yields {IND_RR_B, %1.reg1, %1.reg2} with SUM_RC - uses REG - gen - move {IND_RC_B, %1.reg, %1.off}, %a - yields %a - - pat loi loc loc cii $1==INT16 && $2==INT16 && $3==INT32 /* Load half-word indirect and sign extend */ + yields {IND_RC_B, %1.reg, %1.off} + + pat loi loc loc cii $1==INT16 && $2==INT16 && $3==INT32 + /* Load half-word indirect and sign extend */ with GPR - uses REG - gen - lha %a, {GPRINDIRECT, %1, 0} - yields %a + yields {IND_RC_H_S, %1, 0} with SUM_RR - uses reusing %1, REG - gen - lhax %a, %1.reg1, %1.reg2 - yields %a + yields {IND_RR_H_S, %1.reg1, %1.reg2} with SUM_RC - uses REG - gen - move {IND_RC_H_S, %1.reg, %1.off}, %a - yields %a - + yields {IND_RC_H_S, %1.reg, %1.off} + pat loi $1==INT16 /* Load half-word indirect */ with GPR - uses REG - gen - lhz %a, {GPRINDIRECT, %1, 0} - yields %a + yields {IND_RC_H, %1, 0} with SUM_RR - uses reusing %1, REG - gen - lhzx %a, %1.reg1, %1.reg2 - yields %a + yields {IND_RR_H, %1.reg1, %1.reg2} with SUM_RC - uses REG - gen - move {IND_RC_H, %1.reg, %1.off}, %a - yields %a - + yields {IND_RC_H, %1.reg, %1.off} + pat loi $1==INT32 /* Load word indirect */ with GPR yields {IND_RC_W, %1, 0} @@ -1177,8 +1145,6 @@ PATTERNS yields {IND_RC_W, %1.reg, %1.off} with SUM_RR yields {IND_RR_W, %1.reg1, %1.reg2} - with LABEL - yields {IND_LABEL_W, %1.adr} pat loi $1==INT64 /* Load double-word indirect */ with GPR @@ -1187,8 +1153,6 @@ PATTERNS yields {IND_RC_D, %1.reg, %1.off} with SUM_RR yields {IND_RR_D, %1.reg1, %1.reg2} - with LABEL - yields {IND_LABEL_D, %1.adr} pat loi /* Load arbitrary size */ leaving @@ -1200,90 +1164,125 @@ PATTERNS kills ALL gen bl {LABEL, ".los"} - + pat sti $1==INT8 /* Store byte indirect */ with GPR GPR + kills MEMORY gen stb %2, {GPRINDIRECT, %1, 0} with SUM_RR GPR + kills MEMORY gen stbx %2, %1.reg1, %1.reg2 with SUM_RC GPR + kills MEMORY gen move %2, {IND_RC_B, %1.reg, %1.off} with GPR SEX_B + kills MEMORY gen stb %2.reg, {GPRINDIRECT, %1, 0} with SUM_RR SEX_B + kills MEMORY 
gen stbx %2.reg, %1.reg1, %1.reg2 with SUM_RC SEX_B + kills MEMORY gen move %2.reg, {IND_RC_B, %1.reg, %1.off} pat sti $1==INT16 /* Store half-word indirect */ with GPR GPR + kills MEMORY gen sth %2, {GPRINDIRECT, %1, 0} with SUM_RR GPR + kills MEMORY gen sthx %2, %1.reg1, %1.reg2 with SUM_RC GPR + kills MEMORY gen move %2, {IND_RC_H, %1.reg, %1.off} with GPR SEX_H + kills MEMORY gen sth %2.reg, {GPRINDIRECT, %1, 0} with SUM_RR SEX_H + kills MEMORY gen sthx %2.reg, %1.reg1, %1.reg2 with SUM_RC SEX_H + kills MEMORY gen move %2.reg, {IND_RC_H, %1.reg, %1.off} pat sti $1==INT32 /* Store word indirect */ - with GPR GPR+FS + with GPR GPR+FSREG + kills MEMORY gen move %2, {IND_RC_W, %1, 0} - with SUM_RR GPR+FS + with SUM_RR GPR+FSREG + kills MEMORY gen move %2, {IND_RR_W, %1.reg1, %1.reg2} - with SUM_RC GPR+FS + with SUM_RC GPR+FSREG + kills MEMORY gen move %2, {IND_RC_W, %1.reg, %1.off} - with LABEL GPR+FS - gen - move %2, {IND_LABEL_W, %1.adr} pat sti $1==INT64 /* Store double-word indirect */ - with GPR FD + with REG FREG + kills MEMORY gen move %2, {IND_RC_D, %1, 0} - with SUM_RR FD + with SUM_RR FREG + kills MEMORY gen move %2, {IND_RR_D, %1.reg1, %1.reg2} - with SUM_RC FD + with SUM_RC FREG + kills MEMORY gen move %2, {IND_RC_D, %1.reg, %1.off} - with GPR GPR GPR + /* + * This pattern would be too slow: + * with REG REG REG + * ncg can't handle that many registers, and would + * take about 2 seconds on each sti 8. So we use + * REG_PAIR as a speed hack for sti 8. + */ + with REG REG_PAIR + kills MEMORY gen - stw %2, {GPRINDIRECT, %1, 0} - stw %3, {GPRINDIRECT, %1, 4} - with SUM_RC GPR GPR + move %2.1, {IND_RC_W, %1, 0} + move %2.2, {IND_RC_W, %1, 4} + /* + * Next 2 patterns exist because there is no coercion + * from IND_ALL_D to REG_PAIR. 
+ */ + with REG IND_RC_D + kills MEMORY + uses REG={SUM_RC, %2.reg, %2.off}, REG_PAIR gen - move %2, {IND_RC_W, %1.reg, %1.off} - move %3, {IND_RC_W, %1.reg, %1.off+4} - with LABEL FD + move {IND_RC_W, %a, 0}, %b.1 + move {IND_RC_W, %a, 4}, %b.2 + move %b.1, {IND_RC_W, %1, 0} + move %b.2, {IND_RC_W, %1, 4} + with REG IND_RR_D + kills MEMORY + uses REG={SUM_RR, %2.reg1, %2.reg2}, REG_PAIR gen - move %2, {IND_LABEL_D, %1.adr} - + move {IND_RC_W, %a, 0}, %b.1 + move {IND_RC_W, %a, 4}, %b.2 + move %b.1, {IND_RC_W, %1, 0} + move %b.2, {IND_RC_W, %1, 4} pat sti /* Store arbitrary size */ leaving loc $1 sts INT32 - - pat sts /* Load arbitrary size */ + + pat sts /* Store arbitrary size */ with GPR3 GPR4 STACK kills ALL gen @@ -1347,28 +1346,42 @@ PATTERNS pat adi $1==4 /* Add word (second + top) */ with REG REG yields {SUM_RR, %1, %2} - with CONST REG + with CONST2 REG yields {SUM_RC, %2, %1.val} - with REG CONST + with REG CONST2 yields {SUM_RC, %1, %2.val} - with CONST SUM_RC - yields {SUM_RC, %2.reg, %2.off+%1.val} - with CONST LABEL + with CONST_HZ REG + uses reusing %2, REG={SUM_RIS, %2, his(%1.val)} + yields %a + with REG CONST_HZ + uses reusing %1, REG={SUM_RIS, %1, his(%2.val)} + yields %a + with CONST_ALL-CONST2-CONST_HZ REG + uses reusing %2, REG={SUM_RIS, %2, his(%1.val)} + yields {SUM_RC, %a, los(%1.val)} + with REG CONST_ALL-CONST2-CONST_HZ + uses reusing %1, REG={SUM_RIS, %1, his(%2.val)} + yields {SUM_RC, %a, los(%2.val)} + with CONST_ALL LABEL yields {LABEL, %2.adr+%1.val} - + pat sbi $1==4 /* Subtract word (second - top) */ with REG REG uses reusing %2, REG gen subf %a, %1, %2 yields %a - with CONST REG + with CONST2_WHEN_NEG REG yields {SUM_RC, %2, 0-%1.val} - with CONST SUM_RC - yields {SUM_RC, %2.reg, %2.off-%1.val} - with CONST LABEL + with CONST_HZ REG + uses reusing %2, REG={SUM_RIS, %2, his(0-%1.val)} + yields %a + with CONST_ALL-CONST2_WHEN_NEG-CONST_HZ REG + uses reusing %2, REG={SUM_RIS, %2, his(0-%1.val)} + yields {SUM_RC, %a, los(0-%1.val)} + 
with CONST_ALL LABEL yields {LABEL, %2.adr+(0-%1.val)} - + pat ngi $1==4 /* Negate word */ with REG uses reusing %1, REG @@ -1428,47 +1441,87 @@ PATTERNS yields %a with GPR GPR yields {AND_RR, %1, %2} - with GPR CONST - yields {AND_RC, %1, %2.val} - with CONST GPR - yields {AND_RC, %2, %1.val} - + with GPR UCONST2 + uses reusing %1, REG + gen + andiX %a, %1, {CONST, %2.val} + yields %a + with UCONST2 GPR + uses reusing %2, REG + gen + andiX %a, %2, {CONST, %1.val} + yields %a + with GPR CONST_HZ + uses reusing %1, REG + gen + andisX %a, %1, {CONST, hi(%2.val)} + yields %a + with CONST_HZ GPR + uses reusing %2, REG + gen + andisX %a, %2, {CONST, hi(%1.val)} + yields %a + pat and !defined($1) /* AND set */ with STACK gen bl {LABEL, ".and"} - + pat ior $1==4 /* OR word */ - with GPR NOT_R + with REG NOT_R uses reusing %1, REG gen orc %a, %1, %2.reg yields %a - with NOT_R GPR + with NOT_R REG uses reusing %2, REG gen orc %a, %2, %1.reg yields %a - with GPR GPR + with REG REG yields {OR_RR, %1, %2} - with GPR CONST + with REG UCONST2 yields {OR_RC, %1, %2.val} - with CONST GPR + with UCONST2 REG yields {OR_RC, %2, %1.val} - + with REG CONST_HZ + uses reusing %1, REG={OR_RIS, %1, hi(%2.val)} + yields %a + with CONST_HZ REG + uses reusing %2, REG={OR_RIS, %2, hi(%1.val)} + yields %a + with REG CONST_ALL-UCONST2-CONST_HZ + uses reusing %1, REG={OR_RIS, %1, hi(%2.val)} + yields {OR_RC, %1, lo(%2.val)} + with CONST_ALL-UCONST2-CONST_HZ REG + uses reusing %2, REG={OR_RIS, %2, hi(%1.val)} + yields {OR_RC, %2, lo(%1.val)} + pat ior !defined($1) /* OR set */ with STACK gen bl {LABEL, ".ior"} - + pat xor $1==4 /* XOR word */ - with GPR GPR + with REG REG yields {XOR_RR, %1, %2} - with GPR CONST + with REG UCONST2 yields {XOR_RC, %1, %2.val} - with CONST GPR + with UCONST2 REG yields {XOR_RC, %2, %1.val} - + with REG CONST_HZ + uses reusing %1, REG={XOR_RIS, %1, hi(%2.val)} + yields %a + with CONST_HZ REG + uses reusing %2, REG={XOR_RIS, %2, hi(%1.val)} + yields %a + with REG 
CONST_ALL-UCONST2-CONST_HZ + uses reusing %1, REG={XOR_RIS, %1, hi(%2.val)} + yields {XOR_RC, %1, lo(%2.val)} + with CONST_ALL-UCONST2-CONST_HZ REG + uses reusing %2, REG={XOR_RIS, %2, hi(%1.val)} + yields {XOR_RC, %2, lo(%1.val)} + pat xor !defined($1) /* XOR set */ with STACK gen @@ -1499,7 +1552,7 @@ PATTERNS bl {LABEL, ".com"} pat sli $1==4 /* Shift left (second << top) */ - with CONST GPR + with CONST_ALL GPR uses reusing %2, REG gen rlwinm %a, %2, {CONST, (%1.val & 0x1F)}, {CONST, 0}, {CONST, 31-(%1.val & 0x1F)} @@ -1511,7 +1564,7 @@ PATTERNS yields %a pat sri $1==4 /* Shift right signed (second >> top) */ - with CONST GPR + with CONST_ALL GPR uses reusing %2, REG gen srawi %a, %2, {CONST, %1.val & 0x1F} @@ -1523,7 +1576,7 @@ PATTERNS yields %a pat sru $1==4 /* Shift right unsigned (second >> top) */ - with CONST GPR + with CONST_ALL GPR uses reusing %2, REG gen rlwinm %a, %2, {CONST, 32-(%1.val & 0x1F)}, {CONST, (%1.val & 0x1F)}, {CONST, 31} @@ -1603,9 +1656,9 @@ PATTERNS uses reusing %1, REG gen move %1, C0 - move C0, SCRATCH + move C0, RSCRATCH move {LABEL, ".teq_table"}, %a - lwzx %a, %a, SCRATCH + lwzx %a, %a, RSCRATCH yields %a pat tne /* top = (top != 0) */ @@ -1613,9 +1666,9 @@ PATTERNS uses reusing %1, REG gen move %1, C0 - move C0, SCRATCH + move C0, RSCRATCH move {LABEL, ".tne_table"}, %a - lwzx %a, %a, SCRATCH + lwzx %a, %a, RSCRATCH yields %a pat tlt /* top = (top < 0) */ @@ -1623,9 +1676,9 @@ PATTERNS uses reusing %1, REG gen move %1, C0 - move C0, SCRATCH + move C0, RSCRATCH move {LABEL, ".tlt_table"}, %a - lwzx %a, %a, SCRATCH + lwzx %a, %a, RSCRATCH yields %a pat tle /* top = (top <= 0) */ @@ -1633,9 +1686,9 @@ PATTERNS uses reusing %1, REG gen move %1, C0 - move C0, SCRATCH + move C0, RSCRATCH move {LABEL, ".tle_table"}, %a - lwzx %a, %a, SCRATCH + lwzx %a, %a, RSCRATCH yields %a pat tgt /* top = (top > 0) */ @@ -1643,9 +1696,9 @@ PATTERNS uses reusing %1, REG gen move %1, C0 - move C0, SCRATCH + move C0, RSCRATCH move {LABEL, ".tgt_table"}, 
%a - lwzx %a, %a, SCRATCH + lwzx %a, %a, RSCRATCH yields %a pat tge /* top = (top >= 0) */ @@ -1653,9 +1706,9 @@ PATTERNS uses reusing %1, REG gen move %1, C0 - move C0, SCRATCH + move C0, RSCRATCH move {LABEL, ".tge_table"}, %a - lwzx %a, %a, SCRATCH + lwzx %a, %a, RSCRATCH yields %a @@ -1733,13 +1786,13 @@ PATTERNS /* Compare and jump */ pat cmi /* Signed tristate compare */ - with CONST GPR + with CONST_ALL GPR yields {TRISTATE_RC_S, %2, %1.val} with GPR GPR yields {TRISTATE_RR_S, %2, %1} pat cmu /* Unsigned tristate compare */ - with CONST GPR + with CONST_ALL GPR yields {TRISTATE_RC_U, %2, %1.val} with GPR GPR yields {TRISTATE_RR_U, %2, %1} @@ -1861,16 +1914,16 @@ PATTERNS pat fil /* Set current filename */ leaving lae $1 - ste ".filename" + ste "hol0+4" pat lin /* Set current line number */ leaving loc $1 - ste ".linenumber" + ste "hol0" pat lni /* Increment line number */ leaving - ine ".linenumber" + ine "hol0" pat lim /* Load EM trap ignore mask */ leaving @@ -1927,25 +1980,6 @@ PATTERNS move {IND_RC_W, %a, 0}, %a mtspr CTR, %a bcctr ALWAYS, {CONST, 0}, {CONST, 0} - -#if 0 - - pat gto /* longjmp */ - with STACK - gen - ld {LABEL, $1+2} - wspec {CONST, 1} - ld {LABEL, $1+4} - wspec {CONST, 0} - ld {LABEL, $1+0} - wspec {CONST, 2} - - pat str $1==1 /* Store special GPRister */ - with GPR0 - gen - wspec {CONST, $1} - -#endif pat lor $1==0 /* Load FP */ uses REG @@ -1976,14 +2010,28 @@ PATTERNS pat str $1==2 /* Store HP */ leaving ste ".reghp" - + + pat loc ass $1==4 /* Drop 4 bytes from stack */ + with exact GPR + /* nop */ + with STACK + gen + addi SP, SP, {CONST, 4} + pat ass /* Adjust stack by variable amount */ - with CONST + with CONST2 STACK gen - move {SUM_RC, SP, %1.val}, {GPRE, SP} - with GPR + move {SUM_RC, SP, %1.val}, SP + with CONST_HZ STACK gen - move {SUM_RR, SP, %1}, {GPRE, SP} + move {SUM_RC, SP, his(%1.val)}, SP + with CONST_ALL-CONST2-CONST_HZ STACK + gen + move {SUM_RC, SP, his(%1.val)}, SP + move {SUM_RC, SP, los(%1.val)}, SP + with GPR 
STACK + gen + move {SUM_RR, SP, %1}, SP pat asp /* Adjust stack by constant amount */ leaving @@ -2004,47 +2052,47 @@ PATTERNS loe ".fs_00000000" pat adf $1==INT32 /* Add single */ - with FS FS - uses reusing %1, FREG + with FSREG FSREG + uses reusing %1, FSREG gen - fadds {FS, %a}, %2, %1 - yields {FS, %a} + fadds %a, %2, %1 + yields %a pat sbf $1==INT32 /* Subtract single */ - with FS FS - uses reusing %1, FREG + with FSREG FSREG + uses reusing %1, FSREG gen - fsubs {FS, %a}, %2, %1 - yields {FS, %a} + fsubs %a, %2, %1 + yields %a pat mlf $1==INT32 /* Multiply single */ - with FS FS - uses reusing %1, FREG + with FSREG FSREG + uses reusing %1, FSREG gen - fmuls {FS, %a}, %2, %1 - yields {FS, %a} + fmuls %a, %2, %1 + yields %a pat dvf $1==INT32 /* Divide single */ - with FS FS - uses reusing %1, FREG + with FSREG FSREG + uses reusing %1, FSREG gen - fdivs {FS, %a}, %2, %1 - yields {FS, %a} + fdivs %a, %2, %1 + yields %a pat ngf $1==INT32 /* Negate single */ - with FS - uses reusing %1, FREG + with FSREG + uses reusing %1, FSREG gen - fneg {FS, %a}, %1 - yields {FS, %a} + fneg %a, %1 + yields %a pat cmf $1==INT32 /* Compare single */ - with FS FS - yields {TRISTATE_FF, %2.reg, %1.reg} + with FSREG FSREG + yields {TRISTATE_FF, %2.1, %1.1} pat loc loc cff $1==INT32 && $2==INT64 /* Convert single to double */ - with FS - yields {FD, %1.reg} + with FSREG + yields %1.1 pat loc loc cfu $1==INT32 && $2==INT32 /* Convert single to unsigned int */ with STACK @@ -2078,50 +2126,50 @@ PATTERNS lde ".fd_00000000" pat adf $1==INT64 /* Add double */ - with FD FD + with FREG FREG uses FREG gen - fadd {FD, %a}, %2, %1 - yields {FD, %a} + fadd %a, %2, %1 + yields %a pat sbf $1==INT64 /* Subtract double */ - with FD FD + with FREG FREG uses FREG gen - fsub {FD, %a}, %2, %1 - yields {FD, %a} + fsub %a, %2, %1 + yields %a pat mlf $1==INT64 /* Multiply double */ - with FD FD + with FREG FREG uses reusing %1, FREG gen - fmul {FD, %a}, %2, %1 - yields {FD, %a} + fmul %a, %2, %1 + yields 
%a pat dvf $1==INT64 /* Divide double */ - with FD FD + with FREG FREG uses reusing %1, FREG gen - fdiv {FD, %a}, %2, %1 - yields {FD, %a} + fdiv %a, %2, %1 + yields %a pat ngf $1==INT64 /* Negate double */ - with FD + with FREG uses reusing %1, FREG gen - fneg {FD, %a}, %1 - yields {FD, %a} + fneg %a, %1 + yields %a pat cmf $1==INT64 /* Compare double */ - with FD FD - yields {TRISTATE_FF, %2.reg, %1.reg} + with FREG FREG + yields {TRISTATE_FF, %2, %1} pat loc loc cff $1==INT64 && $2==INT32 /* Convert double to single */ - with FD - uses reusing %1, FREG + with FREG + uses reusing %1, FSREG gen - frsp {FS, %a}, %1 - yields {FS, %a} + frsp %a, %1 + yields %a pat loc loc cfu $1==INT64 && $2==INT32 /* Convert double to unsigned int */ with STACK @@ -2143,18 +2191,17 @@ PATTERNS with STACK gen bl {LABEL, ".cuf8"} - - pat fef $1==INT64 /* Split double */ - with FD + + pat fef $1==INT64 /* Split exponent, fraction */ + with GPR3 GPR4 + kills FPR0, FPR1, GPR6, GPR7 gen - addi SP, SP, {CONST, 0-8} - stfd %1, {GPRINDIRECT, SP, 0} - stwu SP, {GPRINDIRECT, SP, 0-4} - bl {LABEL, "___fef8"} - stw R3, {GPRINDIRECT, SP, 0} - - pat fif $1==INT64 /* Multiply and split double (?) */ - with STACK + bl {LABEL, ".fef8"} + yields R4 R3 R5 + + pat fif $1==INT64 /* Multiply then split integer, fraction */ + with FPR1 FPR2 + kills FPR1, FPR2, GPR3, GPR4, GPR5, GPR6 gen bl {LABEL, ".fif8"} - + yields F1 F2 diff --git a/mach/proto/ncg/codegen.c b/mach/proto/ncg/codegen.c index cf7379ccf..15d99d393 100644 --- a/mach/proto/ncg/codegen.c +++ b/mach/proto/ncg/codegen.c @@ -909,23 +909,6 @@ normalfailed: if (stackpad!=tokpatlen) { break; } -#endif -#ifdef USE_NOFRAMEPOINTER - case DO_STACKADJUST: { - result_t result; - int nodeno; - - DEBUG("STACKADJUST"); - /* The offset is an expression, which we need to evaluate. 
*/ - - getint(nodeno,codep); - compute(&enodes[nodeno], &result); - assert(result.e_typ==EV_INT); - - if (toplevel) - stackoffset += result.e_v.e_con; - break; - } #endif } } diff --git a/mach/proto/ncg/extern.h b/mach/proto/ncg/extern.h index aa5e42489..3f376d4d1 100644 --- a/mach/proto/ncg/extern.h +++ b/mach/proto/ncg/extern.h @@ -20,9 +20,6 @@ extern rl_p curreglist; /* side effect of findcoerc() */ #ifndef NDEBUG extern int Debug; /* on/off debug printout */ #endif -#ifdef USE_NOFRAMEPOINTER -extern int stackoffset; /* offset from localbase to sp */ -#endif /* * Next descriptions are external declarations for tables created diff --git a/plat/linuxppc/boot.s b/plat/linuxppc/boot.s index 66cb38306..2da5dd556 100644 --- a/plat/linuxppc/boot.s +++ b/plat/linuxppc/boot.s @@ -41,7 +41,14 @@ begtext: stwu r3, -4(sp) b __m_a_i_n - + +! Define symbols at the beginning of our various segments, so that we can find +! them. (Except .text, which has already been done.) + +.sect .data; begdata: +.sect .rom; begrom: +.sect .bss; begbss: + ! Some magic data. All EM systems need these. .define _errno @@ -50,7 +57,3 @@ begtext: .define .trppc, .ignmask .comm .trppc, 4 ! ptr to user trap handler .comm .ignmask, 4 ! user trap ignore mask - -.define .linenumber, .filename -.comm .linenumber, 4 ! current linenumber (used for debugging) -.comm .filename, 4 ! 
ptr to current filename (used for debugging) diff --git a/plat/linuxppc/build-tools.lua b/plat/linuxppc/build-tools.lua index 84f6e774d..0157e31a4 100644 --- a/plat/linuxppc/build-tools.lua +++ b/plat/linuxppc/build-tools.lua @@ -10,10 +10,16 @@ build_mcg { arch = "powerpc", } +build_ncg { + name = "ncg", + arch = "powerpc", +} + return installable { name = "tools", map = { ["$(PLATDEP)/linuxppc/as"] = "+as", + ["$(PLATDEP)/linuxppc/ncg"] = "+ncg", ["$(PLATDEP)/linuxppc/mcg"] = "+mcg", ["$(PLATIND)/descr/linuxppc"] = "./descr", "util/opt+pkg", diff --git a/plat/linuxppc/descr b/plat/linuxppc/descr index 770e8834f..72958b212 100644 --- a/plat/linuxppc/descr +++ b/plat/linuxppc/descr @@ -1,6 +1,4 @@ -# $Source: /cvsroot/tack/Ack/plat/linux386/descr,v $ -# $State: Exp $ -# $Revision: 1.1 $ +# plat/linuxppc/descr var w=4 var wa=4 @@ -19,8 +17,8 @@ var xa={x} var ARCH=powerpc var PLATFORM=linuxppc var PLATFORMDIR={EM}/share/ack/{PLATFORM} -var CPP_F=-D__unix -D__POWERPC -var ALIGN=-a0:4 -a1:4 -a2:4 -a3:4 -b0:0x80000054 +var CPP_F=-D__unix +var ALIGN=-a0:4 -a1:4 -a2:4 -a3:4 -b0:0x10000054 var C_LIB={PLATFORMDIR}/libc-ansi.a # bitfields reversed for compatibility with (g)cc. 
var CC_ALIGN=-Vr @@ -35,6 +33,7 @@ var C_INCLUDES=-I{PLATFORMDIR}/include -I{EM}/share/ack/include/ansi name be from .m.g to .s + # Change this back to ncg to revert to the old code generator program {EM}/lib/ack/{PLATFORM}/mcg mapflag -gdb GF=-gdb args {GF?} < @@ -65,8 +64,9 @@ name led mapflag -l* LNAME={PLATFORMDIR}/lib* mapflag -fp FLOATS={EM}/{LIB}fp args {ALIGN} {SEPID?} \ - {PLATFORMDIR}/boot.o \ - ({RTS}:.ocm.b.c={PLATFORMDIR}/c-ansi.o) \ + (.e:{HEAD}={PLATFORMDIR}/boot.o) \ + ({RTS}:.ocm.b={PLATFORMDIR}/c-ansi.o) \ + ({RTS}:.c={PLATFORMDIR}/c-ansi.o) \ ({RTS}:.mod={PLATFORMDIR}/modula2.o) \ ({RTS}:.p={PLATFORMDIR}/pascal.o) \ -o > < \ @@ -75,9 +75,10 @@ name led (.mod:{TAIL}={PLATFORMDIR}/libmodula2.a) \ (.ocm:{TAIL}={PLATFORMDIR}/liboccam.a) \ (.ocm.b.mod.c.p:{TAIL}={PLATFORMDIR}/libc.a) \ - {PLATFORMDIR}/libem.a \ - {PLATFORMDIR}/libsys.a \ - {PLATFORMDIR}/libend.a + {FLOATS?} \ + (.e:{TAIL}={PLATFORMDIR}/libem.a \ + {PLATFORMDIR}/libsys.a \ + {PLATFORMDIR}/libend.a) linker end name cv diff --git a/plat/linuxppc/libsys/_syscall.s b/plat/linuxppc/libsys/_syscall.s index ccafe2460..c7e818830 100644 --- a/plat/linuxppc/libsys/_syscall.s +++ b/plat/linuxppc/libsys/_syscall.s @@ -42,7 +42,7 @@ __syscall: bc IFTRUE, GT, 2f 3: - la r4, _errno + li32 r4, _errno stw r3, 0(r4) addi r3, r0, -1 bclr ALWAYS, 0, 0 diff --git a/plat/linuxppc/libsys/trap.s b/plat/linuxppc/libsys/trap.s index af36acf2d..09d3b0b21 100644 --- a/plat/linuxppc/libsys/trap.s +++ b/plat/linuxppc/libsys/trap.s @@ -65,13 +65,13 @@ EUNIMPL = 63 ! unimplemented em-instruction called addi r4, r0, 1 rlwnm r4, r4, r3, 0, 31 ! calculate trap bit - la r5, .ignmask + li32 r5, .ignmask lwz r5, 0(r5) ! load ignore mask and. r4, r4, r5 ! compare bclr IFFALSE, EQ, 0 ! return if non-zero 1: - la r4, .trppc + li32 r4, .trppc lwz r5, 0(r4) ! load user trap routine or. r5, r5, r5 ! test bc IFTRUE, EQ, fatal ! if no user trap routine, bail out @@ -92,7 +92,7 @@ EUNIMPL = 63 ! 
unimplemented em-instruction called fatal: addi r3, r0, 1 - la r4, message + li32 r4, message addi r5, r0, 6 addi r0, r0, 4 ! write() sc 0 diff --git a/util/ncgg/cgg.y b/util/ncgg/cgg.y index 24948abad..4f9cbb00c 100644 --- a/util/ncgg/cgg.y +++ b/util/ncgg/cgg.y @@ -38,7 +38,7 @@ int Xstackflag=0; /* set in coercions, moves, and tests. %1 means something */ struct varinfo *gen_inst(),*gen_move(),*gen_test(),*gen_preturn(),*gen_tlab(); -struct varinfo *gen_label(), *gen_stackadjust(), *make_erase(); +struct varinfo *gen_label(), *make_erase(); expr_t make_expr(),ident_expr(),subreg_expr(),tokm_expr(),all_expr(); expr_t perc_ident_expr(),sum_expr(),regvar_expr(); @@ -74,9 +74,9 @@ iocc_t iops[20]; %token TOPELTSIZE FALLTHROUGH LABELDEF %token PROC CALL EXAMPLE %token FROM TO -%token TEST MOVE STACK RETURN STACKADJUST +%token TEST MOVE STACK RETURN %token PATTERNS PAT WITH EXACT KILLS USES REUSING GEN YIELDS LEAVING -%token DEFINED SAMESIGN SFIT UFIT ROM LOWW HIGHW ISROM STACKOFFSET +%token DEFINED SAMESIGN SFIT UFIT ROM LOWW HIGHW ISROM %token CMPEQ CMPNE CMPLT CMPGT CMPLE CMPGE OR2 AND2 LSHIFT RSHIFT NOT COMP %token INREG REGVAR REG_ANY REG_FLOAT REG_LOOP REG_POINTER %token ADORNACCESS @@ -635,8 +635,8 @@ coderule maxempatlen=empatlen; } patterns - { /* if (!saferulefound) - error("Previous rule impossible on empty stack"); */ + { if (!saferulefound) + error("Previous rule impossible on empty stack"); outpatterns(); } | PROC IDENT example @@ -849,8 +849,6 @@ gen_instruction { $$ = gen_label($2-1); use_tes++; } | RETURN { $$ = gen_preturn(); } - | STACKADJUST expr - { $$ = gen_stackadjust($2.ex_index); use_noframepointer++; } ; optstar : /* empty */ @@ -1030,8 +1028,6 @@ expr { $$ = make_expr(TYPINT,EX_LOWW,$3-1,0); } | HIGHW '(' emarg ')' { $$ = make_expr(TYPINT,EX_HIGHW,$3-1,0); } - | STACKOFFSET '(' ')' - { $$ = make_expr(TYPINT,EX_STACKOFFSET, 0, 0); } /* Excluded, because it causes a shift-reduce conflict (problems with a tokenset_no followed by an optexpr) | 
'-' expr %prec UMINUS diff --git a/util/ncgg/coerc.c b/util/ncgg/coerc.c index eb5f6ee7a..893f81be3 100644 --- a/util/ncgg/coerc.c +++ b/util/ncgg/coerc.c @@ -127,15 +127,6 @@ struct varinfo *gen_preturn() { return(vp); } -struct varinfo *gen_stackadjust(int expr) { - register struct varinfo *vp; - - NEW(vp,struct varinfo); - vp->vi_int[0] = INSSTACKADJUST; - vp->vi_int[1] = expr; - return(vp); -} - struct varinfo *gen_tlab(n) { register struct varinfo *vp; diff --git a/util/ncgg/extern.h b/util/ncgg/extern.h index 909e04774..561591627 100644 --- a/util/ncgg/extern.h +++ b/util/ncgg/extern.h @@ -37,7 +37,6 @@ extern int regclass; extern int maxtokensize; extern int nprocargs, maxprocargs; extern int use_tes; -extern int use_noframepointer; extern char *mystrcpy(); extern char *myalloc(); diff --git a/util/ncgg/keywords b/util/ncgg/keywords index 641fd45b9..606f7c839 100644 --- a/util/ncgg/keywords +++ b/util/ncgg/keywords @@ -43,8 +43,6 @@ reusing REUSING rom ROM samesign SAMESIGN sfit SFIT -stackadjust STACKADJUST -stackoffset STACKOFFSET topeltsize TOPELTSIZE test TEST to TO diff --git a/util/ncgg/output.c b/util/ncgg/output.c index 2a905c339..50458e369 100644 --- a/util/ncgg/output.c +++ b/util/ncgg/output.c @@ -12,8 +12,6 @@ int tabledebug=0; /* do not generate code for table debugging */ #endif int verbose=0; /* print all statistics */ int use_tes; /* use top element size information */ -int use_noframepointer; /* use stackadjust mechanism to remove requirement - for frame pointer */ char *c_file= "tables.c"; char *h_file= "tables.H"; char *cd_file= "code"; @@ -614,8 +612,6 @@ outdefs() { cdef("TABLEDEBUG",1); if (use_tes) cdef("USE_TES",1); - if (use_noframepointer) - cdef("USE_NOFRAMEPOINTER",1); } outars() { @@ -856,11 +852,6 @@ varinfo *kills,*allocates,*generates,*yields,*leaving; codeint(vp->vi_int[1]); codenl(); break; - case INSSTACKADJUST: - code8(DO_STACKADJUST); - codeint(vp->vi_int[1]); - codenl(); - break; } } codecoco(cocono); diff --git 
a/util/ncgg/param.h b/util/ncgg/param.h index d39dd6b38..369e51d25 100644 --- a/util/ncgg/param.h +++ b/util/ncgg/param.h @@ -15,7 +15,7 @@ #define BORS(x,y) y #endif -#define MAXREGS BORS(80,30) +#define MAXREGS BORS(200,30) #define MAXPROPS BORS(120,20) #define MAXTOKENS BORS(100,60) #define MAXATT 6 diff --git a/util/ncgg/pseudo.h b/util/ncgg/pseudo.h index 91013354a..24b335c50 100644 --- a/util/ncgg/pseudo.h +++ b/util/ncgg/pseudo.h @@ -12,4 +12,3 @@ #define INSERASE (-6) #define INSREMOVE (-7) #define INSLABDEF (-8) -#define INSSTACKADJUST (-9)