From 32ebc502c86047797d72122a98e329f2ca03a05e Mon Sep 17 00:00:00 2001 From: David Given Date: Fri, 17 May 2013 00:03:38 +0100 Subject: [PATCH 01/76] Skeleton of VideoCore IV support for the Raspberry Pi. --HG-- branch : dtrg-videocore rename : mach/powerpc/as/.distr => mach/vc4/as/.distr rename : mach/powerpc/as/mach0.c => mach/vc4/as/mach0.c rename : mach/powerpc/as/mach1.c => mach/vc4/as/mach1.c rename : mach/powerpc/as/mach2.c => mach/vc4/as/mach2.c rename : mach/powerpc/as/mach3.c => mach/vc4/as/mach3.c rename : mach/powerpc/as/mach4.c => mach/vc4/as/mach4.c rename : mach/powerpc/as/mach5.c => mach/vc4/as/mach5.c rename : mach/i86/build.mk => mach/vc4/build.mk rename : mach/powerpc/libem/powerpc.h => mach/vc4/libem/videocore.h rename : mach/i86/libend/.distr => mach/vc4/libend/.distr rename : mach/i86/libend/edata.s => mach/vc4/libend/edata.s rename : mach/i86/libend/em_end.s => mach/vc4/libend/em_end.s rename : mach/i86/libend/end.s => mach/vc4/libend/end.s rename : mach/i86/libend/etext.s => mach/vc4/libend/etext.s rename : mach/powerpc/ncg/.distr => mach/vc4/ncg/.distr rename : mach/powerpc/ncg/mach.c => mach/vc4/ncg/mach.c rename : mach/powerpc/ncg/mach.h => mach/vc4/ncg/mach.h rename : mach/powerpc/ncg/table => mach/vc4/ncg/table rename : plat/pc86/descr => plat/rpi/descr --- mach/vc4/as/.distr | 6 + mach/vc4/as/binary.h | 34 + mach/vc4/as/mach0.c | 31 + mach/vc4/as/mach1.c | 6 + mach/vc4/as/mach2.c | 22 + mach/vc4/as/mach3.c | 95 ++ mach/vc4/as/mach4.c | 110 ++ mach/vc4/as/mach5.c | 7 + mach/vc4/build.mk | 10 + mach/vc4/libem/dummy.s | 9 + mach/vc4/libem/videocore.h | 15 + mach/vc4/libend/.distr | 4 + mach/vc4/libend/edata.s | 15 + mach/vc4/libend/em_end.s | 24 + mach/vc4/libend/end.s | 15 + mach/vc4/libend/etext.s | 15 + mach/vc4/ncg/.distr | 3 + mach/vc4/ncg/mach.c | 210 ++++ mach/vc4/ncg/mach.h | 32 + mach/vc4/ncg/table | 1999 +++++++++++++++++++++++++++++++ mach/vc4/test/opcodes.s | 77 ++ plat/rpi/build.mk | 47 + plat/rpi/descr | 69 ++ 
plat/rpi/include/ack/config.h | 11 + plat/rpi/include/unistd.h | 73 ++ plat/rpi/libsys/_hol0.s | 22 + plat/rpi/libsys/_sys_rawread.s | 26 + plat/rpi/libsys/_sys_rawwrite.s | 32 + plat/rpi/libsys/brk.c | 45 + plat/rpi/libsys/close.c | 16 + plat/rpi/libsys/creat.c | 17 + plat/rpi/libsys/errno.s | 31 + plat/rpi/libsys/getpid.c | 15 + plat/rpi/libsys/isatty.c | 15 + plat/rpi/libsys/kill.c | 16 + plat/rpi/libsys/libsys.h | 18 + plat/rpi/libsys/lseek.c | 16 + plat/rpi/libsys/open.c | 16 + plat/rpi/libsys/read.c | 45 + plat/rpi/libsys/signal.c | 17 + plat/rpi/libsys/time.c | 19 + plat/rpi/libsys/write.c | 50 + 42 files changed, 3355 insertions(+) create mode 100644 mach/vc4/as/.distr create mode 100644 mach/vc4/as/binary.h create mode 100644 mach/vc4/as/mach0.c create mode 100644 mach/vc4/as/mach1.c create mode 100644 mach/vc4/as/mach2.c create mode 100644 mach/vc4/as/mach3.c create mode 100644 mach/vc4/as/mach4.c create mode 100644 mach/vc4/as/mach5.c create mode 100644 mach/vc4/build.mk create mode 100644 mach/vc4/libem/dummy.s create mode 100644 mach/vc4/libem/videocore.h create mode 100644 mach/vc4/libend/.distr create mode 100644 mach/vc4/libend/edata.s create mode 100644 mach/vc4/libend/em_end.s create mode 100644 mach/vc4/libend/end.s create mode 100644 mach/vc4/libend/etext.s create mode 100644 mach/vc4/ncg/.distr create mode 100644 mach/vc4/ncg/mach.c create mode 100644 mach/vc4/ncg/mach.h create mode 100644 mach/vc4/ncg/table create mode 100644 mach/vc4/test/opcodes.s create mode 100644 plat/rpi/build.mk create mode 100644 plat/rpi/descr create mode 100644 plat/rpi/include/ack/config.h create mode 100644 plat/rpi/include/unistd.h create mode 100644 plat/rpi/libsys/_hol0.s create mode 100644 plat/rpi/libsys/_sys_rawread.s create mode 100644 plat/rpi/libsys/_sys_rawwrite.s create mode 100644 plat/rpi/libsys/brk.c create mode 100644 plat/rpi/libsys/close.c create mode 100644 plat/rpi/libsys/creat.c create mode 100644 plat/rpi/libsys/errno.s create mode 100644 
plat/rpi/libsys/getpid.c create mode 100644 plat/rpi/libsys/isatty.c create mode 100644 plat/rpi/libsys/kill.c create mode 100644 plat/rpi/libsys/libsys.h create mode 100644 plat/rpi/libsys/lseek.c create mode 100644 plat/rpi/libsys/open.c create mode 100644 plat/rpi/libsys/read.c create mode 100644 plat/rpi/libsys/signal.c create mode 100644 plat/rpi/libsys/time.c create mode 100644 plat/rpi/libsys/write.c diff --git a/mach/vc4/as/.distr b/mach/vc4/as/.distr new file mode 100644 index 000000000..8ebe3797c --- /dev/null +++ b/mach/vc4/as/.distr @@ -0,0 +1,6 @@ +mach0.c +mach1.c +mach2.c +mach3.c +mach4.c +mach5.c diff --git a/mach/vc4/as/binary.h b/mach/vc4/as/binary.h new file mode 100644 index 000000000..c00e73550 --- /dev/null +++ b/mach/vc4/as/binary.h @@ -0,0 +1,34 @@ +/* + * VideoCore IV assembler for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#ifndef BINARY_H +#define BINARY_H + +/* This grotesque nonsense allows us to use binary constants from C. */ + +#define HEX__(n) 0x##n##LU +#define B8__(x) \ + ((x&0x0000000FLU)?1:0) \ + +((x&0x000000F0LU)?2:0) \ + +((x&0x00000F00LU)?4:0) \ + +((x&0x0000F000LU)?8:0) \ + +((x&0x000F0000LU)?16:0) \ + +((x&0x00F00000LU)?32:0) \ + +((x&0x0F000000LU)?64:0) \ + +((x&0xF0000000LU)?128:0) + +#define B8(d) \ + ((unsigned char)B8__(HEX__(d))) +#define B16(dmsb,dlsb) \ + (((unsigned short)B8(dmsb)<<8) + B8(dlsb)) +#define B32(dmsb,db2,db3,dlsb) \ + (((unsigned long)B8(dmsb)<<24) \ + + ((unsigned long)B8(db2)<<16) \ + + ((unsigned long)B8(db3)<<8) \ + + B8(dlsb)) + +#endif diff --git a/mach/vc4/as/mach0.c b/mach/vc4/as/mach0.c new file mode 100644 index 000000000..b6294ecfc --- /dev/null +++ b/mach/vc4/as/mach0.c @@ -0,0 +1,31 @@ +/* + * VideoCore IV assembler for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. 
+ * See the file 'Copying' in the root of the distribution for the full text. + */ + +#define THREE_PASS /* branch and offset optimization */ +#define LISTING /* enable listing facilities */ +#define RELOCATION /* generate relocatable code */ +#define DEBUG 0 + +#undef valu_t +#define valu_t long + +#undef ADDR_T +#define ADDR_T long + +#undef word_t +#define word_t long + +#undef ALIGNWORD +#define ALIGNWORD 4 + +#undef ALIGNSECT +#define ALIGNSECT 4 + +#undef VALWIDTH +#define VALWIDTH 8 + +#define FIXUPFLAGS (RELBR | RELWR) diff --git a/mach/vc4/as/mach1.c b/mach/vc4/as/mach1.c new file mode 100644 index 000000000..96a8a07fa --- /dev/null +++ b/mach/vc4/as/mach1.c @@ -0,0 +1,6 @@ +/* + * VideoCore IV assembler for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ diff --git a/mach/vc4/as/mach2.c b/mach/vc4/as/mach2.c new file mode 100644 index 000000000..c69007de0 --- /dev/null +++ b/mach/vc4/as/mach2.c @@ -0,0 +1,22 @@ +/* + * VideoCore IV assembler for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +%token GPR + +%token OP +%token OP_ONEREG +%token OP_ONELREG +%token OP_ALU +%token OP_MEM +%token OP_BREG +%token OP_STACK + +/* Other token types */ + +/* %type c */ +%type e16 u8 u7 u6 u5 u4 u2 u1 +/* %type nb ds bda bdl lia lil */ diff --git a/mach/vc4/as/mach3.c b/mach/vc4/as/mach3.c new file mode 100644 index 000000000..b36652db7 --- /dev/null +++ b/mach/vc4/as/mach3.c @@ -0,0 +1,95 @@ +/* + * VideoCore IV assembler for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. 
+ */ + +#include "binary.h" + +/* Integer registers */ + +0, GPR, 0, "r0", +0, GPR, 1, "r1", +0, GPR, 2, "r2", +0, GPR, 3, "r3", +0, GPR, 4, "r4", +0, GPR, 5, "r5", + +0, GPR, 6, "r6", +0, GPR, 6, "fp", +0, GPR, 7, "r7", +0, GPR, 8, "r8", +0, GPR, 9, "r9", +0, GPR, 10, "r10", +0, GPR, 11, "r11", +0, GPR, 12, "r12", +0, GPR, 13, "r13", +0, GPR, 14, "r14", +0, GPR, 15, "r15", +0, GPR, 16, "r16", +0, GPR, 17, "r17", +0, GPR, 18, "r18", +0, GPR, 19, "r19", +0, GPR, 20, "r20", +0, GPR, 21, "r21", +0, GPR, 22, "r22", +0, GPR, 23, "r23", +0, GPR, 24, "r24", + +0, GPR, 25, "r25", +0, GPR, 25, "sp", +0, GPR, 26, "r26", +0, GPR, 26, "lr", +0, GPR, 27, "r27", +0, GPR, 28, "r28", +0, GPR, 29, "r29", +0, GPR, 30, "r30", +0, GPR, 30, "sr", +0, GPR, 31, "r31", +0, GPR, 31, "pc", + +/* Special instructions */ + +0, OP, B16(00000000,00000001), "nop", +0, OP, B16(00000000,00001010), "rti", + +0, OP_ONEREG, B16(00000000,01000000), "b", +0, OP_ONEREG, B16(00000000,01100000), "bl", +0, OP_ONELREG, B16(00000000,10000000), "tbb", +0, OP_ONELREG, B16(00000000,10100000), "tbs", + +0, OP_ALU, B8(00000000), "mov", +0, OP_ALU, B8(00000001), "cmn", +0, OP_ALU, B8(00000010), "add", +0, OP_ALU, B8(00000011), "bic", +0, OP_ALU, B8(00000100), "mul", +0, OP_ALU, B8(00000101), "eor", +0, OP_ALU, B8(00000110), "sub", +0, OP_ALU, B8(00000111), "and", +0, OP_ALU, B8(00001000), "mvn", +0, OP_ALU, B8(00001001), "ror", +0, OP_ALU, B8(00001010), "cmp", +0, OP_ALU, B8(00001011), "rsb", +0, OP_ALU, B8(00001100), "btst", +0, OP_ALU, B8(00001101), "or", +0, OP_ALU, B8(00001110), "extu", +0, OP_ALU, B8(00001111), "max", +0, OP_ALU, B8(00010000), "bset", +0, OP_ALU, B8(00010001), "min", +0, OP_ALU, B8(00010010), "bclr", +0, OP_ALU, B8(00010011), "adds2", +0, OP_ALU, B8(00010100), "bchg", +0, OP_ALU, B8(00010101), "adds4", +0, OP_ALU, B8(00010110), "adds8", +0, OP_ALU, B8(00010111), "adds16", +0, OP_ALU, B8(00011000), "exts", +0, OP_ALU, B8(00011001), "neg", +0, OP_ALU, B8(00011010), "lsr", +0, OP_ALU, 
B8(00011011), "clz", +0, OP_ALU, B8(00011100), "lsl", +0, OP_ALU, B8(00011101), "brev", +0, OP_ALU, B8(00011110), "asr", +0, OP_ALU, B8(00011111), "abs", + + diff --git a/mach/vc4/as/mach4.c b/mach/vc4/as/mach4.c new file mode 100644 index 000000000..d1320dae1 --- /dev/null +++ b/mach/vc4/as/mach4.c @@ -0,0 +1,110 @@ +/* + * VideoCore IV assembler for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include "binary.h" + +operation + : OP { emit2($1); } + + | OP_ONEREG GPR + { + emit2($1 | ($2<<0)); + } + + | OP_ONELREG GPR + { + if ($2 >= 0x10) + serror("cannot use r16+ here"); + emit2($1 | ($2<<0)); + } + + | OP_ALU GPR ',' GPR + { + emit2(B16(01000000, 00000000) | ($1<<8) | ($2<<0) | ($4<<4)); + } + + | OP_ALU GPR ',' '#' u5 + { + if ($1 >= 0x10) + serror("cannot use this ALU operation in 2op form"); + emit2(B16(01100000, 00000000) | ($1<<9) | ($2<<0) | ($5<<4)); + } + ; + +e16 + : expr + { + DOTVAL += 2; + newrelo($1.typ, RELO2 | FIXUPFLAGS); + DOTVAL -= 2; + $$ = $1.val & 0xFFFF; + } + ; + +u8 + : absexp + { + if (($1 < 0) || ($1 > 0xFF)) + serror("8-bit unsigned value out of range"); + $$ = $1; + } + ; + +u7 + : absexp + { + if (($1 < 0) || ($1 > 0x7F)) + serror("7-bit unsigned value out of range"); + $$ = $1; + } + ; + +u6 + : absexp + { + if (($1 < 0) || ($1 > 0x3F)) + serror("6-bit unsigned value out of range"); + $$ = $1; + } + ; + +u5 + : absexp + { + if (($1 < 0) || ($1 > 0x1F)) + serror("5-bit unsigned value out of range"); + $$ = $1; + } + ; + +u4 + : absexp + { + if (($1 < 0) || ($1 > 0xF)) + serror("4-bit unsigned value out of range"); + $$ = $1; + } + ; + +u1 + : absexp + { + if (($1 < 0) || ($1 > 1)) + serror("1-bit unsigned value out of range"); + $$ = $1; + } + ; + +u2 + : absexp + { + if (($1 < 0) || ($1 > 0x3)) + serror("2-bit unsigned value out of range"); + $$ = $1; + } + ; + diff --git 
a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c new file mode 100644 index 000000000..668f4b748 --- /dev/null +++ b/mach/vc4/as/mach5.c @@ -0,0 +1,7 @@ +/* + * VideoCore IV assembler for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + diff --git a/mach/vc4/build.mk b/mach/vc4/build.mk new file mode 100644 index 000000000..06ea3edd4 --- /dev/null +++ b/mach/vc4/build.mk @@ -0,0 +1,10 @@ +arch-libem-vc4 := \ + dummy.s + +arch-libend-vc4 = \ + edata.s \ + em_end.s \ + end.s \ + etext.s + + diff --git a/mach/vc4/libem/dummy.s b/mach/vc4/libem/dummy.s new file mode 100644 index 000000000..4edaa030f --- /dev/null +++ b/mach/vc4/libem/dummy.s @@ -0,0 +1,9 @@ +# +/* + * VideoCore IV support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include "videocore.h" diff --git a/mach/vc4/libem/videocore.h b/mach/vc4/libem/videocore.h new file mode 100644 index 000000000..3e27a7e7b --- /dev/null +++ b/mach/vc4/libem/videocore.h @@ -0,0 +1,15 @@ +# +/* + * VideoCore IV support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +! Declare segments (the order is important). 
+ +.sect .text +.sect .rom +.sect .data +.sect .bss + diff --git a/mach/vc4/libend/.distr b/mach/vc4/libend/.distr new file mode 100644 index 000000000..afa027b6e --- /dev/null +++ b/mach/vc4/libend/.distr @@ -0,0 +1,4 @@ +edata.s +em_end.s +end.s +etext.s diff --git a/mach/vc4/libend/edata.s b/mach/vc4/libend/edata.s new file mode 100644 index 000000000..e706877db --- /dev/null +++ b/mach/vc4/libend/edata.s @@ -0,0 +1,15 @@ +# +/* + * VideoCore IV support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +.sect .text +.sect .rom +.sect .data +.sect .bss +.define _edata +.sect .data +_edata: diff --git a/mach/vc4/libend/em_end.s b/mach/vc4/libend/em_end.s new file mode 100644 index 000000000..bae5aaa0c --- /dev/null +++ b/mach/vc4/libend/em_end.s @@ -0,0 +1,24 @@ +# +/* + * VideoCore IV support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +.sect .text +.sect .rom +.sect .data +.sect .bss +.sect .end ! only for declaration of _end, __end and endbss. +.define endtext, endrom, enddata, endbss, __end + + .sect .text +endtext: + .sect .rom +endrom: + .sect .data +enddata: + .sect .end +__end: +endbss: diff --git a/mach/vc4/libend/end.s b/mach/vc4/libend/end.s new file mode 100644 index 000000000..5ce2882b6 --- /dev/null +++ b/mach/vc4/libend/end.s @@ -0,0 +1,15 @@ +# +/* + * VideoCore IV support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +.sect .text +.sect .rom +.sect .data +.sect .bss +.define _end +.sect .end ! only for declaration of _end, __end and endbss. 
+_end: diff --git a/mach/vc4/libend/etext.s b/mach/vc4/libend/etext.s new file mode 100644 index 000000000..973ab1814 --- /dev/null +++ b/mach/vc4/libend/etext.s @@ -0,0 +1,15 @@ +# +/* + * VideoCore IV support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +.sect .text +.sect .rom +.sect .data +.sect .bss +.define _etext +.sect .text +_etext: diff --git a/mach/vc4/ncg/.distr b/mach/vc4/ncg/.distr new file mode 100644 index 000000000..ccdf9bf7e --- /dev/null +++ b/mach/vc4/ncg/.distr @@ -0,0 +1,3 @@ +mach.c +mach.h +table diff --git a/mach/vc4/ncg/mach.c b/mach/vc4/ncg/mach.c new file mode 100644 index 000000000..f57a2a08f --- /dev/null +++ b/mach/vc4/ncg/mach.c @@ -0,0 +1,210 @@ +/* + * VideoCore IV code generator for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include <stdlib.h> +#include <limits.h> + +#ifndef NORCSID +static char rcsid[]= "$Id$" ; +#endif + +int framesize; + +/* + * machine dependent back end routines for the VideoCore IV. 
+ */ + +con_part(int sz, word w) +{ + while (part_size % sz) + part_size++; + if (part_size == TEM_WSIZE) + part_flush(); + if (sz == 1) { + w &= 0xFF; + w <<= 8*(3-part_size); + part_word |= w; + } else if (sz == 2) { + w &= 0xFFFF; + if (part_size == 0) { + /* Shift 8 for m68k2, 16 otherwise */ + w <<= 4 * TEM_WSIZE; + } + part_word |= w; + } else { + assert(sz == TEM_WSIZE); + part_word = w; + } + part_size += sz; +} + +con_mult(word sz) +{ + + if (argval != 4) + fatal("bad icon/ucon size"); + fprintf(codefile,".data4 %s\n", str); +} + +#define CODE_GENERATOR +#define IEEEFLOAT +#define FL_MSL_AT_LOW_ADDRESS 1 +#define FL_MSW_AT_LOW_ADDRESS 1 +#define FL_MSB_AT_LOW_ADDRESS 1 +#include + +prolog(full nlocals) +{ + int ss = nlocals + 8; + fprintf(codefile, "addi sp, sp, %d\n", -ss); + fprintf(codefile, "stw fp, %d(sp)\n", nlocals); + fprintf(codefile, "mfspr r0, lr\n" + "stw r0, %d(sp)\n", nlocals+4); + fprintf(codefile, "addi fp, sp, %d\n", nlocals); + + framesize = nlocals; +} + +mes(word type) +{ + int argt ; + + switch ( (int)type ) { + case ms_ext : + for (;;) { + switch ( argt=getarg( + ptyp(sp_cend)|ptyp(sp_pnam)|sym_ptyp) ) { + case sp_cend : + return ; + default: + strarg(argt) ; + fprintf(codefile,".define %s\n",argstr) ; + break ; + } + } + default : + while ( getarg(any_ptyp) != sp_cend ) ; + break ; + } +} + +char *segname[] = { + ".sect .text", + ".sect .data", + ".sect .rom", + ".sect .bss" +}; + +#ifdef REGVARS + +static int savedregsi[32]; +static int numsaved; + +/* Initialise regvar system for one function. */ + +i_regsave() +{ + int i; + + fprintf(codefile, "! i_regsave()\n"); + for (i=0; i<32; i++) + savedregsi[i] = INT_MAX; + numsaved = 0; +} + +/* Mark a register as being saved. */ + +regsave(const char* regname, full offset, int size) +{ + int regnum = atoi(regname+1); + savedregsi[regnum] = offset; + numsaved++; + + fprintf(codefile, "! 
%d is saved in %s\n", offset, regname); +#if 0 + fprintf(codefile, "stwu %s, -4(sp)\n", regname); + if (offset >= 0) + fprintf(codefile, "lwz %s, %d(fp)\n", regname, offset); +#endif +} + +/* Finish saving registers. */ + +void saveloadregs(const char* ops, const char* opm) +{ + int offset = -(framesize + numsaved*4); + int reg = 32; + + /* Check for the possibility of a multiple. */ + + do + { + reg--; + } + while ((reg > 0) && (savedregsi[reg] != INT_MAX)); + if (reg < 31) + { + fprintf(codefile, "%s r%d, %d(fp)\n", opm, reg+1, offset); + offset += (31-reg)*4; + } + + /* Save everything else singly. */ + + while (reg > 0) + { + if (savedregsi[reg] != INT_MAX) + { + fprintf(codefile, "%s r%d, %d(fp)\n", ops, reg, offset); + offset += 4; + } + reg--; + } +} + +f_regsave() +{ + int i; + fprintf(codefile, "! f_regsave()\n"); + fprintf(codefile, "addi sp, sp, %d\n", -numsaved*4); + + saveloadregs("stw", "stmw"); + + for (i=0; i<32; i++) + if ((savedregsi[i] != INT_MAX) && (savedregsi[i] > 0)) + fprintf(codefile, "lwz r%d, %d(fp)\n", i, savedregsi[i]); +} + +/* Restore all saved registers. */ + +regreturn() +{ + fprintf(codefile, "! regreturn()\n"); + saveloadregs("lwz", "lmw"); +} + +/* Calculate the score of a given register. */ + +int regscore(full offset, int size, int type, int frequency, int totype) +{ + int score; + + fprintf(codefile, "! regscore(%ld, %d, %d, %d, %d)\n", offset, size, type, frequency, totype); + + if (size != 4) + return -1; + + /* Per use: 6 bytes (on average) + * Overhead in prologue: 4 bytes, plus 4 if a parameter + * Overhead in epilogue: 0 bytes + */ + + score = frequency*6 - 4 - ((offset>=0) ? 4 : 0); + fprintf(codefile, "! 
local at offset %d has regvar score %d\n", offset, score); + return score; +} + +#endif diff --git a/mach/vc4/ncg/mach.h b/mach/vc4/ncg/mach.h new file mode 100644 index 000000000..89d2b8a97 --- /dev/null +++ b/mach/vc4/ncg/mach.h @@ -0,0 +1,32 @@ +/* + * VideoCore IV code generator for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#define ex_ap(y) fprintf(codefile,".extern %s\n",y) +#define in_ap(y) /* nothing */ + +#define newilb(x) fprintf(codefile,"%s:\n",x) +#define newdlb(x) fprintf(codefile,"%s:\n",x) +#define dlbdlb(x,y) fprintf(codefile,"%s = %s\n",x,y) +#define newlbss(l,x) fprintf(codefile,".comm %s,%u\n",l,x); + +#define cst_fmt "%d" +#define off_fmt "%d" +#define ilb_fmt "I%x_%x" +#define dlb_fmt "_%d" +#define hol_fmt "hol%d" + +#define hol_off "%ld+hol%d" + +#define con_cst(x) fprintf(codefile,".data4\t%ld\n",x) +#define con_ilb(x) fprintf(codefile,".data4\t%s\n",x) +#define con_dlb(x) fprintf(codefile,".data4\t%s\n",x) + +#define fmt_id(sf, st) sprintf(st,"_%s",sf) + +#define modhead ".sect .text; .sect .rom; .sect .data; .sect .bss\n" + +#define BSS_INIT 0 diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table new file mode 100644 index 000000000..f08ec3500 --- /dev/null +++ b/mach/vc4/ncg/table @@ -0,0 +1,1999 @@ +/* + * VideoCore IV code generator for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. 
+ */ + +EM_WSIZE = 4 +EM_PSIZE = 4 +EM_BSIZE = 8 /* two words saved in call frame */ + +INT8 = 1 /* Size of values */ +INT16 = 2 +INT32 = 4 +INT64 = 8 + +FP_OFFSET = 0 /* Offset of saved FP relative to our FP */ +PC_OFFSET = 4 /* Offset of saved PC relative to our FP */ + +#define COMMENT(n) /* noop */ + + +#define nicesize(x) ((x)==INT8 || (x)==INT16 || (x)==INT32 || (x)==INT64) + +#define smalls(n) sfit(n, 16) +#define smallu(n) ufit(n, 16) + +#define lo(n) (n & 0xFFFF) +#define hi(n) ((n>>16) & 0xFFFF) + +/* Use these for instructions that treat the low half as signed --- his() + * includes a modifier to produce the correct value when the low half gets + * sign extended. Er, do make sure you load the low half second. */ +#define los(n) (n & 0xFFFF) +#define his(n) ((hi(n) - (lo(n)>>15)) & 0xFFFF) + +#define IFFALSE {CONST, 4} +#define IFTRUE {CONST, 12} +#define ALWAYS {CONST, 20} +#define DCTRZ {CONST, 34} + +#define LT {CONST, 0} +#define GT {CONST, 1} +#define EQ {CONST, 2} + + + +PROPERTIES + + GPR /* any GPR */ + REG /* any allocatable GPR */ + FPR /* any FPR */ + FREG /* any allocatable FPR */ + SPR /* any SPR */ + CR /* any CR */ + + GPR0 GPRSP GPRFP GPR3 GPR4 GPR5 GPR6 GPR7 + GPR8 GPR9 GPR10 GPR11 GPR12 GPR13 GPR14 GPR15 + GPR16 GPR17 GPR18 GPR19 GPR20 GPR21 GPR22 GPR23 + GPR24 GPR25 GPR26 GPR27 GPR28 GPR29 GPR30 GPR31 + + CR0 CR1 + + FPR0 FPR1 FPR2 FPR3 FPR4 FPR5 FPR6 FPR7 + FPR8 FPR9 FPR10 FPR11 FPR12 FPR13 FPR14 FPR15 + FPR16 FPR17 FPR18 FPR19 FPR20 FPR21 FPR22 FPR23 + FPR24 FPR25 FPR26 FPR27 FPR28 FPR29 FPR30 FPR31 + +REGISTERS + + /* Reverse order to encourage ncg to allocate them from r31 down */ + + R31("r31") : GPR, REG, GPR31 regvar. + R30("r30") : GPR, REG, GPR30 regvar. + R29("r29") : GPR, REG, GPR29 regvar. + R28("r28") : GPR, REG, GPR28 regvar. + R27("r27") : GPR, REG, GPR27 regvar. + R26("r26") : GPR, REG, GPR26 regvar. + R25("r25") : GPR, REG, GPR25 regvar. + R24("r24") : GPR, REG, GPR24 regvar. + R23("r23") : GPR, REG, GPR23 regvar. 
+ R22("r22") : GPR, REG, GPR22 regvar. + R21("r21") : GPR, REG, GPR21 regvar. + R20("r20") : GPR, REG, GPR20 regvar. + R19("r19") : GPR, REG, GPR19 regvar. + R18("r18") : GPR, REG, GPR18 regvar. + R17("r17") : GPR, REG, GPR17 regvar. + R16("r16") : GPR, REG, GPR16 regvar. + R15("r15") : GPR, REG, GPR15 regvar. + R14("r14") : GPR, REG, GPR14 regvar. + R13("r13") : GPR, REG, GPR13 regvar. + R12("r12") : GPR, REG, GPR12. + R11("r11") : GPR, GPR11. + R10("r10") : GPR, REG, GPR10. + R9("r9") : GPR, REG, GPR9. + R8("r8") : GPR, REG, GPR8. + R7("r7") : GPR, REG, GPR7. + R6("r6") : GPR, REG, GPR6. + R5("r5") : GPR, REG, GPR5. + R4("r4") : GPR, REG, GPR4. + R3("r3") : GPR, REG, GPR3. + FP("fp") : GPR, GPRFP. + SP("sp") : GPR, GPRSP. + R0("r0") : GPR, GPR0. + + F31("f31") : FPR, FREG, FPR31. + F30("f30") : FPR, FREG, FPR30. + F29("f29") : FPR, FREG, FPR29. + F28("f28") : FPR, FREG, FPR28. + F27("f27") : FPR, FREG, FPR27. + F26("f26") : FPR, FREG, FPR26. + F25("f25") : FPR, FREG, FPR25. + F24("f24") : FPR, FREG, FPR24. + F23("f23") : FPR, FREG, FPR23. + F22("f22") : FPR, FREG, FPR22. + F21("f21") : FPR, FREG, FPR21. + F20("f20") : FPR, FREG, FPR20. + F19("f19") : FPR, FREG, FPR19. + F18("f18") : FPR, FREG, FPR18. + F17("f17") : FPR, FREG, FPR17. + F16("f16") : FPR, FREG, FPR16. + F15("f15") : FPR, FREG, FPR15. + F14("f14") : FPR, FREG, FPR14. + F13("f13") : FPR, FREG, FPR13. + F12("f12") : FPR, FREG, FPR12. + F11("f11") : FPR, FREG, FPR11. + F10("f10") : FPR, FREG, FPR10. + F9("f9") : FPR, FREG, FPR9. + F8("f8") : FPR, FREG, FPR8. + F7("f7") : FPR, FREG, FPR7. + F6("f6") : FPR, FREG, FPR6. + F5("f5") : FPR, FREG, FPR5. + F4("f4") : FPR, FREG, FPR4. + F3("f3") : FPR, FREG, FPR3. + F2("f2") : FPR, FREG, FPR2. + F1("f1") : FPR, FREG, FPR1. + F0("f0") : FPR, FREG, FPR0. + + LR("lr") : SPR. + CTR("ctr") : SPR. + C0("cr0") : CR, CR0. + +#define SCRATCH R11 +#define FSCRATCH F0 + + +TOKENS + +/* Used only in instruction descriptions (to generate the correct syntax). 
*/ + + GPRINDIRECT = { GPR reg; INT off; } 4 off "(" reg ")". + GPRINDIRECTLO = { GPR reg; ADDR adr; } 4 ">" adr "(" reg ")". /* Warning! Do not use on labels. */ + HILABEL = { ADDR adr; } 4 "<" adr. + LOLABEL = { ADDR adr; } 4 ">" adr. + +/* Primitives */ + + LABEL = { ADDR adr; } 4 adr. + CONST = { INT val; } 4 val. + LOCAL = { INT off; } 4. + +/* Allows us to use regvar() to refer to registers */ + + GPRE = { GPR reg; } 4 reg. + +/* Expression partial results */ + + SUM_RC = { GPR reg; INT off; } 4. + SUM_RR = { GPR reg1; GPR reg2; } 4. + + TRISTATE_RC_S = { GPR reg; INT val; } 4. + TRISTATE_RC_U = { GPR reg; INT val; } 4. + TRISTATE_RR_S = { GPR reg1; GPR reg2; } 4. + TRISTATE_RR_U = { GPR reg1; GPR reg2; } 4. + + TRISTATE_FF = { FPR reg1; FPR reg2; } 4. + + SEX_B = { GPR reg; } 4. + SEX_H = { GPR reg; } 4. + + IND_RC_B = { GPR reg; INT off; } 4. + IND_RC_H = { GPR reg; INT off; } 4. + IND_RC_H_S = { GPR reg; INT off; } 4. + IND_RC_W = { GPR reg; INT off; } 4. + IND_RR_W = { GPR reg1; GPR reg2; } 4. + IND_LABEL_W = { ADDR adr; } 4. + IND_RC_D = { GPR reg; INT off; } 8. + IND_RR_D = { GPR reg1; GPR reg2; } 8. + IND_LABEL_D = { ADDR adr; } 8. + + NOT_R = { GPR reg; } 4. + + AND_RR = { GPR reg1; GPR reg2; } 4. + AND_RC = { GPR reg; INT val; } 4. + OR_RR = { GPR reg1; GPR reg2; } 4. + OR_RC = { GPR reg; INT val; } 4. + XOR_RR = { GPR reg1; GPR reg2; } 4. + XOR_RC = { GPR reg; INT val; } 4. + +/* Floats */ + + FD = { FPR reg; } 8 reg. + FS = { FPR reg; } 4 reg. + +/* Comments */ + + LABELI = { ADDR msg; INT num; } 4 msg " " num. + + + + +SETS + + TOKEN = LABEL + CONST + LOCAL. + GPRI = GPR + GPRE. + + SUM_ALL = SUM_RC + SUM_RR. + + TRISTATE_ALL = TRISTATE_RC_S + TRISTATE_RC_U + TRISTATE_RR_S + + TRISTATE_RR_U + TRISTATE_FF. + + SEX_ALL = SEX_B + SEX_H. + + LOGICAL_ALL = NOT_R + AND_RR + AND_RC + OR_RR + OR_RC + XOR_RR + + XOR_RC. + + IND_ALL_W = IND_RC_W + IND_RR_W + IND_LABEL_W. + + IND_ALL_D = IND_RC_D + IND_RR_D + IND_LABEL_D. 
+ + OP_ALL_W = SUM_ALL + TRISTATE_ALL + SEX_ALL + LOGICAL_ALL + + IND_ALL_W. + + +INSTRUCTIONS + + add GPRI:wo, GPRI:ro, GPRI:ro. + addX "add." GPRI:wo, GPRI:ro, GPRI:ro. + addi GPRI:wo, GPRI:ro, CONST:ro. + addis GPRI:wo, GPRI:ro, CONST+HILABEL:ro. + and GPRI:wo, GPRI:ro, GPRI:ro. + andc GPRI:wo, GPRI:ro, GPRI:ro. + andiX "andi." GPRI:wo, GPRI:ro, CONST:ro kills :cc. + andisX "andis." GPRI:wo, GPRI:ro, CONST:ro kills :cc. + b LABEL:ro. + bc CONST:ro, CONST:ro, LABEL:ro. + bcctr CONST:ro, CONST:ro, CONST:ro. + bcctrl CONST:ro, CONST:ro, CONST:ro. + bclr CONST:ro, CONST:ro, CONST:ro. + bl LABEL:ro. + cmp CR:ro, CONST:ro, GPRI:ro, GPR:ro kills :cc. + cmpi CR:ro, CONST:ro, GPRI:ro, CONST:ro kills :cc. + cmpl CR:ro, CONST:ro, GPRI:ro, GPR:ro kills :cc. + cmpli CR:ro, CONST:ro, GPRI:ro, CONST:ro kills :cc. + divw GPRI:wo, GPRI:ro, GPRI:ro. + divwu GPRI:wo, GPRI:ro, GPRI:ro. + eqv GPRI:wo, GPRI:ro, GPRI:ro. + extsb GPRI:wo, GPRI:ro. + extsh GPRI:wo, GPRI:ro. + fadd FD:wo, FD:ro, FD:ro. + fadds FS:wo, FS:ro, FS:ro. + fcmpo CR:wo, FD:ro, FD:ro. + fdiv FD:wo, FD:ro, FD:ro. + fdivs FS:wo, FS:ro, FS:ro. + fneg FS+FD:wo, FS+FD:ro. + fmul FD:wo, FD:ro, FD:ro. + fmuls FS:wo, FS:ro, FS:ro. + frsp FS:wo, FD:ro. + fsub FD:wo, FD:ro, FD:ro. + fsubs FS:wo, FS:ro, FS:ro. + fmr FS+FD:wo, FS+FD:ro. + lbzx GPRI:wo, GPR:ro, GPR:ro. + lbz GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. + lfd FD:wo, GPRINDIRECT+GPRINDIRECTLO:ro. + lfdu FD:wo, GPRINDIRECT+GPRINDIRECTLO:ro. + lfdx FD:wo, GPR:ro, GPR:ro. + lfs FS:wo, GPRINDIRECT+GPRINDIRECTLO:ro. + lfsu FS:wo, GPRINDIRECT+GPRINDIRECTLO:rw. + lfsx FS:wo, GPR:ro, GPR:ro. + lhzx GPRI:wo, GPR:ro, GPR:ro. + lhax GPRI:wo, GPR:ro, GPR:ro. + lha GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. + lhz GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. + lwzu GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. + lwzx GPRI:wo, GPR:ro, GPR:ro. + lwz GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. + nand GPRI:wo, GPRI:ro, GPRI:ro. + neg GPRI:wo, GPRI:ro. + nor GPRI:wo, GPRI:ro, GPRI:ro. + mfcr GPRI:wo. 
+ mullw GPRI:wo, GPRI:ro, GPRI:ro. + mfspr GPRI:wo, SPR:ro. + mtspr SPR:wo, GPRI:ro. + or GPRI:wo, GPRI:ro, GPRI:ro. + orc GPRI:wo, GPRI:ro, GPRI:ro. + ori GPRI:wo, GPRI:ro, CONST+LOLABEL:ro. + orX "or." GPRI:wo, GPRI:ro, GPRI:ro kills :cc. + rlwinm GPRI:wo, GPRI:ro, CONST:ro, CONST:ro, CONST:ro. + slw GPRI:wo, GPRI:ro, GPRI:ro. + subf GPRI:wo, GPRI:ro, GPRI:ro. + sraw GPRI:wo, GPRI:ro, GPRI:ro. + srawi GPRI:wo, GPRI:ro, CONST:ro. + srw GPRI:wo, GPRI:ro, GPRI:ro. + stb GPRI:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + stbx GPRI:ro, GPR:ro, GPR:ro. + stfd FD:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + stfdu FD:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + stfdx FD:ro, GPR:ro, GPR:ro. + stfs FS:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + stfsu FS:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + stfsx FS:ro, GPR:ro, GPR:ro. + sth GPRI:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + sthx GPRI:ro, GPR:ro, GPR:ro. + stw GPRI:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + stwx GPRI:ro, GPR:ro, GPR:ro. + stwu GPRI:ro, GPRINDIRECT+GPRINDIRECTLO:rw. + xor GPRI:wo, GPRI:ro, GPRI:ro. + xori GPRI:wo, GPRI:ro, CONST:ro. + + gpr_gpr_gpr GPRI:wo, GPRI:ro, GPRI:ro. + gpr_gpr_si GPRI:wo, GPRI:ro, CONST:ro. + gpr_ro_gprindirect GPRI:ro, GPRINDIRECT:rw. + gpr_ro_gpr_gpr GPRI:ro, GPRI:ro, GPRI:ro. + gpr_wo_gprindirect GPRI:wo, GPRINDIRECT:ro. + gpr_wo_gpr_gpr GPRI:wo, GPRI:ro, GPRI:ro. + + invalid "invalid". + comment "!" LABEL+LABELI:ro. + + + +MOVES + + from GPR to GPR + gen + COMMENT("move GPR->GPR") + or %2, %1, %1 + +/* GPRE exists solely to allow us to use regvar() (which can only be used in + an expression) as a register constant. 
*/ + + from GPR to GPRE + gen + COMMENT("move GPR->GPRE") + or %2, %1, %1 + +/* Constants */ + + from CONST smalls(%val) to GPR + gen + COMMENT("move CONST->GPRE") + addi %2, R0, {CONST, lo(%1.val)} + + from CONST to GPR + gen + COMMENT("move CONST->GPRE") + addis %2, R0, {CONST, hi(%1.val)} + ori %2, %2, {CONST, lo(%1.val)} + + from LABEL to GPR + gen + COMMENT("move LABEL->GPR") + addis %2, R0, {HILABEL, %1.adr} + ori %2, %2, {LOLABEL, %1.adr} + +/* Sign extension */ + + from SEX_B to GPR + gen + COMMENT("move SEX_B->GPR") + extsb %2, %1.reg + + from SEX_H to GPR + gen + COMMENT("move SEX_H->GPR") + extsh %2, %1.reg + +/* Register + something */ + + from SUM_RC smalls(%off) to GPR + gen + COMMENT("move SUM_RC->GPR smalls") + addi %2, %1.reg, {CONST, lo(%1.off)} + + from SUM_RC to GPR + gen + COMMENT("move SUM_RC->GPR large") + addi %2, %1.reg, {CONST, los(%1.off)} + addis %2, %2, {CONST, his(%1.off)} + + from SUM_RR to GPR + gen + COMMENT("move SUM_RR->GPR") + add %2, %1.reg1, %1.reg2 + + from SUM_RR to GPR + gen + COMMENT("move SUM_RR->GPRE") + add %2, %1.reg1, %1.reg2 + +/* Read/write byte */ + + from IND_RC_B smalls(%off) to GPR + gen + COMMENT("move IND_RC_B->GPR small") + lbz %2, {GPRINDIRECT, %1.reg, %1.off} + + from IND_RC_B to GPR + gen + COMMENT("move IND_RC_B->GPR large") + addis SCRATCH, %1.reg, {CONST, his(%1.off)} + lbz %2, {GPRINDIRECT, SCRATCH, los(%1.off)} + + from GPR to IND_RC_B smalls(%off) + gen + COMMENT("move GPR->IND_RC_B small") + stb %1, {GPRINDIRECT, %2.reg, %2.off} + + from GPR to IND_RC_B + gen + COMMENT("move GPR->IND_RC_B large") + addis SCRATCH, %2.reg, {CONST, his(%2.off)} + stb %1, {GPRINDIRECT, SCRATCH, los(%2.off)} + +/* Read/write short */ + + from IND_RC_H smalls(%off) to GPR + gen + COMMENT("move IND_RC_H->GPR small") + lhz %2, {GPRINDIRECT, %1.reg, %1.off} + + from IND_RC_H to GPR + gen + COMMENT("move IND_RC_H->GPR large") + addis SCRATCH, %1.reg, {CONST, his(%1.off)} + lhz %2, {GPRINDIRECT, SCRATCH, los(%1.off)} + + from 
IND_RC_H_S smalls(%off) to GPR + gen + COMMENT("move IND_RC_H_S->GPR small") + lha %2, {GPRINDIRECT, %1.reg, %1.off} + + from IND_RC_H_S to GPR + gen + COMMENT("move IND_RC_H_S->GPR large") + addis SCRATCH, %1.reg, {CONST, his(%1.off)} + lha %2, {GPRINDIRECT, SCRATCH, los(%1.off)} + + from GPR to IND_RC_H smalls(%off) + gen + COMMENT("move GPR->IND_RC_H small") + sth %1, {GPRINDIRECT, %2.reg, %2.off} + + from GPR to IND_RC_H + gen + COMMENT("move GPR->IND_RC_H large") + addis SCRATCH, %2.reg, {CONST, his(%2.off)} + sth %1, {GPRINDIRECT, SCRATCH, los(%2.off)} + +/* Read word */ + + from IND_RC_W smalls(%off) to GPR + gen + COMMENT("move IND_RC_W->GPR small") + lwz %2, {GPRINDIRECT, %1.reg, %1.off} + + from IND_RC_W to GPR + gen + COMMENT("move IND_RC_W->GPR large") + addis %2, %1.reg, {CONST, his(%1.off)} + lwz %2, {GPRINDIRECT, %2, los(%1.off)} + + from IND_RR_W to GPR + gen + COMMENT("move IND_RR_W->GPR") + lwzx %2, %1.reg1, %1.reg2 + + from IND_LABEL_W to GPR + gen + COMMENT("move IND_LABEL_W->GPR") + move {LABEL, %1.adr}, SCRATCH + lwz %2, {GPRINDIRECT, SCRATCH, 0} + + from IND_RC_W smalls(%off) to FS + gen + COMMENT("move IND_RC_W->FS small") + lfs %2, {GPRINDIRECT, %1.reg, %1.off} + + from IND_RC_W to FS + gen + COMMENT("move IND_RC_W->FS large") + addis SCRATCH, %1.reg, {CONST, his(%1.off)} + lfs %2, {GPRINDIRECT, SCRATCH, los(%1.off)} + + from IND_RR_W to FS + gen + COMMENT("move IND_RR_W->FS") + lfsx %2, %1.reg1, %1.reg2 + + from IND_LABEL_W to FS + gen + COMMENT("move IND_LABEL_W->FS") + move {LABEL, %1.adr}, SCRATCH + lfs %2, {GPRINDIRECT, SCRATCH, 0} + +/* Write word */ + + from GPR to IND_RC_W smalls(%off) + gen + COMMENT("move GPR->IND_RC_W small") + stw %1, {GPRINDIRECT, %2.reg, %2.off} + + from GPR to IND_RC_W + gen + COMMENT("move GPR->IND_RC_W large") + addis SCRATCH, %2.reg, {CONST, his(%2.off)} + stw %1, {GPRINDIRECT, SCRATCH, los(%2.off)} + + from GPR to IND_RR_W + gen + COMMENT("move GPR->IND_RR_W") + stwx %1, %2.reg1, %2.reg2 + + from GPR to 
IND_LABEL_W + gen + COMMENT("move GPR->IND_LABEL_D") + move {LABEL, %2.adr}, SCRATCH + stw %1, {GPRINDIRECT, SCRATCH, 0} + + from FS to IND_RC_W smalls(%off) + gen + COMMENT("move FS->IND_RC_W small") + stfs %1, {GPRINDIRECT, %2.reg, %2.off} + + from FS to IND_RC_W + gen + COMMENT("move FS->IND_RC_W large") + addis SCRATCH, %2.reg, {CONST, his(%2.off)} + stfs %1, {GPRINDIRECT, SCRATCH, los(%2.off)} + + from FS to IND_RR_W + gen + COMMENT("move FS->IND_RR_W") + stfsx %1, %2.reg1, %2.reg2 + + from FS to IND_LABEL_W + gen + COMMENT("move FS->IND_LABEL_D") + move {LABEL, %2.adr}, SCRATCH + stfs %1, {GPRINDIRECT, SCRATCH, 0} + +/* Read double */ + + from IND_RC_D smalls(%off) to FD + gen + COMMENT("move IND_RC_D->FD small") + lfd %2, {GPRINDIRECT, %1.reg, %1.off} + + from IND_RC_D to FD + gen + COMMENT("move IND_RC_D->FD large") + addis SCRATCH, %1.reg, {CONST, his(%1.off)} + lfd %2, {GPRINDIRECT, SCRATCH, los(%1.off)} + + from IND_RR_D to FD + gen + COMMENT("move IND_RR_D->FD") + lfdx %2, %1.reg1, %1.reg2 + + from IND_LABEL_D to FD + gen + COMMENT("move IND_LABEL_D->FD") + move {LABEL, %1.adr}, SCRATCH + lfd %2, {GPRINDIRECT, SCRATCH, 0} + +/* Write double */ + + from FD to IND_RC_D smalls(%off) + gen + COMMENT("move FD->IND_RC_D small") + stfd %1, {GPRINDIRECT, %2.reg, %2.off} + + from FD to IND_RC_D + gen + COMMENT("move FD->IND_RC_D large") + addis SCRATCH, %2.reg, {CONST, his(%2.off)} + stfd %1, {GPRINDIRECT, SCRATCH, los(%2.off)} + + from FD to IND_RR_D + gen + COMMENT("move FD->IND_RR_W") + stfdx %1, %2.reg1, %2.reg2 + + from FD to IND_LABEL_D + gen + COMMENT("move FD->IND_LABEL_D") + move {LABEL, %2.adr}, SCRATCH + stfd %1, {GPRINDIRECT, SCRATCH, 0} + +/* Extract condition code field (actually produces (CC&3)<<2) */ + + from CR0 to GPR + gen + COMMENT("move CR0->GPR") + mfcr %2 + rlwinm %2, %2, {CONST, 4}, {CONST, 32-4}, {CONST, 31-2} + +/* Comparisons */ + + from TRISTATE_RR_S to CR0 + gen + cmp %2, {CONST, 0}, %1.reg1, %1.reg2 + + from TRISTATE_RR_U to CR0 + 
gen + cmpl %2, {CONST, 0}, %1.reg1, %1.reg2 + + from TRISTATE_RC_S to CR0 + gen + COMMENT("move TRISTATE_RC_S->CR0 large") + move {CONST, %1.val}, SCRATCH + cmp %2, {CONST, 0}, %1.reg, SCRATCH + + from TRISTATE_RC_U smallu(%val) to CR0 + gen + COMMENT("move TRISTATE_RC_U->CR0 small") + cmpli %2, {CONST, 0}, %1.reg, {CONST, %1.val} + + from TRISTATE_RC_U to CR0 + gen + COMMENT("move TRISTATE_RC_U->CR0") + move {CONST, %1.val}, SCRATCH + cmpl %2, {CONST, 0}, %1.reg, SCRATCH + + from TRISTATE_FF to CR0 + gen + COMMENT("move TRISTATE_FF->CR0") + fcmpo %2, {FD, %1.reg1}, {FD, %1.reg2} + + from GPR to CR0 + gen + COMMENT("move GPR->CR0") + orX SCRATCH, %1, %1 /* alas, can't call test */ + + from TRISTATE_RR_S + TRISTATE_RC_S + TRISTATE_FF to GPR + gen + COMMENT("move TRISTATE_R*_S->GPR") + move %1, C0 + move C0, SCRATCH + move {LABEL, ".tristate_s_table"}, %2 + lwzx %2, %2, SCRATCH + + from TRISTATE_RR_U + TRISTATE_RC_U to GPR + gen + COMMENT("move TRISTATE_R*_U->GPR") + move %1, C0 + move C0, SCRATCH + move {LABEL, ".tristate_u_table"}, %2 + lwzx %2, %2, SCRATCH + +/* Logicals */ + + from NOT_R to GPR + gen + COMMENT("move NOT_R->GPR") + nor %2, %1.reg, %1.reg + + from AND_RR to GPR + gen + COMMENT("move AND_RR->GPR") + and %2, %1.reg1, %1.reg2 + + from AND_RC smallu(%val) to GPR + gen + COMMENT("move AND_RC->GPR small") + andiX %2, %1.reg, {CONST, %1.val} + + from AND_RC to GPR + gen + COMMENT("move AND_RC->GPR") + move {CONST, %1.val}, SCRATCH + and %2, %1.reg, SCRATCH + + from OR_RR to GPR + gen + COMMENT("move OR_RR->GPR") + or %2, %1.reg1, %1.reg2 + + from OR_RC smallu(%val) to GPR + gen + COMMENT("move OR_RC->GPR small") + ori %2, %1.reg, {CONST, %1.val} + + from OR_RC to GPR + gen + COMMENT("move OR_RC->GPR") + move {CONST, %1.val}, SCRATCH + or %2, %1.reg, SCRATCH + + from XOR_RR to GPR + gen + COMMENT("move XOR_RR->GPR") + xor %2, %1.reg1, %1.reg2 + + from XOR_RC smallu(%val) to GPR + gen + COMMENT("move XOR_RC->GPR small") + xori %2, %1.reg, {CONST, %1.val} + 
+ from XOR_RC to GPR + gen + COMMENT("move XOR_RC->GPR") + move {CONST, %1.val}, SCRATCH + xor %2, %1.reg, SCRATCH + +/* Miscellaneous */ + + from OP_ALL_W + LABEL + CONST to GPRE + gen + move %1, %2.reg + + +TESTS + + to test GPR + gen + orX SCRATCH, %1, %1 + + + +STACKINGRULES + + from GPR to STACK + gen + COMMENT("stack GPR") + stwu %1, {GPRINDIRECT, SP, 0-4} + + from CONST to STACK + uses REG + gen + COMMENT("stack CONST") + move %1, %a + stwu %a, {GPRINDIRECT, SP, 0-4} + + from LABEL to STACK + uses REG + gen + COMMENT("stack LABEL") + move %1, {GPRE, %a} + stwu %a, {GPRINDIRECT, SP, 0-4} + + from SEX_B to STACK + gen + COMMENT("stack SEX_B") + extsb SCRATCH, %1.reg + stwu SCRATCH, {GPRINDIRECT, SP, 0-4} + + from SEX_H to STACK + gen + COMMENT("stack SEX_H") + extsh SCRATCH, %1.reg + stwu SCRATCH, {GPRINDIRECT, SP, 0-4} + + from SUM_ALL + TRISTATE_ALL + LOGICAL_ALL to STACK + gen + move %1, {GPRE, SCRATCH} + stwu SCRATCH, {GPRINDIRECT, SP, 0-4} + + from IND_ALL_W to STACK + gen + move %1, SCRATCH + stwu SCRATCH, {GPRINDIRECT, SP, 0-4} + + from IND_ALL_D to STACK + gen + move %1, {FD, FSCRATCH} + stfdu {FD, FSCRATCH}, {GPRINDIRECT, SP, 0-8} + + from FD to STACK + gen + COMMENT("stack FD") + stfdu %1, {GPRINDIRECT, SP, 0-8} + + from FS to STACK + gen + COMMENT("stack FS") + stfsu %1, {GPRINDIRECT, SP, 0-4} + + from TOKEN to STACK + gen + invalid. 
+ + + +COERCIONS + + from REG + uses REG + gen + COMMENT("coerce REG->REG") + move %1, %a + yields %a + + from CONST + uses REG + gen + COMMENT("coerce CONST->REG") + move %1, %a + yields %a + + from LABEL + uses REG + gen + COMMENT("coerce LABEL->REG") + move %1, {GPRE, %a} + yields %a + + from STACK + uses REG + gen + COMMENT("coerce STACK->REG") + lwz %a, {GPRINDIRECT, SP, 0} + addi SP, SP, {CONST, 4} + yields %a + + from SEX_B + uses REG + gen + COMMENT("coerce SEX_B->REG") + extsb %a, %1.reg + yields %a + + from SEX_H + uses REG + gen + COMMENT("coerce SEX_H->REG") + extsh %a, %1.reg + yields %a + + from SUM_ALL + TRISTATE_ALL + LOGICAL_ALL + uses REG + gen + move %1, {GPRE, %a} + yields %a + + from FS + uses FREG + gen + fmr {FS, %a}, %1 + yields {FS, %a} + + from FD + uses FREG + gen + fmr {FD, %a}, %1 + yields {FD, %a} + + from STACK + uses FREG + gen + COMMENT("coerce STACK->FD") + lfd {FD, %a}, {GPRINDIRECT, SP, 0} + addi SP, SP, {CONST, 8} + yields {FD, %a} + + from STACK + uses FREG + gen + COMMENT("coerce STACK->FS") + lfs {FS, %a}, {GPRINDIRECT, SP, 0} + addi SP, SP, {CONST, 4} + yields {FS, %a} + + from IND_ALL_W + uses REG + gen + move %1, %a + yields %a + + from IND_ALL_W + uses FREG + gen + move %1, {FS, %a} + yields {FS, %a} + + from IND_ALL_D + uses FREG + gen + move %1, {FD, %a} + yields {FD, %a} + + + + +PATTERNS + +/* Intrinsics */ + + pat loc /* Load constant */ + yields {CONST, $1} + + pat dup $1==INT32 /* Duplicate word on top of stack */ + with GPR + yields %1 %1 + + pat dup $1==INT64 /* Duplicate double-word on top of stack */ + with GPR GPR + yields %2 %1 %2 %1 + + pat exg $1==INT32 /* Exchange top two words on stack */ + with GPR GPR + yields %1 %2 + + pat stl lol $1==$2 /* Store then load local */ + leaving + dup 4 + stl $1 + + pat lal sti lal loi $1==$3 && $2==$4 /* Store then load local, of a different size */ + leaving + dup INT32 + lal $1 + sti $2 + + pat ste loe $1==$2 /* Store then load external */ + leaving + dup 4 + ste $1 + + 
+/* Type conversions */ + + pat loc loc cii loc loc cii $1==$4 && $2==$5 /* madness, generated by the C compiler */ + leaving + loc $1 + loc $2 + cii + + pat loc loc cii loc loc cii $2==INT32 && $5==INT32 && $4<$2 /* madness, generated by the C compiler */ + leaving + loc $4 + loc $5 + cii + + pat loc loc ciu /* signed X -> unsigned X */ + leaving + loc $1 + loc $2 + cuu + + pat loc loc cuu $1==$2 /* unsigned X -> unsigned X */ + /* nop */ + + pat loc loc cii $1==$2 /* signed X -> signed X */ + /* nop */ + + pat loc loc cui $1==$2 /* unsigned X -> signed X */ + /* nop */ + + pat loc loc cui $1==INT8 && $2==INT32 /* unsigned char -> signed int */ + /* nop */ + + pat loc loc cui $1==INT16 && $2==INT32 /* unsigned short -> signed int */ + /* nop */ + + pat loc loc cii $1==INT8 && $2==INT32 /* signed char -> signed int */ + with GPR + yields {SEX_B, %1} + + pat loc loc cii $1==2 && $2==4 /* signed short -> signed int */ + with GPR + yields {SEX_H, %1} + + + + + +/* Local variables */ + + pat lal /* Load address of local */ + yields {SUM_RC, FP, $1} + + pat lol inreg($1)>0 /* Load from local */ + yields {LOCAL, $1} + + pat lol /* Load from local */ + leaving + lal $1 + loi INT32 + + pat ldl /* Load double-word from local */ + leaving + lal $1 + loi INT32*2 + + pat stl inreg($1)>0 /* Store to local */ + with CONST + LABEL + GPR + OP_ALL_W + kills regvar($1), LOCAL %off==$1 + gen + move %1, {GPRE, regvar($1)} + + pat stl /* Store to local */ + leaving + lal $1 + sti INT32 + + pat sdl /* Store double-word to local */ + leaving + lal $1 + sti INT32*2 + + pat lil inreg($1)>0 /* Load from indirected local */ + uses REG + gen + lwz %a, {GPRINDIRECT, regvar($1), 0} + yields %a + + pat lil /* Load from indirected local */ + leaving + lol $1 + loi INT32 + + pat sil /* Save to indirected local */ + leaving + lol $1 + sti INT32 + + pat stl lol $1==$2 /* Save then load (generated by C compiler) */ + leaving + dup 4 + stl $1 + + pat zrl /* Zero local */ + leaving + loc 0 + stl $1 + 
+ pat inl /* Increment local */ + leaving + lol $1 + loc 1 + adi 4 + stl $1 + + pat del /* Decrement local */ + leaving + lol $1 + loc 1 + sbi 4 + stl $1 + + +/* Global variables */ + + pat lpi /* Load address of external function */ + leaving + lae $1 + + pat lae /* Load address of external */ + yields {LABEL, $1} + + pat loe /* Load word external */ + leaving + lae $1 + loi INT32 + + pat ste /* Store word external */ + leaving + lae $1 + sti INT32 + + pat lde /* Load double-word external */ + leaving + lae $1 + loi INT64 + + pat sde /* Store double-word external */ + leaving + lae $1 + sti INT64 + + pat zre /* Zero external */ + leaving + loc 0 + ste $1 + + pat ine /* Increment external */ + uses REG={LABEL, $1}, REG + gen + lwz %b, {GPRINDIRECT, %a, 0} + addi %b, %b, {CONST, 1} + stw %b, {GPRINDIRECT, %a, 0} + + pat dee /* Decrement external */ + uses REG={LABEL, $1}, REG + gen + lwz %b, {GPRINDIRECT, %a, 0} + addi %b, %b, {CONST, 0-1} + stw %b, {GPRINDIRECT, %a, 0} + + + +/* Structures */ + + pat lof /* Load word offsetted */ + leaving + adp $1 + loi INT32 + + pat ldf /* Load double-word offsetted */ + leaving + adp $1 + loi INT64 + + pat stf /* Store word offsetted */ + leaving + adp $1 + sti INT32 + + pat sdf /* Store double-word offsetted */ + leaving + adp $1 + sti INT64 + + + +/* Loads and stores */ + + pat loi $1==INT8 /* Load byte indirect */ + with GPR + uses REG + gen + lbz %a, {GPRINDIRECT, %1, 0} + yields %a + with SUM_RR + uses reusing %1, REG + gen + lbzx %a, %1.reg1, %1.reg2 + yields %a + with SUM_RC + uses REG + gen + move {IND_RC_B, %1.reg, %1.off}, %a + yields %a + + pat loi loc loc cii $1==INT16 && $2==INT16 && $3==INT32 /* Load half-word indirect and sign extend */ + with GPR + uses REG + gen + lha %a, {GPRINDIRECT, %1, 0} + yields %a + with SUM_RR + uses reusing %1, REG + gen + lhax %a, %1.reg1, %1.reg2 + yields %a + with SUM_RC + uses REG + gen + move {IND_RC_H_S, %1.reg, %1.off}, %a + yields %a + + pat loi $1==INT16 /* Load half-word 
indirect */ + with GPR + uses REG + gen + lhz %a, {GPRINDIRECT, %1, 0} + yields %a + with SUM_RR + uses reusing %1, REG + gen + lhzx %a, %1.reg1, %1.reg2 + yields %a + with SUM_RC + uses REG + gen + move {IND_RC_H, %1.reg, %1.off}, %a + yields %a + + pat loi $1==INT32 /* Load word indirect */ + with GPR + yields {IND_RC_W, %1, 0} + with SUM_RC + yields {IND_RC_W, %1.reg, %1.off} + with SUM_RR + yields {IND_RR_W, %1.reg1, %1.reg2} + with LABEL + yields {IND_LABEL_W, %1.adr} + + pat loi $1==INT64 /* Load double-word indirect */ + with GPR + yields {IND_RC_D, %1, 0} + with SUM_RC + yields {IND_RC_D, %1.reg, %1.off} + with SUM_RR + yields {IND_RR_D, %1.reg1, %1.reg2} + with LABEL + yields {IND_LABEL_D, %1.adr} + + pat loi /* Load arbitrary size */ + leaving + loc $1 + los INT32 + + pat los /* Load arbitrary size */ + with GPR3 GPR4 STACK + kills ALL + gen + bl {LABEL, ".los"} + + pat sti $1==INT8 /* Store byte indirect */ + with GPR GPR + gen + stb %2, {GPRINDIRECT, %1, 0} + with SUM_RR GPR + gen + stbx %2, %1.reg1, %1.reg2 + with SUM_RC GPR + gen + move %2, {IND_RC_B, %1.reg, %1.off} + with GPR SEX_B + gen + stb %2.reg, {GPRINDIRECT, %1, 0} + with SUM_RR SEX_B + gen + stbx %2.reg, %1.reg1, %1.reg2 + with SUM_RC SEX_B + gen + move %2.reg, {IND_RC_B, %1.reg, %1.off} + + pat sti $1==INT16 /* Store half-word indirect */ + with GPR GPR + gen + sth %2, {GPRINDIRECT, %1, 0} + with SUM_RR GPR + gen + sthx %2, %1.reg1, %1.reg2 + with SUM_RC GPR + gen + move %2, {IND_RC_H, %1.reg, %1.off} + with GPR SEX_H + gen + sth %2.reg, {GPRINDIRECT, %1, 0} + with SUM_RR SEX_H + gen + sthx %2.reg, %1.reg1, %1.reg2 + with SUM_RC SEX_H + gen + move %2.reg, {IND_RC_H, %1.reg, %1.off} + + pat sti $1==INT32 /* Store word indirect */ + with GPR GPR+FS + gen + move %2, {IND_RC_W, %1, 0} + with SUM_RR GPR+FS + gen + move %2, {IND_RR_W, %1.reg1, %1.reg2} + with SUM_RC GPR+FS + gen + move %2, {IND_RC_W, %1.reg, %1.off} + with LABEL GPR+FS + gen + move %2, {IND_LABEL_W, %1.adr} + + pat sti $1==INT64 
/* Store double-word indirect */ + with GPR FD + gen + move %2, {IND_RC_D, %1, 0} + with SUM_RR FD + gen + move %2, {IND_RR_D, %1.reg1, %1.reg2} + with SUM_RC FD + gen + move %2, {IND_RC_D, %1.reg, %1.off} + with GPR GPR GPR + gen + stw %2, {GPRINDIRECT, %1, 0} + stw %3, {GPRINDIRECT, %1, 4} + with SUM_RC GPR GPR + gen + move %2, {IND_RC_W, %1.reg, %1.off} + move %3, {IND_RC_W, %1.reg, %1.off+4} + with LABEL FD + gen + move %2, {IND_LABEL_D, %1.adr} + + + pat sti /* Store arbitrary size */ + leaving + loc $1 + sts INT32 + + pat sts /* Store arbitrary size */ + with GPR3 GPR4 STACK + kills ALL + gen + bl {LABEL, ".sts"} + + + +/* Arithmetic wrappers */ + + pat ads $1==4 /* Add var to pointer */ + leaving adi $1 + + pat sbs $1==4 /* Subtract var from pointer */ + leaving sbi $1 + + pat adp /* Add constant to pointer */ + leaving + loc $1 + adi 4 + + pat adu /* Add unsigned */ + leaving + adi $1 + + pat sbu /* Subtract unsigned */ + leaving + sbi $1 + + pat inc /* Add 1 */ + leaving + loc 1 + adi 4 + + pat dec /* Subtract 1 */ + leaving + loc 1 + sbi 4 + + pat loc mlu $2==2 /* Unsigned multiply by constant */ + leaving + loc $1 + mli 4 + + pat mlu /* Unsigned multiply by var */ + leaving + mli $1 + + pat loc slu /* Shift left unsigned by constant amount */ + leaving + loc $1 + sli $2 + + pat slu /* Shift left unsigned by variable amount */ + leaving + sli $1 + + + +/* Word arithmetic */ + + pat adi $1==4 /* Add word (second + top) */ + with REG REG + yields {SUM_RR, %1, %2} + with CONST REG + yields {SUM_RC, %2, %1.val} + with REG CONST + yields {SUM_RC, %1, %2.val} + with CONST SUM_RC + yields {SUM_RC, %2.reg, %2.off+%1.val} + with CONST LABEL + yields {LABEL, %2.adr+%1.val} + + pat sbi $1==4 /* Subtract word (second - top) */ + with REG REG + uses reusing %2, REG + gen + subf %a, %1, %2 + yields %a + with CONST REG + yields {SUM_RC, %2, 0-%1.val} + with CONST SUM_RC + yields {SUM_RC, %2.reg, %2.off-%1.val} + with CONST LABEL + yields {LABEL, %2.adr+(0-%1.val)} + + 
pat ngi $1==4 /* Negate word */ + with REG + uses reusing %1, REG + gen + neg %a, %1 + yields %a + + pat mli $1==4 /* Multiply word (second * top) */ + with REG REG + uses reusing %2, REG + gen + mullw %a, %2, %1 + yields %a + + pat dvi $1==4 /* Divide word (second / top) */ + with REG REG + uses reusing %2, REG + gen + divw %a, %2, %1 + yields %a + + pat dvu $1==4 /* Divide unsigned word (second / top) */ + with REG REG + uses reusing %2, REG + gen + divwu %a, %2, %1 + yields %a + + pat rmi $1==4 /* Remainder word (second % top) */ + with REG REG + uses REG + gen + divw %a, %2, %1 + mullw %a, %a, %1 + subf %a, %a, %2 + yields %a + + pat rmu $1==4 /* Remainder unsigned word (second % top) */ + with REG REG + uses REG + gen + divwu %a, %2, %1 + mullw %a, %a, %1 + subf %a, %a, %2 + yields %a + + pat and $1==4 /* AND word */ + with GPR NOT_R + uses reusing %1, REG + gen + andc %a, %1, %2.reg + yields %a + with NOT_R GPR + uses reusing %1, REG + gen + andc %a, %2, %1.reg + yields %a + with GPR GPR + yields {AND_RR, %1, %2} + with GPR CONST + yields {AND_RC, %1, %2.val} + with CONST GPR + yields {AND_RC, %2, %1.val} + + pat and !defined($1) /* AND set */ + with STACK + gen + bl {LABEL, ".and"} + + pat ior $1==4 /* OR word */ + with GPR NOT_R + uses reusing %1, REG + gen + orc %a, %1, %2.reg + yields %a + with NOT_R GPR + uses reusing %2, REG + gen + orc %a, %2, %1.reg + yields %a + with GPR GPR + yields {OR_RR, %1, %2} + with GPR CONST + yields {OR_RC, %1, %2.val} + with CONST GPR + yields {OR_RC, %2, %1.val} + + pat ior !defined($1) /* OR set */ + with STACK + gen + bl {LABEL, ".ior"} + + pat xor $1==4 /* XOR word */ + with GPR GPR + yields {XOR_RR, %1, %2} + with GPR CONST + yields {XOR_RC, %1, %2.val} + with CONST GPR + yields {XOR_RC, %2, %1.val} + + pat xor !defined($1) /* XOR set */ + with STACK + gen + bl {LABEL, ".xor"} + + pat com $1==INT32 /* NOT word */ + with AND_RR + uses REG + gen + nand %a, %1.reg1, %1.reg2 + yields %a + with OR_RR + uses REG + gen + nor 
%a, %1.reg1, %1.reg2 + yields %a + with XOR_RR + uses REG + gen + eqv %a, %1.reg1, %1.reg2 + yields %a + with GPR + yields {NOT_R, %1} + + pat com !defined($1) /* NOT set */ + with STACK + gen + bl {LABEL, ".com"} + + pat sli $1==4 /* Shift left (second << top) */ + with CONST GPR + uses reusing %2, REG + gen + rlwinm %a, %2, {CONST, (%1.val & 0x1F)}, {CONST, 0}, {CONST, 31-(%1.val & 0x1F)} + yields %a + with GPR GPR + uses reusing %2, REG + gen + slw %a, %2, %1 + yields %a + + pat sri $1==4 /* Shift right signed (second >> top) */ + with CONST GPR + uses reusing %2, REG + gen + srawi %a, %2, {CONST, %1.val & 0x1F} + yields %a + with GPR GPR + uses reusing %2, REG + gen + sraw %a, %2, %1 + yields %a + + pat sru $1==4 /* Shift right unsigned (second >> top) */ + with CONST GPR + uses reusing %2, REG + gen + rlwinm %a, %2, {CONST, 32-(%1.val & 0x1F)}, {CONST, (%1.val & 0x1F)}, {CONST, 31} + yields %a + with GPR GPR + uses reusing %2, REG + gen + srw %a, %2, %1 + yields %a + + + +/* Arrays */ + + pat aar $1==INT32 /* Index array */ + with GPR3 GPR4 GPR5 + gen + bl {LABEL, ".aar4"} + yields R3 + + pat lae lar $2==INT32 && nicesize(rom($1, 3)) /* Load array */ + leaving + lae $1 + aar INT32 + loi rom($1, 3) + + pat lar $1==INT32 /* Load array */ + with GPR3 GPR4 GPR5 STACK + kills ALL + gen + bl {LABEL, ".lar4"} + + pat lae sar $2==INT32 && nicesize(rom($1, 3)) /* Store array */ + leaving + lae $1 + aar INT32 + sti rom($1, 3) + + pat sar $1==INT32 /* Store array */ + with GPR3 GPR4 GPR5 STACK + kills ALL + gen + bl {LABEL, ".sar4"} + + + + +/* Sets */ + + pat set defined($1) /* Create word with set bit */ + leaving + loc 1 + exg INT32 + sli INT32 + + pat set !defined($1) /* Create structure with set bit (variable) */ + with GPR3 GPR4 STACK + gen + bl {LABEL, ".set"} + + pat inn defined($1) /* Test for set bit */ + leaving + set INT32 + and INT32 + + pat inn !defined($1) /* Test for set bit (variable) */ + with GPR3 STACK + gen + bl {LABEL, ".inn"} + + + +/* Boolean 
resolutions */ + + pat teq /* top = (top == 0) */ + with TRISTATE_ALL + GPR + uses reusing %1, REG + gen + move %1, C0 + move C0, SCRATCH + move {LABEL, ".teq_table"}, %a + lwzx %a, %a, SCRATCH + yields %a + + pat tne /* top = (top != 0) */ + with TRISTATE_ALL + GPR + uses reusing %1, REG + gen + move %1, C0 + move C0, SCRATCH + move {LABEL, ".tne_table"}, %a + lwzx %a, %a, SCRATCH + yields %a + + pat tlt /* top = (top < 0) */ + with TRISTATE_ALL + GPR + uses reusing %1, REG + gen + move %1, C0 + move C0, SCRATCH + move {LABEL, ".tlt_table"}, %a + lwzx %a, %a, SCRATCH + yields %a + + pat tle /* top = (top <= 0) */ + with TRISTATE_ALL + GPR + uses reusing %1, REG + gen + move %1, C0 + move C0, SCRATCH + move {LABEL, ".tle_table"}, %a + lwzx %a, %a, SCRATCH + yields %a + + pat tgt /* top = (top > 0) */ + with TRISTATE_ALL + GPR + uses reusing %1, REG + gen + move %1, C0 + move C0, SCRATCH + move {LABEL, ".tgt_table"}, %a + lwzx %a, %a, SCRATCH + yields %a + + pat tge /* top = (top >= 0) */ + with TRISTATE_ALL + GPR + uses reusing %1, REG + gen + move %1, C0 + move C0, SCRATCH + move {LABEL, ".tge_table"}, %a + lwzx %a, %a, SCRATCH + yields %a + + + + +/* Simple branches */ + + pat zeq /* Branch if signed top == 0 */ + with TRISTATE_ALL+GPR STACK + gen + move %1, C0 + bc IFTRUE, EQ, {LABEL, $1} + + pat beq + leaving + cmi INT32 + zeq $1 + + pat zne /* Branch if signed top != 0 */ + with TRISTATE_ALL+GPR STACK + gen + move %1, C0 + bc IFFALSE, EQ, {LABEL, $1} + + pat bne + leaving + cmi INT32 + zne $1 + + pat zgt /* Branch if signed top > 0 */ + with TRISTATE_ALL+GPR STACK + gen + move %1, C0 + bc IFTRUE, GT, {LABEL, $1} + + pat bgt + leaving + cmi INT32 + zgt $1 + + pat zge /* Branch if signed top >= 0 */ + with TRISTATE_ALL+GPR STACK + gen + move %1, C0 + bc IFFALSE, LT, {LABEL, $1} + + pat bge + leaving + cmi INT32 + zge $1 + + pat zlt /* Branch if signed top < 0 */ + with TRISTATE_ALL+GPR STACK + gen + move %1, C0 + bc IFTRUE, LT, {LABEL, $1} + + pat blt + leaving 
+ cmi INT32 + zlt $1 + + pat zle /* Branch if signed top <= 0 */ + with TRISTATE_ALL+GPR STACK + gen + move %1, C0 + bc IFFALSE, GT, {LABEL, $1} + + pat ble + leaving + cmi INT32 + zle $1 + + +/* Compare and jump */ + + pat cmi /* Signed tristate compare */ + with CONST GPR + yields {TRISTATE_RC_S, %2, %1.val} + with GPR GPR + yields {TRISTATE_RR_S, %2, %1} + + pat cmu /* Unsigned tristate compare */ + with CONST GPR + yields {TRISTATE_RC_U, %2, %1.val} + with GPR GPR + yields {TRISTATE_RR_U, %2, %1} + + pat cmp /* Compare pointers */ + leaving + cmu INT32 + + pat cms $1==INT32 /* Compare blocks (word sized) */ + leaving + cmi INT32 + + + + +/* Other branching and labelling */ + + pat lab topeltsize($1)==4 && !fallthrough($1) + gen + labeldef $1 + yields R3 + + pat lab topeltsize($1)==4 && fallthrough($1) + with GPR3 + gen + labeldef $1 + yields %1 + + pat lab topeltsize($1)!=4 + with STACK + kills ALL + gen + labeldef $1 + + pat bra topeltsize($1)==4 /* Unconditional jump with TOS register */ + with GPR3 STACK + gen + b {LABEL, $1} + + pat bra topeltsize($1)!=4 /* Unconditional jump without TOS register */ + with STACK + gen + b {LABEL, $1} + + + +/* Miscellaneous */ + + pat cal /* Call procedure */ + with STACK + kills ALL + gen + bl {LABEL, $1} + + pat cai /* Call procedure indirect */ + with GPR STACK + kills ALL + gen + mtspr CTR, %1 + bcctrl ALWAYS, {CONST, 0}, {CONST, 0} + + pat lfr $1==INT32 /* Load function result, word */ + yields R3 + + pat lfr $1==INT64 /* Load function result, double-word */ + yields R4 R3 + + pat ret $1==0 /* Return from procedure */ + gen + return + b {LABEL, ".ret"} + + pat ret $1==INT32 /* Return from procedure, word */ + with GPR3 + gen + return + b {LABEL, ".ret"} + + pat ret $1==INT64 /* Return from procedure, double-word */ + with GPR3 GPR4 + gen + return + b {LABEL, ".ret"} + + pat blm /* Block move constant length */ + with GPR GPR STACK + uses REG + gen + move {CONST, $1}, %a + stwu %a, {GPRINDIRECT, SP, 0-4} + stwu %2, 
{GPRINDIRECT, SP, 0-4} + stwu %1, {GPRINDIRECT, SP, 0-4} + bl {LABEL, "_memmove"} + addi SP, SP, {CONST, 12} + + pat bls /* Block move variable length */ + with GPR GPR GPR STACK + gen + stwu %1, {GPRINDIRECT, SP, 0-4} + stwu %3, {GPRINDIRECT, SP, 0-4} + stwu %2, {GPRINDIRECT, SP, 0-4} + bl {LABEL, "_memmove"} + addi SP, SP, {CONST, 12} + + pat csa /* Array-lookup switch */ + with GPR3 GPR4 STACK + gen + b {LABEL, ".csa"} + + pat csb /* Table-lookup switch */ + with GPR3 GPR4 STACK + gen + b {LABEL, ".csb"} + + + +/* EM specials */ + + pat fil /* Set current filename */ + leaving + lae $1 + ste ".filename" + + pat lin /* Set current line number */ + leaving + loc $1 + ste ".linenumber" + + pat lni /* Increment line number */ + leaving + ine ".linenumber" + + pat lim /* Load EM trap ignore mask */ + leaving + lde ".ignmask" + + pat sim /* Store EM trap ignore mask */ + leaving + ste ".ignmask" + + pat trp /* Raise EM trap */ + with GPR3 + gen + bl {LABEL, ".trap"} + + pat sig /* Set trap handler */ + leaving + ste ".trppc" + + pat rtt /* Return from trap */ + leaving + ret 0 + + pat lxl $1==0 /* Load FP */ + leaving + lor 0 + + pat lxl $1==1 /* Load caller's FP */ + leaving + lxl 0 + dch + + pat dch /* FP -> caller FP */ + with GPR + uses reusing %1, REG + gen + lwz %a, {GPRINDIRECT, %1, FP_OFFSET} + yields %a + + pat lpb /* Convert FP to argument address */ + leaving + adp EM_BSIZE + + pat lxa /* Load caller's SP */ + leaving + lxl $1 + lpb + + pat gto /* longjmp */ + uses REG + gen + move {LABEL, $1}, %a + move {IND_RC_W, %a, 8}, FP + move {IND_RC_W, %a, 4}, SP + move {IND_RC_W, %a, 0}, %a + mtspr CTR, %a + bcctr ALWAYS, {CONST, 0}, {CONST, 0} + +#if 0 + + pat gto /* longjmp */ + with STACK + gen + ld {LABEL, $1+2} + wspec {CONST, 1} + ld {LABEL, $1+4} + wspec {CONST, 0} + ld {LABEL, $1+0} + wspec {CONST, 2} + + pat str $1==1 /* Store special GPRister */ + with GPR0 + gen + wspec {CONST, $1} + +#endif + + pat lor $1==0 /* Load FP */ + uses REG + gen + move FP, %a 
+ yields %a + + pat lor $1==1 /* Load SP */ + uses REG + gen + move SP, %a + yields %a + + pat lor $1==2 /* Load HP */ + leaving + loe ".reghp" + + pat str $1==0 /* Store FP */ + with GPR + gen + move %1, FP + + pat str $1==1 /* Store SP */ + with GPR + gen + move %1, SP + + pat str $1==2 /* Store HP */ + leaving + ste ".reghp" + + pat ass /* Adjust stack by variable amount */ + with CONST + gen + move {SUM_RC, SP, %1.val}, {GPRE, SP} + with GPR + gen + move {SUM_RR, SP, %1}, {GPRE, SP} + + pat asp /* Adjust stack by constant amount */ + leaving + loc $1 + ass + diff --git a/mach/vc4/test/opcodes.s b/mach/vc4/test/opcodes.s new file mode 100644 index 000000000..894a7540e --- /dev/null +++ b/mach/vc4/test/opcodes.s @@ -0,0 +1,77 @@ +# +/* + * VideoCore IV assembler test file + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +.sect .text +.sect .rom +.sect .data +.sect .bss +.sect .text + +main: + nop + rti + + b r0 + b r31 + bl r0 + bl r31 + tbb r0 + tbb r15 + tbs r0 + tbs r15 + + mov r0, r1 + cmn r0, r1 + add r0, r1 + bic r0, r1 + mul r0, r1 + eor r0, r1 + sub r0, r1 + and r0, r1 + mvn r0, r1 + ror r0, r1 + cmp r0, r1 + rsb r0, r1 + btst r0, r1 + or r0, r1 + extu r0, r1 + max r0, r1 + bset r0, r1 + min r0, r1 + bclr r0, r1 + adds2 r0, r1 + bchg r0, r1 + adds4 r0, r1 + adds8 r0, r1 + adds16 r0, r1 + exts r0, r1 + neg r0, r1 + lsr r0, r1 + clz r0, r1 + lsl r0, r1 + brev r0, r1 + asr r0, r1 + abs r0, r1 + + mov r0, #31 + cmn r0, #31 + add r0, #31 + bic r0, #31 + mul r0, #31 + eor r0, #31 + sub r0, #31 + and r0, #31 + mvn r0, #31 + ror r0, #31 + cmp r0, #31 + rsb r0, #31 + btst r0, #31 + or r0, #31 + extu r0, #31 + max r0, #31 + diff --git a/plat/rpi/build.mk b/plat/rpi/build.mk new file mode 100644 index 000000000..20aed87f7 --- /dev/null +++ b/plat/rpi/build.mk @@ -0,0 +1,47 @@ +# Build script for Raspberry Pi bare-metal executables 
(using the +# VideoCore IV processor, not the ARM). +# +# © 2013 David Given +# This file is redistributable under the terms of the 3-clause BSD license. +# See the file 'Copying' in the root of the distribution for the full text. + +ARCH := vc4 +PLATFORM := rpi +OPTIMISATION := -O + +D := plat/rpi/ + +platform-headers := \ + ack/config.h + +platform-libsys := \ + _hol0.s \ + +ifeq (x,y) + errno.s \ + _sys_rawread.s \ + _sys_rawwrite.s \ + open.c \ + creat.c \ + close.c \ + read.c \ + write.c \ + brk.c \ + getpid.c \ + kill.c \ + isatty.c \ + lseek.c \ + time.c \ + signal.c +endif + +$(eval $(call build-platform)) + +define build-rpi-boot-impl + $(call reset) + $(call ackfile, $D/boot.s) + $(call installto, $(PLATIND)/$(PLATFORM)/boot.o) +endef + +#(eval $(build-rpi-boot-impl)) + diff --git a/plat/rpi/descr b/plat/rpi/descr new file mode 100644 index 000000000..41dfc400f --- /dev/null +++ b/plat/rpi/descr @@ -0,0 +1,69 @@ +# $Source$ +# $State$ +# $Revision$ + +var w=2 +var p=2 +var s=2 +var l=4 +var f=4 +var d=8 +var ARCH=vc4 +var PLATFORM=rpi +var PLATFORMDIR={EM}/share/ack/{PLATFORM} +var CPP_F=-D__unix +var ALIGN=-a0:1 -a1:1 -a2:1 -a3:1 +var MACHOPT_F=-m8 + +# Override the setting in fe so that files compiled for this platform can see +# the platform-specific headers. 
+ +var C_INCLUDES=-I{PLATFORMDIR}/include -I{EM}/share/ack/include/ansi + +name be + from .m.g + to .s + program {EM}/lib/ack/{PLATFORM}/ncg + args < + stdout + need .e +end +name as + from .s.so + to .o + program {EM}/lib/ack/{PLATFORM}/as + args - -o > < + prep cond +end +name led + from .o.a + to .out + program {EM}/lib/ack/em_led + mapflag -l* LNAME={PLATFORMDIR}/lib* + mapflag -i SEPID=-b1:0 + mapflag -fp FLOATS={EM}/{ILIB}fp + args {ALIGN} {SEPID?} \ + (.e:{HEAD}={PLATFORMDIR}/boot.o) \ + ({RTS}:.ocm.b={PLATFORMDIR}/c-ansi.o) \ + ({RTS}:.c={PLATFORMDIR}/c-ansi.o) \ + ({RTS}:.mod={PLATFORMDIR}/modula2.o) \ + ({RTS}:.p={PLATFORMDIR}/pascal.o) \ + -o > < \ + (.p:{TAIL}={PLATFORMDIR}/libpascal.a) \ + (.b:{TAIL}={PLATFORMDIR}/libbasic.a) \ + (.mod:{TAIL}={PLATFORMDIR}/libmodula2.a) \ + (.ocm:{TAIL}={PLATFORMDIR}/liboccam.a) \ + (.ocm.b.mod.c.p:{TAIL}={PLATFORMDIR}/libc.a) \ + {FLOATS?} \ + (.e:{TAIL}={PLATFORMDIR}/libem.a \ + {PLATFORMDIR}/libsys.a \ + {PLATFORMDIR}/libend.a) + linker +end +name cv + from .out + to .img + program {EM}/bin/aslod + args < > + outfile raspberrypi.bin +end diff --git a/plat/rpi/include/ack/config.h b/plat/rpi/include/ack/config.h new file mode 100644 index 000000000..fd2c48cba --- /dev/null +++ b/plat/rpi/include/ack/config.h @@ -0,0 +1,11 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#ifndef _ACK_CONFIG_H +#define _ACK_CONFIG_H + +#endif diff --git a/plat/rpi/include/unistd.h b/plat/rpi/include/unistd.h new file mode 100644 index 000000000..ddd8739d3 --- /dev/null +++ b/plat/rpi/include/unistd.h @@ -0,0 +1,73 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. 
+ */ + +#ifndef _UNISTD_H +#define _UNISTD_H + +#include + +/* Types */ + +typedef int pid_t; +typedef int mode_t; + +/* Constants for file access (open and friends) */ + +enum +{ + O_ACCMODE = 0x3, + + O_RDONLY = 0, + O_WRONLY = 1, + O_RDWR = 2, + + O_CREAT = 0100, + O_TRUNC = 01000, + O_APPEND = 02000, + O_NONBLOCK = 04000 +}; + +/* Special variables */ + +extern char** environ; + +/* Implemented system calls */ + +extern void _exit(int); +extern pid_t getpid(void); +extern void* sbrk(intptr_t increment); +extern int isatty(int d); +extern off_t lseek(int fildes, off_t offset, int whence); +extern int close(int d); +extern int open(const char* path, int access, ...); +extern int creat(const char* path, mode_t mode); +extern int read(int fd, void* buffer, size_t count); +extern int write(int fd, void* buffer, size_t count); + +/* Unimplemented system calls (these are just prototypes to let the library + * compile). */ + +extern int fcntl(int fd, int op, ...); + +/* Signal handling */ + +typedef int sig_atomic_t; + +#define SIG_ERR ((sighandler_t) -1) /* Error return. */ +#define SIG_DFL ((sighandler_t) 0) /* Default action. */ +#define SIG_IGN ((sighandler_t) 1) /* Ignore signal. */ + +#define SIGABRT 6 /* Abort (ANSI) */ +#define SIGILL 11 /* Illegal instruction */ + +#define _NSIG 32 /* Biggest signal number + 1 + (not including real-time signals). */ +typedef void (*sighandler_t)(int); +extern sighandler_t signal(int signum, sighandler_t handler); +extern int raise(int signum); + +#endif diff --git a/plat/rpi/libsys/_hol0.s b/plat/rpi/libsys/_hol0.s new file mode 100644 index 000000000..eed00817d --- /dev/null +++ b/plat/rpi/libsys/_hol0.s @@ -0,0 +1,22 @@ +# +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +! Declare segments (the order is important). 
+ +.sect .text +.sect .rom +.sect .data +.sect .bss + +.sect .bss + +! This data block is used to store information about the current line number +! and file. + +.define hol0 +.comm hol0, 8 diff --git a/plat/rpi/libsys/_sys_rawread.s b/plat/rpi/libsys/_sys_rawread.s new file mode 100644 index 000000000..02edba21a --- /dev/null +++ b/plat/rpi/libsys/_sys_rawread.s @@ -0,0 +1,26 @@ +# +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +! Declare segments (the order is important). + +.sect .text +.sect .rom +.sect .data +.sect .bss + +.sect .text + +! Reads a single byte. + +.define __sys_rawread +__sys_rawread: + xorb ah, ah + int 0x16 + xorb ah, ah + ret + \ No newline at end of file diff --git a/plat/rpi/libsys/_sys_rawwrite.s b/plat/rpi/libsys/_sys_rawwrite.s new file mode 100644 index 000000000..a424574d7 --- /dev/null +++ b/plat/rpi/libsys/_sys_rawwrite.s @@ -0,0 +1,32 @@ +# +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +! Declare segments (the order is important). + +.sect .text +.sect .rom +.sect .data +.sect .bss + +.sect .text + +! Writes a single byte to the console. + +.define __sys_rawwrite +.extern __sys_rawwrite + +__sys_rawwrite: + push bp + mov bp, sp + + movb al, 4(bp) + movb ah, 0x0E + mov bx, 0x0007 + int 0x10 + jmp .cret + \ No newline at end of file diff --git a/plat/rpi/libsys/brk.c b/plat/rpi/libsys/brk.c new file mode 100644 index 000000000..cff32b9a9 --- /dev/null +++ b/plat/rpi/libsys/brk.c @@ -0,0 +1,45 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. 
+ * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include +#include +#include + +#define OUT_OF_MEMORY (void*)(-1) /* sbrk returns this on failure */ +#define STACK_BUFFER 128 /* number of bytes to leave for stack */ + +extern char _end[1]; +static char* current = _end; + +int brk(void* newend) +{ + /* This variable is used to figure out the current stack pointer, + * by taking its address. */ + char dummy; + char* p = newend; + + if ((p > (&dummy - STACK_BUFFER)) || + (p < _end)) + return -1; + + current = p; + return 0; +} + +void* sbrk(intptr_t increment) +{ + char* old; + + if (increment == 0) + return current; + + old = current; + if (brk(old + increment) < 0) + return OUT_OF_MEMORY; + + return old; +} diff --git a/plat/rpi/libsys/close.c b/plat/rpi/libsys/close.c new file mode 100644 index 000000000..60fa0f96b --- /dev/null +++ b/plat/rpi/libsys/close.c @@ -0,0 +1,16 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include +#include +#include + +int close(int fd) +{ + errno = EBADF; + return -1; +} diff --git a/plat/rpi/libsys/creat.c b/plat/rpi/libsys/creat.c new file mode 100644 index 000000000..7c009e6a0 --- /dev/null +++ b/plat/rpi/libsys/creat.c @@ -0,0 +1,17 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include +#include +#include +#include "libsys.h" + +int open(const char* path, int access, ...) 
+{ + errno = EACCES; + return -1; +} diff --git a/plat/rpi/libsys/errno.s b/plat/rpi/libsys/errno.s new file mode 100644 index 000000000..a2e1f8b55 --- /dev/null +++ b/plat/rpi/libsys/errno.s @@ -0,0 +1,31 @@ +# +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +! Declare segments (the order is important). + +.sect .text +.sect .rom +.sect .data +.sect .bss + +#define D(e) .define e; e + +.sect .data + +! Define various ACK error numbers. Note that these are *not* ANSI C +! errnos, and are used for different purposes. + +D(ERANGE) = 1 +D(ESET) = 2 +D(EIDIVZ) = 6 +D(EHEAP) = 17 +D(EILLINS) = 18 +D(EODDZ) = 19 +D(ECASE) = 20 +D(EBADMON) = 25 + diff --git a/plat/rpi/libsys/getpid.c b/plat/rpi/libsys/getpid.c new file mode 100644 index 000000000..0ae1f6154 --- /dev/null +++ b/plat/rpi/libsys/getpid.c @@ -0,0 +1,15 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include +#include +#include + +pid_t getpid(void) +{ + return 0; +} diff --git a/plat/rpi/libsys/isatty.c b/plat/rpi/libsys/isatty.c new file mode 100644 index 000000000..83837ba9c --- /dev/null +++ b/plat/rpi/libsys/isatty.c @@ -0,0 +1,15 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. 
+ */ + +#include +#include +#include + +int isatty(int fd) +{ + return 1; +} diff --git a/plat/rpi/libsys/kill.c b/plat/rpi/libsys/kill.c new file mode 100644 index 000000000..bacc405df --- /dev/null +++ b/plat/rpi/libsys/kill.c @@ -0,0 +1,16 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include +#include +#include + +int kill(pid_t pid, int sig) +{ + errno = EINVAL; + return -1; +} diff --git a/plat/rpi/libsys/libsys.h b/plat/rpi/libsys/libsys.h new file mode 100644 index 000000000..e9bff7e1a --- /dev/null +++ b/plat/rpi/libsys/libsys.h @@ -0,0 +1,18 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#ifndef LIBSYS_H +#define LIBSYS_H + +extern void _sys_rawwrite(unsigned char b); +extern unsigned char _sys_rawread(void); + +extern void _sys_write_tty(char c); + +/* extern int _sys_ttyflags; */ + +#endif diff --git a/plat/rpi/libsys/lseek.c b/plat/rpi/libsys/lseek.c new file mode 100644 index 000000000..9a487d747 --- /dev/null +++ b/plat/rpi/libsys/lseek.c @@ -0,0 +1,16 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. 
+ */ + +#include +#include +#include + +off_t lseek(int fd, off_t offset, int whence) +{ + errno = EINVAL; + return -1; +} diff --git a/plat/rpi/libsys/open.c b/plat/rpi/libsys/open.c new file mode 100644 index 000000000..cbdc30ec1 --- /dev/null +++ b/plat/rpi/libsys/open.c @@ -0,0 +1,16 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include +#include +#include +#include "libsys.h" + +int creat(const char* path, int mode) +{ + return open(path, O_CREAT|O_WRONLY|O_TRUNC, mode); +} diff --git a/plat/rpi/libsys/read.c b/plat/rpi/libsys/read.c new file mode 100644 index 000000000..476689320 --- /dev/null +++ b/plat/rpi/libsys/read.c @@ -0,0 +1,45 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include +#include +#include +#include "libsys.h" + +int read(int fd, void* buffer, size_t count) +{ + char i; + + /* We're only allowed to read from fd 0, 1 or 2. */ + + if ((fd < 0) || (fd > 2)) + { + errno = EBADF; + return -1; + } + + /* Empty buffer? */ + + if (count == 0) + return 0; + + /* Read one byte. */ + + i = _sys_rawread(); +#if 0 + if ((i == '\r') && !(_sys_ttyflags & RAW)) + i = '\n'; + if (_sys_ttyflags & ECHO) + _sys_write_tty(i); +#endif + if (i == '\r') + i = '\n'; + _sys_write_tty(i); + + *(char*)buffer = i; + return 1; +} diff --git a/plat/rpi/libsys/signal.c b/plat/rpi/libsys/signal.c new file mode 100644 index 000000000..10a2ded29 --- /dev/null +++ b/plat/rpi/libsys/signal.c @@ -0,0 +1,17 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. 
+ * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include +#include +#include +#include +#include "libsys.h" + +sighandler_t signal(int signum, sighandler_t handler) +{ + return SIG_DFL; +} diff --git a/plat/rpi/libsys/time.c b/plat/rpi/libsys/time.c new file mode 100644 index 000000000..e448a33d0 --- /dev/null +++ b/plat/rpi/libsys/time.c @@ -0,0 +1,19 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include +#include +#include +#include +#include "libsys.h" + +time_t time(time_t* t) +{ + if (t) + *t = 0; + return 0; +} diff --git a/plat/rpi/libsys/write.c b/plat/rpi/libsys/write.c new file mode 100644 index 000000000..9a765b04c --- /dev/null +++ b/plat/rpi/libsys/write.c @@ -0,0 +1,50 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include +#include +#include +#include "libsys.h" + +void _sys_write_tty(char c) +{ + _sys_rawwrite(c); +#if 0 + if ((c == '\n') && !(_sys_ttyflags & RAW)) + _sys_rawwrite('\r'); +#endif + if (c == '\n') + _sys_rawwrite('\r'); +} + +int write(int fd, void* buffer, size_t count) +{ + int i; + char* p = buffer; + + /* We're only allowed to write to fd 0, 1 or 2. */ + + if ((fd < 0) || (fd > 2)) + { + errno = EBADF; + return -1; + } + + /* Write all data. */ + + i = 0; + while (i < count) + { + _sys_write_tty(*p++); + + i++; + } + + /* No failures. */ + + return count; +} From 5378e3fe53d86c5ea55f8f096f959ec95ecfc4aa Mon Sep 17 00:00:00 2001 From: David Given Date: Fri, 17 May 2013 22:40:50 +0100 Subject: [PATCH 02/76] Add special relocation type for VC4 jump instructions. 
--HG-- branch : dtrg-videocore --- h/out.h | 1 + util/amisc/ashow.c | 3 +++ util/led/ack.out.5 | 1 + util/led/relocate.c | 39 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 44 insertions(+) diff --git a/h/out.h b/h/out.h index 122296f8e..2c97219e5 100644 --- a/h/out.h +++ b/h/out.h @@ -64,6 +64,7 @@ struct outname { #define RELO4 3 /* 4 bytes */ #define RELOPPC 4 /* PowerPC 26-bit address */ #define RELOH2 5 /* write top 2 bytes of 4 byte word */ +#define RELOVC4 6 /* VideoCore IV address in 32-bit instruction */ #define RELPC 0x08 /* pc relative */ #define RELBR 0x10 /* High order byte lowest address. */ diff --git a/util/amisc/ashow.c b/util/amisc/ashow.c index 5f22827ee..67e785ecc 100644 --- a/util/amisc/ashow.c +++ b/util/amisc/ashow.c @@ -143,6 +143,9 @@ showrelo() case RELOH2: printf("\ttop 2 bytes of a 4 byte word\n"); break; + case RELOVC4: + printf("\tVideoCore IV address in 32-bit instruction\n"); + break; default: printf("\tunknown relocation type %d\n", relrec.or_type & RELSZ); break; diff --git a/util/led/ack.out.5 b/util/led/ack.out.5 index 8e85b3f92..d9e24bff6 100644 --- a/util/led/ack.out.5 +++ b/util/led/ack.out.5 @@ -164,6 +164,7 @@ struct outrelo { #define RELO4 0x03 /* 4 bytes */ #define RELOPPC 0x04 /* 26-bit PowerPC address */ #define RELOH2 0x05 /* write top 2 bytes of 4 byte word */ +#define RELOVC4 0x06 /* VideoCore IV address in 32-bit insruction */ #define RELPC 0x08 /* pc relative */ #define RELBR 0x10 /* High order byte lowest address. */ #define RELWR 0x20 /* High order word lowest address. 
*/ diff --git a/util/led/relocate.c b/util/led/relocate.c index 93b1e9c05..f44a34b96 100644 --- a/util/led/relocate.c +++ b/util/led/relocate.c @@ -8,6 +8,7 @@ static char rcsid[] = "$Id$"; #include #include +#include #include "out.h" #include "const.h" #include "debug.h" @@ -63,6 +64,22 @@ getvalu(addr, type) return read4(addr, type) & 0x03FFFFFD; case RELOH2: return read2(addr, type) << 16; + case RELOVC4: + { + long i = read4(addr, type); + if (i & 0x00800000) + { + /* Branch instruction. */ + return (i<<9)>>9; + } + else + { + /* Branch-link instruction. */ + long hi = (i<<4)>>28; + long lo = (i & 0x007fffff); + return lo | (hi<<23); + } + } default: fatal("bad relocation size"); } @@ -138,6 +155,28 @@ putvalu(valu, addr, type) case RELOH2: write2(valu>>16, addr, type); break; + case RELOVC4: + { + long i = read4(addr, type); + if (i & 0x00800000) + { + /* Branch instruction. */ + unsigned v = (valu/2) & 0x007fffff; + i &= ~0x007fffff; + i |= v; + } + else + { + /* Branch-link instruction. */ + unsigned v = (valu/2) & 0x07ffffff; + unsigned hiv = v >> 23; + unsigned lov = v & 0x007fffff; + i &= ~0x0f7fffff; + i |= (lov>>16) | (hiv<<24); + } + write4(i, addr, type); + break; + } default: fatal("bad relocation size"); } From 26877d3c4f65ca0889c0ce34c7dc277d47c26753 Mon Sep 17 00:00:00 2001 From: David Given Date: Fri, 17 May 2013 23:30:49 +0100 Subject: [PATCH 03/76] Add a whole bunch of VC4 opcodes. 
--HG-- branch : dtrg-videocore --- mach/vc4/as/mach1.c | 21 ++++ mach/vc4/as/mach2.c | 6 +- mach/vc4/as/mach3.c | 51 ++++++++- mach/vc4/as/mach4.c | 40 ++++--- mach/vc4/as/mach5.c | 205 ++++++++++++++++++++++++++++++++++ mach/vc4/test/opcodes.s | 239 +++++++++++++++++++++++++++++++++++++--- 6 files changed, 524 insertions(+), 38 deletions(-) diff --git a/mach/vc4/as/mach1.c b/mach/vc4/as/mach1.c index 96a8a07fa..915d1fb51 100644 --- a/mach/vc4/as/mach1.c +++ b/mach/vc4/as/mach1.c @@ -4,3 +4,24 @@ * This file is redistributable under the terms of the 3-clause BSD license. * See the file 'Copying' in the root of the distribution for the full text. */ + +#include "binary.h" + +#define ALWAYS 14 + +extern void alu_instr_reg(unsigned opcode, unsigned cc, unsigned rd, + unsigned ra, unsigned rb); + +extern void alu_instr_lit(unsigned opcode, unsigned cc, unsigned rd, + unsigned ra, unsigned value); + +extern void misc_instr_reg(unsigned opcode, unsigned cc, unsigned rd, + unsigned ra, unsigned rb); + +extern void misc_instr_lit(unsigned opcode, unsigned cc, unsigned rd, + unsigned ra, unsigned value); + +extern void branch_instr(unsigned bl, unsigned cc, struct expr_t* expr); + +extern void stack_instr(unsigned opcode, unsigned loreg, unsigned hireg, + unsigned extrareg); \ No newline at end of file diff --git a/mach/vc4/as/mach2.c b/mach/vc4/as/mach2.c index c69007de0..6fd2b0e45 100644 --- a/mach/vc4/as/mach2.c +++ b/mach/vc4/as/mach2.c @@ -6,13 +6,17 @@ */ %token GPR +%token CC %token OP +%token OP_BRANCH %token OP_ONEREG %token OP_ONELREG %token OP_ALU +%token OP_FPU %token OP_MEM -%token OP_BREG +%token OP_MISC +%token OP_MISCL %token OP_STACK /* Other token types */ diff --git a/mach/vc4/as/mach3.c b/mach/vc4/as/mach3.c index b36652db7..b2cd1c2c9 100644 --- a/mach/vc4/as/mach3.c +++ b/mach/vc4/as/mach3.c @@ -5,8 +5,6 @@ * See the file 'Copying' in the root of the distribution for the full text. 
*/ -#include "binary.h" - /* Integer registers */ 0, GPR, 0, "r0", @@ -17,7 +15,6 @@ 0, GPR, 5, "r5", 0, GPR, 6, "r6", -0, GPR, 6, "fp", 0, GPR, 7, "r7", 0, GPR, 8, "r8", 0, GPR, 9, "r9", @@ -42,6 +39,7 @@ 0, GPR, 26, "r26", 0, GPR, 26, "lr", 0, GPR, 27, "r27", +0, GPR, 27, "fp", 0, GPR, 28, "r28", 0, GPR, 29, "r29", 0, GPR, 30, "r30", @@ -49,13 +47,34 @@ 0, GPR, 31, "r31", 0, GPR, 31, "pc", +/* Condition codes */ + +0, CC, 0, ".eq", +0, CC, 1, ".ne", +0, CC, 2, ".cs", +0, CC, 2, ".lo", +0, CC, 3, ".cc", +0, CC, 3, ".hg", +0, CC, 4, ".mi", +0, CC, 5, ".pl", +0, CC, 6, ".vs", +0, CC, 7, ".vc", +0, CC, 8, ".hi", +0, CC, 9, ".ls", +0, CC, 10, ".ge", +0, CC, 11, ".lt", +0, CC, 12, ".gt", +0, CC, 13, ".le", +0, CC, 15, ".f", + /* Special instructions */ 0, OP, B16(00000000,00000001), "nop", 0, OP, B16(00000000,00001010), "rti", -0, OP_ONEREG, B16(00000000,01000000), "b", -0, OP_ONEREG, B16(00000000,01100000), "bl", +0, OP_BRANCH, 0, "b", +0, OP_BRANCH, 1, "bl", + 0, OP_ONELREG, B16(00000000,10000000), "tbb", 0, OP_ONELREG, B16(00000000,10100000), "tbs", @@ -92,4 +111,26 @@ 0, OP_ALU, B8(00011110), "asr", 0, OP_ALU, B8(00011111), "abs", +0, OP_MISC, B16(11001000,00000000), "fadd", +0, OP_MISC, B16(11001000,00100000), "fsub", +0, OP_MISC, B16(11001000,01000000), "fmul", +0, OP_MISC, B16(11001000,01100000), "fdiv", +0, OP_MISC, B16(11001000,10000000), "fcmp", +0, OP_MISC, B16(11001000,10100000), "fabs", +0, OP_MISC, B16(11001000,11000000), "frsb", +0, OP_MISC, B16(11001000,11100000), "fmax", +0, OP_MISC, B16(11001001,00000000), "frcp", +0, OP_MISC, B16(11001001,00100000), "frsqrt", +0, OP_MISC, B16(11001001,01000000), "fnmul", +0, OP_MISC, B16(11001001,01100000), "fmin", +0, OP_MISC, B16(11001001,10000000), "fld1", +0, OP_MISC, B16(11001001,10100000), "fld0", +0, OP_MISC, B16(11001001,11000000), "log2", +0, OP_MISC, B16(11001001,11100000), "exp2", +0, OP_MISC, B16(11000101,11100000), "adds256", +0, OP_MISCL, B16(11000100,10000000), "divs", +0, OP_MISCL, 
B16(11000100,11100000), "divu", + +0, OP_STACK, B16(00000010,00000000), "push", +0, OP_STACK, B16(00000010,10000000), "pop", diff --git a/mach/vc4/as/mach4.c b/mach/vc4/as/mach4.c index d1320dae1..05dba83b8 100644 --- a/mach/vc4/as/mach4.c +++ b/mach/vc4/as/mach4.c @@ -5,15 +5,12 @@ * See the file 'Copying' in the root of the distribution for the full text. */ -#include "binary.h" - operation : OP { emit2($1); } - | OP_ONEREG GPR - { - emit2($1 | ($2<<0)); - } + | OP_BRANCH GPR { emit2($1 | ($2<<0)); } + | OP_BRANCH expr { branch_instr($1, ALWAYS, &$2); } + | OP_BRANCH CC expr { branch_instr($1, $2, &$3); } | OP_ONELREG GPR { @@ -22,17 +19,28 @@ operation emit2($1 | ($2<<0)); } - | OP_ALU GPR ',' GPR - { - emit2(B16(01000000, 00000000) | ($1<<8) | ($2<<0) | ($4<<4)); - } + | OP_ALU GPR ',' GPR { alu_instr_reg($1, ALWAYS, $2, $2, $4); } + | OP_ALU GPR ',' GPR ',' GPR { alu_instr_reg($1, ALWAYS, $2, $4, $6); } + | OP_ALU CC GPR ',' GPR { alu_instr_reg($1, $2, $3, $3, $5); } + | OP_ALU CC GPR ',' GPR ',' GPR { alu_instr_reg($1, $2, $3, $5, $7); } - | OP_ALU GPR ',' '#' u5 - { - if ($1 >= 0x10) - serror("cannot use this ALU operation in 2op form"); - emit2(B16(01100000, 00000000) | ($1<<9) | ($2<<0) | ($5<<4)); - } + | OP_ALU GPR ',' '#' absexp { alu_instr_lit($1, ALWAYS, $2, $2, $5); } + | OP_ALU GPR ',' GPR ',' '#' absexp { alu_instr_lit($1, ALWAYS, $2, $4, $7); } + | OP_ALU CC GPR ',' '#' absexp { alu_instr_lit($1, $2, $3, $3, $6); } + | OP_ALU CC GPR ',' GPR ',' '#' absexp { alu_instr_lit($1, $2, $3, $5, $8); } + + | OP_MISC GPR ',' GPR ',' GPR { misc_instr_reg($1, ALWAYS, $2, $4, $6); } + | OP_MISC CC GPR ',' GPR ',' GPR { misc_instr_reg($1, $2, $3, $5, $7); } + + | OP_MISCL GPR ',' GPR ',' GPR { misc_instr_reg($1, ALWAYS, $2, $4, $6); } + | OP_MISCL CC GPR ',' GPR ',' GPR { misc_instr_reg($1, $2, $3, $5, $7); } + | OP_MISCL GPR ',' GPR ',' '#' absexp { misc_instr_lit($1, ALWAYS, $2, $4, $7); } + | OP_MISCL CC GPR ',' GPR ',' '#' absexp { misc_instr_lit($1, $2, 
$3, $5, $8); } + + | OP_STACK GPR { stack_instr($1, $2, $2, -1); } + | OP_STACK GPR ',' GPR { stack_instr($1, $2, $2, $4); } + | OP_STACK GPR '-' GPR { stack_instr($1, $2, $4, -1); } + | OP_STACK GPR '-' GPR ',' GPR { stack_instr($1, $2, $4, $6); } ; e16 diff --git a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c index 668f4b748..83d73da2a 100644 --- a/mach/vc4/as/mach5.c +++ b/mach/vc4/as/mach5.c @@ -5,3 +5,208 @@ * See the file 'Copying' in the root of the distribution for the full text. */ +/* Assemble an ALU instruction where rb is a register. */ + +void alu_instr_reg(unsigned op, unsigned cc, + unsigned rd, unsigned ra, unsigned rb) +{ + /* Can we use short form? */ + + if ((cc == ALWAYS) && (ra == rd)) + { + emit2(B16(01000000,00000000) | (op<<8) | (rb<<4) | (rd<<0)); + return; + } + + /* Long form, then. */ + + emit2(B16(11000000,00000000) | (op<<5) | (rd<<0)); + emit2(B16(00000000,00000000) | (ra<<11) | (cc<<7) | (rb<<0)); +} + +/* Assemble an ALU instruction where rb is a literal. */ + +void alu_instr_lit(unsigned op, unsigned cc, + unsigned rd, unsigned ra, unsigned value) +{ + /* 16 bit short form? */ + + if ((cc == ALWAYS) && !(op & 1) && (value <= 0x1f) && (ra == rd) && + !(ra & 0x10)) + { + emit2(B16(01100000,00000000) | (op<<8) | (value<<4) | (rd<<0)); + return; + } + + /* 32 bit medium form? */ + + if (value >= 0x1f) + { + emit2(B16(11000000,00000000) | (op<<5) | (rd<<0)); + emit2(B16(00000000,01000000) | (ra<<11) | (cc<<7) | (value<<0)); + return; + } + + /* Long form, then. */ + + if (cc != ALWAYS) + serror("cannot use condition codes with ALU literals this big"); + + /* add is special. */ + + if (op == B8(00000010)) + emit2(B16(11101100,00000000) | (ra<<5) | (rd<<0)); + else + { + if (ra != rd) + serror("can only use 2op form of ALU instructions with literals this big"); + emit2(B16(11101000,00000000) | (op<<5) | (rd<<0)); + } + + emit4(value); +} + +/* Miscellaneous instructions with three registers and a cc. 
*/ + +void misc_instr_reg(unsigned op, unsigned cc, + unsigned rd, unsigned ra, unsigned rb) +{ + emit2(op | (rd<<0)); + emit2(B16(00000000,00000000) | (ra<<11) | (cc<<7) | (rb<<0)); +} + +/* Miscellaneous instructions with two registers, a literal, and a cc. */ + +void misc_instr_lit(unsigned op, unsigned cc, + unsigned rd, unsigned ra, unsigned value) +{ + if (value < 0x1f) + serror("only constants from 0..31 can be used here"); + + emit2(op | (rd<<0)); + emit2(B16(00000000,01000000) | (ra<<11) | (cc<<7) | (value<<0)); +} + +/* Assemble a branch instruction. This may be a near branch into this + * object file, or a far branch which requires a fixup. */ + +void branch_instr(unsigned bl, unsigned cc, struct expr_t* expr) +{ + unsigned type = expr->typ & S_TYP; + + /* Sanity checking. */ + + if (bl && (cc != ALWAYS)) + serror("can't use condition codes with bl"); + if (type == S_ABS) + serror("can't use absolute addresses here"); + + switch (pass) + { + case 0: + /* Calculate size of instructions only. For now we just assume + * that they're going to be the maximum size, 32 bits. */ + + emit4(0); + break; + + case 1: + case 2: + { + /* The VC4 branch instructions express distance in 2-byte + * words. */ + + int d = (expr->val - DOTVAL) / 2; + + /* We now know the worst case for the instruction layout. At + * this point we can emit the instructions, which may shrink + * the code. */ + + if (!bl && (type == DOTTYP)) + { + /* This is a reference to code within this section. If it's + * close enough to the program counter, we can use a short- + * form instruction. */ + + if ((d >= -128) && (d < 127)) + { + emit2(B16(00011000,00000000) | (cc<<7) | (d&0x7f)); + break; + } + } + + /* Absolute addresses and references to other sections + * need the full 32 bits. 
*/ + + newrelo(expr->typ, RELOVC4 | RELPC); + + if (bl) + { + unsigned v = d & 0x07ffffff; + unsigned hiv = v >> 23; + unsigned lov = v & 0x007fffff; + emit2(B16(10010000,10000000) | (lov>>16) | (hiv<<8)); + emit2(B16(00000000,00000000) | (lov&0xffff)); + } + else + { + unsigned v = d & 0x007fffff; + emit2(B16(10010000,00000000) | (cc<<8) | (v>>16)); + emit2(B16(00000000,00000000) | (v&0xffff)); + } + break; + } + } +} + +void stack_instr(unsigned opcode, unsigned loreg, unsigned hireg, + unsigned extrareg) +{ + unsigned b; + + switch (loreg) + { + case 0: b = 0; break; + case 6: b = 1; break; + case 16: b = 2; break; + case 24: b = 3; break; + + case 26: /* lr */ + extrareg = 26; + hireg = 31; + loreg = 0; + b = 0; + break; + + case 31: /* pc */ + extrareg = 31; + hireg = 31; + loreg = 0; + b = 0; + break; + + default: + serror("base register for push or pop may be only r0, r6, r16, r24, lr or pc"); + } + + if (opcode & 0x0080) + { + /* Pop */ + if (extrareg == 26) + serror("cannot pop lr"); + } + else + { + /* Push */ + if (extrareg == 31) + serror("cannot push pc"); + } + + if (hireg < loreg) + serror("invalid register range"); + + emit2(opcode | (b<<5) | (hireg<<0) | + ((extrareg != -1) ? 
0x0100 : 0)); +} + + diff --git a/mach/vc4/test/opcodes.s b/mach/vc4/test/opcodes.s index 894a7540e..6dedc4168 100644 --- a/mach/vc4/test/opcodes.s +++ b/mach/vc4/test/opcodes.s @@ -25,6 +25,8 @@ main: tbs r0 tbs r15 + nop + mov r0, r1 cmn r0, r1 add r0, r1 @@ -58,20 +60,225 @@ main: asr r0, r1 abs r0, r1 - mov r0, #31 - cmn r0, #31 - add r0, #31 - bic r0, #31 - mul r0, #31 - eor r0, #31 - sub r0, #31 - and r0, #31 - mvn r0, #31 - ror r0, #31 - cmp r0, #31 - rsb r0, #31 - btst r0, #31 - or r0, #31 - extu r0, #31 - max r0, #31 + nop + mov.f r0, r1 + cmn.f r0, r1 + add.f r0, r1 + bic.f r0, r1 + mul.f r0, r1 + eor.f r0, r1 + sub.f r0, r1 + and.f r0, r1 + mvn.f r0, r1 + ror.f r0, r1 + cmp.f r0, r1 + rsb.f r0, r1 + btst.f r0, r1 + or.f r0, r1 + extu.f r0, r1 + max.f r0, r1 + bset.f r0, r1 + min.f r0, r1 + bclr.f r0, r1 + adds2.f r0, r1 + bchg.f r0, r1 + adds4.f r0, r1 + adds8.f r0, r1 + adds16.f r0, r1 + exts.f r0, r1 + neg.f r0, r1 + lsr.f r0, r1 + clz.f r0, r1 + lsl.f r0, r1 + brev.f r0, r1 + asr.f r0, r1 + abs.f r0, r1 + + nop + + mov r0, r1, r2 + cmn r0, r1, r2 + add r0, r1, r2 + bic r0, r1, r2 + mul r0, r1, r2 + eor r0, r1, r2 + sub r0, r1, r2 + and r0, r1, r2 + mvn r0, r1, r2 + ror r0, r1, r2 + cmp r0, r1, r2 + rsb r0, r1, r2 + btst r0, r1, r2 + or r0, r1, r2 + extu r0, r1, r2 + max r0, r1, r2 + bset r0, r1, r2 + min r0, r1, r2 + bclr r0, r1, r2 + adds2 r0, r1, r2 + bchg r0, r1, r2 + adds4 r0, r1, r2 + adds8 r0, r1, r2 + adds16 r0, r1, r2 + exts r0, r1, r2 + neg r0, r1, r2 + lsr r0, r1, r2 + clz r0, r1, r2 + lsl r0, r1, r2 + brev r0, r1, r2 + asr r0, r1, r2 + abs r0, r1, r2 + + nop + + mov r0, #0x1f + cmn r0, #0x1f + add r0, #0x1f + bic r0, #0x1f + mul r0, #0x1f + eor r0, #0x1f + sub r0, #0x1f + and r0, #0x1f + mvn r0, #0x1f + ror r0, #0x1f + cmp r0, #0x1f + rsb r0, #0x1f + btst r0, #0x1f + or r0, #0x1f + extu r0, #0x1f + max r0, #0x1f + bset r0, #0x1f + min r0, #0x1f + bclr r0, #0x1f + adds2 r0, #0x1f + bchg r0, #0x1f + adds4 r0, #0x1f + adds8 r0, #0x1f + adds16 
r0, #0x1f + exts r0, #0x1f + neg r0, #0x1f + lsr r0, #0x1f + clz r0, #0x1f + lsl r0, #0x1f + brev r0, #0x1f + asr r0, #0x1f + abs r0, #0x1f + + nop + + mov.f r0, #0x1f + cmn.f r0, #0x1f + add.f r0, #0x1f + bic.f r0, #0x1f + mul.f r0, #0x1f + eor.f r0, #0x1f + sub.f r0, #0x1f + and.f r0, #0x1f + mvn.f r0, #0x1f + ror.f r0, #0x1f + cmp.f r0, #0x1f + rsb.f r0, #0x1f + btst.f r0, #0x1f + or.f r0, #0x1f + extu.f r0, #0x1f + max.f r0, #0x1f + bset.f r0, #0x1f + min.f r0, #0x1f + bclr.f r0, #0x1f + adds2.f r0, #0x1f + bchg.f r0, #0x1f + adds4.f r0, #0x1f + adds8.f r0, #0x1f + adds16.f r0, #0x1f + exts.f r0, #0x1f + neg.f r0, #0x1f + lsr.f r0, #0x1f + clz.f r0, #0x1f + lsl.f r0, #0x1f + brev.f r0, #0x1f + asr.f r0, #0x1f + abs.f r0, #0x1f + + add r0, #0x12345678 + add r0, r1, #0x12345678 + sub r0, #0x12345678 + + nop + + fadd r0, r1, r2 + fsub r0, r1, r2 + fmul r0, r1, r2 + fdiv r0, r1, r2 + fcmp r0, r1, r2 + fabs r0, r1, r2 + frsb r0, r1, r2 + fmax r0, r1, r2 + frcp r0, r1, r2 + frsqrt r0, r1, r2 + fnmul r0, r1, r2 + fmin r0, r1, r2 + fld1 r0, r1, r2 + fld0 r0, r1, r2 + log2 r0, r1, r2 + exp2 r0, r1, r2 + divs r0, r1, r2 + divu r0, r1, r2 + divs r0, r1, #31 + divu r0, r1, #31 + adds256 r0, r1, r2 + + nop + + fadd.f r0, r1, r2 + fsub.f r0, r1, r2 + fmul.f r0, r1, r2 + fdiv.f r0, r1, r2 + fcmp.f r0, r1, r2 + fabs.f r0, r1, r2 + frsb.f r0, r1, r2 + fmax.f r0, r1, r2 + frcp.f r0, r1, r2 + frsqrt.f r0, r1, r2 + fnmul.f r0, r1, r2 + fmin.f r0, r1, r2 + fld1.f r0, r1, r2 + fld0.f r0, r1, r2 + log2.f r0, r1, r2 + exp2.f r0, r1, r2 + divs.f r0, r1, r2 + divu.f r0, r1, r2 + divs.f r0, r1, #31 + divu.f r0, r1, #31 + adds256.f r0, r1, r2 + +label: + b label + b forward + b label + b main + b.f label + b.f forward + b.f main + bl label + bl forward + bl main +forward: + + push r0 + push r0, lr + push r0-r5 + push r0-r5, lr + push r6 + push r16 + push r24 + push lr + + pop r0 + pop r0, pc + pop r0-r5 + pop r0-r5, pc + pop r6 + pop r16 + pop r24 + pop pc From 
fc2833d4568f5e474661dec3c3cf93b33cbd2cc5 Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 19 May 2013 00:56:56 +0100 Subject: [PATCH 04/76] Add most vanilla memory load/store instructions. --HG-- branch : dtrg-videocore --- mach/vc4/as/mach0.c | 2 + mach/vc4/as/mach1.c | 24 +++---- mach/vc4/as/mach2.c | 4 -- mach/vc4/as/mach3.c | 12 +++- mach/vc4/as/mach4.c | 84 ++++--------------------- mach/vc4/as/mach5.c | 134 +++++++++++++++++++++++++++++++++++----- mach/vc4/test/opcodes.s | 51 +++++++++++++++ 7 files changed, 205 insertions(+), 106 deletions(-) diff --git a/mach/vc4/as/mach0.c b/mach/vc4/as/mach0.c index b6294ecfc..d12994de0 100644 --- a/mach/vc4/as/mach0.c +++ b/mach/vc4/as/mach0.c @@ -19,6 +19,8 @@ #undef word_t #define word_t long +typedef unsigned long quad; + #undef ALIGNWORD #define ALIGNWORD 4 diff --git a/mach/vc4/as/mach1.c b/mach/vc4/as/mach1.c index 915d1fb51..bfb298f68 100644 --- a/mach/vc4/as/mach1.c +++ b/mach/vc4/as/mach1.c @@ -9,19 +9,21 @@ #define ALWAYS 14 -extern void alu_instr_reg(unsigned opcode, unsigned cc, unsigned rd, - unsigned ra, unsigned rb); +extern void alu_instr_reg(quad opcode, quad cc, quad rd, + quad ra, quad rb); -extern void alu_instr_lit(unsigned opcode, unsigned cc, unsigned rd, - unsigned ra, unsigned value); +extern void alu_instr_lit(quad opcode, quad cc, quad rd, + quad ra, quad value); -extern void misc_instr_reg(unsigned opcode, unsigned cc, unsigned rd, - unsigned ra, unsigned rb); +extern void misc_instr_reg(quad opcode, quad cc, quad rd, + quad ra, quad rb); -extern void misc_instr_lit(unsigned opcode, unsigned cc, unsigned rd, - unsigned ra, unsigned value); +extern void misc_instr_lit(quad opcode, quad cc, quad rd, + quad ra, quad value); -extern void branch_instr(unsigned bl, unsigned cc, struct expr_t* expr); +extern void branch_instr(quad bl, quad cc, struct expr_t* expr); -extern void stack_instr(unsigned opcode, unsigned loreg, unsigned hireg, - unsigned extrareg); \ No newline at end of file +extern 
void stack_instr(quad opcode, quad loreg, quad hireg, + quad extrareg); + +extern void mem_instr(quad opcode, quad cc, quad rd, long offset, quad rs); diff --git a/mach/vc4/as/mach2.c b/mach/vc4/as/mach2.c index 6fd2b0e45..2abde7136 100644 --- a/mach/vc4/as/mach2.c +++ b/mach/vc4/as/mach2.c @@ -19,8 +19,4 @@ %token OP_MISCL %token OP_STACK -/* Other token types */ -/* %type c */ -%type e16 u8 u7 u6 u5 u4 u2 u1 -/* %type nb ds bda bdl lia lil */ diff --git a/mach/vc4/as/mach3.c b/mach/vc4/as/mach3.c index b2cd1c2c9..64b503a98 100644 --- a/mach/vc4/as/mach3.c +++ b/mach/vc4/as/mach3.c @@ -33,13 +33,13 @@ 0, GPR, 22, "r22", 0, GPR, 23, "r23", 0, GPR, 24, "r24", +0, GPR, 24, "fp", 0, GPR, 25, "r25", 0, GPR, 25, "sp", 0, GPR, 26, "r26", 0, GPR, 26, "lr", 0, GPR, 27, "r27", -0, GPR, 27, "fp", 0, GPR, 28, "r28", 0, GPR, 29, "r29", 0, GPR, 30, "r30", @@ -134,3 +134,13 @@ 0, OP_STACK, B16(00000010,00000000), "push", 0, OP_STACK, B16(00000010,10000000), "pop", + +0, OP_MEM, B8(00000000), "ld", +0, OP_MEM, B8(00000001), "st", +0, OP_MEM, B8(00000010), "ldh", +0, OP_MEM, B8(00000011), "sth", +0, OP_MEM, B8(00000100), "ldb", +0, OP_MEM, B8(00000101), "stb", +0, OP_MEM, B8(00000110), "ldhs", +0, OP_MEM, B8(00000111), "sths", + diff --git a/mach/vc4/as/mach4.c b/mach/vc4/as/mach4.c index 05dba83b8..7bd183829 100644 --- a/mach/vc4/as/mach4.c +++ b/mach/vc4/as/mach4.c @@ -41,78 +41,16 @@ operation | OP_STACK GPR ',' GPR { stack_instr($1, $2, $2, $4); } | OP_STACK GPR '-' GPR { stack_instr($1, $2, $4, -1); } | OP_STACK GPR '-' GPR ',' GPR { stack_instr($1, $2, $4, $6); } - ; - -e16 - : expr - { - DOTVAL += 2; - newrelo($1.typ, RELO2 | FIXUPFLAGS); - DOTVAL -= 2; - $$ = $1.val & 0xFFFF; - } - ; - -u8 - : absexp - { - if (($1 < 0) || ($1 > 0xFF)) - serror("8-bit unsigned value out of range"); - $$ = $1; - } - ; - -u7 - : absexp - { - if (($1 < 0) || ($1 > 0x7F)) - serror("7-bit unsigned value out of range"); - $$ = $1; - } - ; - -u6 - : absexp - { - if (($1 < 0) || ($1 > 0x3F)) - 
serror("6-bit unsigned value out of range"); - $$ = $1; - } - ; - -u5 - : absexp - { - if (($1 < 0) || ($1 > 0x1F)) - serror("5-bit unsigned value out of range"); - $$ = $1; - } - ; - -u4 - : absexp - { - if (($1 < 0) || ($1 > 0xF)) - serror("4-bit unsigned value out of range"); - $$ = $1; - } - ; - -u1 - : absexp - { - if (($1 < 0) || ($1 > 1)) - serror("1-bit unsigned value out of range"); - $$ = $1; - } - ; - -u2 - : absexp - { - if (($1 < 0) || ($1 > 0x3)) - serror("2-bit unsigned value out of range"); - $$ = $1; - } + + | OP_MEM GPR ',' '(' GPR ')' { mem_instr($1, ALWAYS, $2, 0, $5); } + | OP_MEM CC GPR ',' '(' GPR ')' { mem_instr($1, $2, $3, 0, $6); } + | OP_MEM GPR ',' absexp '(' GPR ')' { mem_instr($1, ALWAYS, $2, $4, $6); } + | OP_MEM CC GPR ',' absexp '(' GPR ')' { mem_instr($1, $2, $3, $5, $7); } + + | OP_MEM GPR ',' '(' GPR ',' GPR ')' + | OP_MEM CC GPR ',' '(' GPR ',' GPR ')' + + | OP_MEM GPR ',' '(' GPR ')' '+' '+' + | OP_MEM CC GPR ',' '(' GPR ')' '+' '+' ; diff --git a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c index 83d73da2a..32d7507f3 100644 --- a/mach/vc4/as/mach5.c +++ b/mach/vc4/as/mach5.c @@ -5,10 +5,11 @@ * See the file 'Copying' in the root of the distribution for the full text. */ +#define maskx(v, x) (v & ((1<<(x))-1)) + /* Assemble an ALU instruction where rb is a register. */ -void alu_instr_reg(unsigned op, unsigned cc, - unsigned rd, unsigned ra, unsigned rb) +void alu_instr_reg(quad op, quad cc, quad rd, quad ra, quad rb) { /* Can we use short form? */ @@ -26,8 +27,7 @@ void alu_instr_reg(unsigned op, unsigned cc, /* Assemble an ALU instruction where rb is a literal. */ -void alu_instr_lit(unsigned op, unsigned cc, - unsigned rd, unsigned ra, unsigned value) +void alu_instr_lit(quad op, quad cc, quad rd, quad ra, quad value) { /* 16 bit short form? */ @@ -68,8 +68,7 @@ void alu_instr_lit(unsigned op, unsigned cc, /* Miscellaneous instructions with three registers and a cc. 
*/ -void misc_instr_reg(unsigned op, unsigned cc, - unsigned rd, unsigned ra, unsigned rb) +void misc_instr_reg(quad op, quad cc, quad rd, quad ra, quad rb) { emit2(op | (rd<<0)); emit2(B16(00000000,00000000) | (ra<<11) | (cc<<7) | (rb<<0)); @@ -77,8 +76,7 @@ void misc_instr_reg(unsigned op, unsigned cc, /* Miscellaneous instructions with two registers, a literal, and a cc. */ -void misc_instr_lit(unsigned op, unsigned cc, - unsigned rd, unsigned ra, unsigned value) +void misc_instr_lit(quad op, quad cc, quad rd, quad ra, quad value) { if (value < 0x1f) serror("only constants from 0..31 can be used here"); @@ -90,9 +88,9 @@ void misc_instr_lit(unsigned op, unsigned cc, /* Assemble a branch instruction. This may be a near branch into this * object file, or a far branch which requires a fixup. */ -void branch_instr(unsigned bl, unsigned cc, struct expr_t* expr) +void branch_instr(quad bl, quad cc, struct expr_t* expr) { - unsigned type = expr->typ & S_TYP; + quad type = expr->typ & S_TYP; /* Sanity checking. */ @@ -142,15 +140,15 @@ void branch_instr(unsigned bl, unsigned cc, struct expr_t* expr) if (bl) { - unsigned v = d & 0x07ffffff; - unsigned hiv = v >> 23; - unsigned lov = v & 0x007fffff; + quad v = d & 0x07ffffff; + quad hiv = v >> 23; + quad lov = v & 0x007fffff; emit2(B16(10010000,10000000) | (lov>>16) | (hiv<<8)); emit2(B16(00000000,00000000) | (lov&0xffff)); } else { - unsigned v = d & 0x007fffff; + quad v = d & 0x007fffff; emit2(B16(10010000,00000000) | (cc<<8) | (v>>16)); emit2(B16(00000000,00000000) | (v&0xffff)); } @@ -159,10 +157,11 @@ void branch_instr(unsigned bl, unsigned cc, struct expr_t* expr) } } -void stack_instr(unsigned opcode, unsigned loreg, unsigned hireg, - unsigned extrareg) +/* Push/pop. */ + +void stack_instr(quad opcode, quad loreg, quad hireg, quad extrareg) { - unsigned b; + quad b; switch (loreg) { @@ -209,4 +208,105 @@ void stack_instr(unsigned opcode, unsigned loreg, unsigned hireg, ((extrareg != -1) ? 
0x0100 : 0)); } +/* Memory operations where the offset is a fixed value (including zero). */ + +void mem_instr(quad opcode, quad cc, quad rd, long offset, quad rs) +{ + quad uoffset = (quad) offset; + int multiple4 = !(offset & 3); + int intonly = ((opcode & B8(00000110)) == 0); + + /* If no CC, there are some special forms we can use. */ + + if (cc == ALWAYS) + { + /* Very short form, special for stack offsets. */ + + if (intonly && (rs == 25) && multiple4 && fitx(offset, 7) && (rd < 0x10)) + { + quad o = maskx(offset, 7) / 4; + emit2(B16(00000100,00000000) | (opcode<<9) | (o<<4) | (rd<<0)); + return; + } + + /* Slightly longer form for directly dereferencing via a register. */ + + if ((rs < 0x10) && (rd < 0x10) && (offset == 0)) + { + emit2(B16(00001000,00000000) | (opcode<<8) | (rs<<4) | (rd<<0)); + return; + } + + /* Integer only, but a limited offset. */ + + if (intonly && multiple4 && (uoffset <= 0x3f) && (rs < 0x10) && (rd < 0x10)) + { + quad o = uoffset / 4; + emit2(B16(00100000,00000000) | (opcode<<12) | (o<<8) | + (rs<<4) | (rd<<0)); + return; + } + + /* Certain registers support 16-bit offsets. */ + + if (fitx(offset, 16)) + { + switch (rs) + { + case 0: opcode = B16(10101011,00000000) | (opcode<<5); goto specialreg; + case 24: opcode = B16(10101000,00000000) | (opcode<<5); goto specialreg; + case 25: opcode = B16(10101001,00000000) | (opcode<<5); goto specialreg; + case 31: opcode = B16(10101010,00000000) | (opcode<<5); goto specialreg; + default: break; + + specialreg: + { + quad o = maskx(offset, 16); + emit2(opcode | (rd<<0)); + emit2(o); + return; + } + } + } + + /* 12-bit displacements. */ + + if (fitx(offset, 12)) + { + quad looffset = maskx(offset, 11); + quad hioffset = (offset >> 11) & 1; + + emit2(B16(10100010,00000000) | (opcode<<5) | (rd<<0) | (hioffset<<8)); + emit2(B16(00000000,00000000) | (rs<<11) | (looffset<<0)); + return; + } + + /* Everything else uses Very Long Form.
*/ + + if (!fitx(offset, 27)) + serror("offset will not fit into load/store instruction"); + + if (rs == 31) + opcode = B16(11100111,00000000) | (opcode<<5); + else + opcode = B16(11100110,00000000) | (opcode<<5); + + emit2(opcode | (rd<<0)); + emit4((rs<<27) | maskx(offset, 27)); + return; + } + + /* Now we're on to load/store instructions with ccs. */ + + if (uoffset <= 0x1f) + { + emit2(B16(10100000,00000000) | (opcode<<5) | (rd<<0)); + emit2(B16(00000000,01000000) | (rs<<11) | (cc<<7) | (uoffset<<0)); + return; + } + + /* No encoding for this instruction. */ + + serror("invalid load/store instruction"); +} diff --git a/mach/vc4/test/opcodes.s b/mach/vc4/test/opcodes.s index 6dedc4168..2f9c1c709 100644 --- a/mach/vc4/test/opcodes.s +++ b/mach/vc4/test/opcodes.s @@ -282,3 +282,54 @@ forward: pop r16 pop r24 pop pc + + nop + + ld r0, (sp) + st r0, (sp) + ld r0, 4(sp) + st r0, 4(sp) + ld r0, -4(sp) + st r0, -4(sp) + ld r0, 5(sp) + st r0, 5(sp) + ld r0, -5(sp) + st r0, -5(sp) + + ld r0, (r1) + st r0, (r1) + ld r16, (r1) + st r16, (r1) + ldh r0, (r1) + sth r0, (r1) + ldb r0, (r1) + stb r0, (r1) + ldhs r0, (r1) + sths r0, (r1) + ldh r16, (r1) + sth r16, (r1) + ldb r16, (r1) + stb r16, (r1) + ldhs r16, (r1) + sths r16, (r1) + ld r0, 0x3c (r1) + st r0, 0x3c (r1) + ld r0, 0xfff (r1) + st r0, 0xfff (r1) + ld r1, 0xffff (r0) + st r1, 0xffff (r0) + ld r0, -1 (r1) + st r0, -1 (r1) + ld r16, 0x3c (r1) + st r16, 0x3c (r1) + ld r16, 0xfff (r1) + st r16, 0xfff (r1) + ld r16, 0xffff (r0) + st r16, 0xffff (r0) + ld r16, -1 (r1) + st r16, -1 (r1) + + ld.f r0, (r1) + st.f r0, (r1) + ld.f r0, 8 (r1) + st.f r0, 8 (r1) From febe8ca937ff80005191155c96e25f37db98c054 Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 19 May 2013 12:39:35 +0100 Subject: [PATCH 05/76] Add register offset and postincrement memory operations. 
--HG-- branch : dtrg-videocore --- mach/vc4/as/mach1.c | 27 +++++++++------------------ mach/vc4/as/mach4.c | 10 +++++----- mach/vc4/as/mach5.c | 32 ++++++++++++++++++++++++-------- mach/vc4/test/opcodes.s | 11 +++++++++++ 4 files changed, 49 insertions(+), 31 deletions(-) diff --git a/mach/vc4/as/mach1.c b/mach/vc4/as/mach1.c index bfb298f68..25d5034b4 100644 --- a/mach/vc4/as/mach1.c +++ b/mach/vc4/as/mach1.c @@ -9,21 +9,12 @@ #define ALWAYS 14 -extern void alu_instr_reg(quad opcode, quad cc, quad rd, - quad ra, quad rb); - -extern void alu_instr_lit(quad opcode, quad cc, quad rd, - quad ra, quad value); - -extern void misc_instr_reg(quad opcode, quad cc, quad rd, - quad ra, quad rb); - -extern void misc_instr_lit(quad opcode, quad cc, quad rd, - quad ra, quad value); - -extern void branch_instr(quad bl, quad cc, struct expr_t* expr); - -extern void stack_instr(quad opcode, quad loreg, quad hireg, - quad extrareg); - -extern void mem_instr(quad opcode, quad cc, quad rd, long offset, quad rs); +extern void alu_instr_reg(quad opcode, int cc, int rd, int ra, int rb); +extern void alu_instr_lit(quad opcode, int cc, int rd, int ra, quad value); +extern void misc_instr_reg(quad opcode, int cc, int rd, int ra, int rb); +extern void misc_instr_lit(quad opcode, int cc, int rd, int ra, quad value); +extern void branch_instr(int bl, int cc, struct expr_t* expr); +extern void stack_instr(quad opcode, int loreg, int hireg, int extrareg); +extern void mem_instr(quad opcode, int cc, int rd, long offset, int rs); +extern void mem_offset_instr(quad opcode, int cc, int rd, int qa, int rb); +extern void mem_postincr_instr(quad opcode, int cc, int rd, int rs); diff --git a/mach/vc4/as/mach4.c b/mach/vc4/as/mach4.c index 7bd183829..f614825e9 100644 --- a/mach/vc4/as/mach4.c +++ b/mach/vc4/as/mach4.c @@ -6,7 +6,7 @@ */ operation - : OP { emit2($1); } + : OP { emit2($1); } | OP_BRANCH GPR { emit2($1 | ($2<<0)); } | OP_BRANCH expr { branch_instr($1, ALWAYS, &$2); } @@ -47,10 +47,10 @@ 
operation | OP_MEM GPR ',' absexp '(' GPR ')' { mem_instr($1, ALWAYS, $2, $4, $6); } | OP_MEM CC GPR ',' absexp '(' GPR ')' { mem_instr($1, $2, $3, $5, $7); } - | OP_MEM GPR ',' '(' GPR ',' GPR ')' - | OP_MEM CC GPR ',' '(' GPR ',' GPR ')' + | OP_MEM GPR ',' '(' GPR ',' GPR ')' { mem_offset_instr($1, ALWAYS, $2, $5, $7); } + | OP_MEM CC GPR ',' '(' GPR ',' GPR ')' { mem_offset_instr($1, $2, $3, $6, $8); } - | OP_MEM GPR ',' '(' GPR ')' '+' '+' - | OP_MEM CC GPR ',' '(' GPR ')' '+' '+' + | OP_MEM GPR ',' '(' GPR ')' '+' '+' { mem_postincr_instr($1, ALWAYS, $2, $5); } + | OP_MEM CC GPR ',' '(' GPR ')' '+' '+' { mem_postincr_instr($1, $2, $3, $6); } ; diff --git a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c index 32d7507f3..0d68c8ff9 100644 --- a/mach/vc4/as/mach5.c +++ b/mach/vc4/as/mach5.c @@ -9,7 +9,7 @@ /* Assemble an ALU instruction where rb is a register. */ -void alu_instr_reg(quad op, quad cc, quad rd, quad ra, quad rb) +void alu_instr_reg(quad op, int cc, int rd, int ra, int rb) { /* Can we use short form? */ @@ -27,7 +27,7 @@ void alu_instr_reg(quad op, quad cc, quad rd, quad ra, quad rb) /* Assemble an ALU instruction where rb is a literal. */ -void alu_instr_lit(quad op, quad cc, quad rd, quad ra, quad value) +void alu_instr_lit(quad op, int cc, int rd, int ra, quad value) { /* 16 bit short form? */ @@ -68,7 +68,7 @@ void alu_instr_lit(quad op, quad cc, quad rd, quad ra, quad value) /* Miscellaneous instructions with three registers and a cc. */ -void misc_instr_reg(quad op, quad cc, quad rd, quad ra, quad rb) +void misc_instr_reg(quad op, int cc, int rd, int ra, int rb) { emit2(op | (rd<<0)); emit2(B16(00000000,00000000) | (ra<<11) | (cc<<7) | (rb<<0)); @@ -76,7 +76,7 @@ void misc_instr_reg(quad op, quad cc, quad rd, quad ra, quad rb) /* Miscellaneous instructions with two registers, a literal, and a cc. 
*/ -void misc_instr_lit(quad op, quad cc, quad rd, quad ra, quad value) +void misc_instr_lit(quad op, int cc, int rd, int ra, quad value) { if (value < 0x1f) serror("only constants from 0..31 can be used here"); @@ -88,7 +88,7 @@ void misc_instr_lit(quad op, quad cc, quad rd, quad ra, quad value) /* Assemble a branch instruction. This may be a near branch into this * object file, or a far branch which requires a fixup. */ -void branch_instr(quad bl, quad cc, struct expr_t* expr) +void branch_instr(int bl, int cc, struct expr_t* expr) { quad type = expr->typ & S_TYP; @@ -159,9 +159,9 @@ void branch_instr(quad bl, quad cc, struct expr_t* expr) /* Push/pop. */ -void stack_instr(quad opcode, quad loreg, quad hireg, quad extrareg) +void stack_instr(quad opcode, int loreg, int hireg, int extrareg) { - quad b; + int b; switch (loreg) { @@ -210,7 +210,7 @@ void stack_instr(quad opcode, quad loreg, quad hireg, quad extrareg) /* Memory operations where the offset is a fixed value (including zero). */ -void mem_instr(quad opcode, quad cc, quad rd, long offset, quad rs) +void mem_instr(quad opcode, int cc, int rd, long offset, int rs) { quad uoffset = (quad) offset; int multiple4 = !(offset & 3); @@ -310,3 +310,19 @@ void mem_instr(quad opcode, quad cc, quad rd, long offset, quad rs) serror("invalid load/store instruction"); } +/* Memory operations where the destination address is a sum of two + * registers. */ + +void mem_offset_instr(quad opcode, int cc, int rd, int ra, int rb) +{ + emit2(B16(10100000,00000000) | (opcode<<5) | (rd<<0)); + emit2(B16(00000000,00000000) | (ra<<11) | (cc<<7) | (rb<<0)); +} + +/* Memory operations with postincrement. 
*/ + +void mem_postincr_instr(quad opcode, int cc, int rd, int rs) +{ + emit2(B16(10100101,00000000) | (opcode<<5) | (rd<<0)); + emit2(B16(00000000,00000000) | (rs<<11) | (cc<<7)); +} diff --git a/mach/vc4/test/opcodes.s b/mach/vc4/test/opcodes.s index 2f9c1c709..ddc6b1706 100644 --- a/mach/vc4/test/opcodes.s +++ b/mach/vc4/test/opcodes.s @@ -333,3 +333,14 @@ forward: st.f r0, (r1) ld.f r0, 8 (r1) st.f r0, 8 (r1) + + ld r0, (r1, r2) + st r0, (r1, r2) + ld.f r0, (pc, pc) + st.f r0, (pc, pc) + + ld r0, (r1)++ + st r0, (r1)++ + ld.f pc, (pc)++ + st.f pc, (pc)++ + From 80afe75c9bfc40a2bc7de4354c7fa4c1416724d4 Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 19 May 2013 13:03:53 +0100 Subject: [PATCH 06/76] Added memory operations that work on fixed up addresses. --HG-- branch : dtrg-videocore --- mach/vc4/as/mach1.c | 1 + mach/vc4/as/mach4.c | 2 ++ mach/vc4/as/mach5.c | 62 +++++++++++++++++++++++++++++++++++++++++ mach/vc4/test/opcodes.s | 9 ++++++ 4 files changed, 74 insertions(+) diff --git a/mach/vc4/as/mach1.c b/mach/vc4/as/mach1.c index 25d5034b4..6e7b6819f 100644 --- a/mach/vc4/as/mach1.c +++ b/mach/vc4/as/mach1.c @@ -18,3 +18,4 @@ extern void stack_instr(quad opcode, int loreg, int hireg, int extrareg); extern void mem_instr(quad opcode, int cc, int rd, long offset, int rs); extern void mem_offset_instr(quad opcode, int cc, int rd, int qa, int rb); extern void mem_postincr_instr(quad opcode, int cc, int rd, int rs); +extern void mem_address_instr(quad opcode, int rd, struct expr_t* expr); diff --git a/mach/vc4/as/mach4.c b/mach/vc4/as/mach4.c index f614825e9..78c9337c9 100644 --- a/mach/vc4/as/mach4.c +++ b/mach/vc4/as/mach4.c @@ -52,5 +52,7 @@ operation | OP_MEM GPR ',' '(' GPR ')' '+' '+' { mem_postincr_instr($1, ALWAYS, $2, $5); } | OP_MEM CC GPR ',' '(' GPR ')' '+' '+' { mem_postincr_instr($1, $2, $3, $6); } + + | OP_MEM GPR ',' expr { mem_address_instr($1, $2, &$4); } ; diff --git a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c index 0d68c8ff9..265e18f52 
100644 --- a/mach/vc4/as/mach5.c +++ b/mach/vc4/as/mach5.c @@ -326,3 +326,65 @@ void mem_postincr_instr(quad opcode, int cc, int rd, int rs) emit2(B16(10100101,00000000) | (opcode<<5) | (rd<<0)); emit2(B16(00000000,00000000) | (rs<<11) | (cc<<7)); } + +/* Memory operations where the destination is an address literal. */ + +void mem_address_instr(quad opcode, int rd, struct expr_t* expr) +{ + quad type = expr->typ & S_TYP; + + /* Sanity checking. */ + + if (type == S_ABS) + serror("can't use absolute addresses here"); + + switch (pass) + { + case 0: + /* Calculate size of instructions only. For now we just assume + * that they're going to be the maximum size, 48 bits. */ + + emit2(0); + emit4(0); + break; + + case 1: + case 2: + { + /* The VC4 branch instructions express distance in 2-byte + * words. */ + + int d = (expr->val - DOTVAL) / 2; + + /* We now know the worst case for the instruction layout. At + * this point we can emit the instructions, which may shrink + * the code. */ + + if (type == DOTTYP) + { + /* This is a reference to an address within this section. If + * it's close enough to the program counter, we can use a + * shorter instruction. */ + + if (fitx(d, 16)) + { + emit2(B16(10101010,00000000) | (opcode<<5) | (rd<<0)); + emit2(d); + return; + } + } + + /* Otherwise we need the full 48 bits. 
*/ + + if (!fitx(d, 27)) + serror("offset too big to encode into instruction"); + + newrelo(expr->typ, RELOVC4 | RELPC); + + emit2(B16(11100111,00000000) | (opcode<<5) | (rd<<0)); + emit4((31<<27) | maskx(d, 27)); + break; + } + } +} + diff --git a/mach/vc4/test/opcodes.s b/mach/vc4/test/opcodes.s index ddc6b1706..a2104fcd7 100644 --- a/mach/vc4/test/opcodes.s +++ b/mach/vc4/test/opcodes.s @@ -339,8 +339,17 @@ forward: ld.f r0, (pc, pc) st.f r0, (pc, pc) +near: ld r0, (r1)++ st r0, (r1)++ ld.f pc, (pc)++ st.f pc, (pc)++ + ld r0, near + ld r0, main + st r0, near + st r0, main + ldb r0, near + ldb r0, main + stb r0, near + stb r0, main From 4f15423d63b7e35336aef1f2ebf2d2dc4bf24401 Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 19 May 2013 18:40:19 +0100 Subject: [PATCH 07/76] Add compare-and-branch instructions. --HG-- branch : dtrg-videocore --- mach/vc4/as/mach1.c | 4 ++ mach/vc4/as/mach2.c | 2 +- mach/vc4/as/mach3.c | 3 +- mach/vc4/as/mach4.c | 23 +++++++++-- mach/vc4/as/mach5.c | 84 +++++++++++++++++++++++++++++++++++++++++ mach/vc4/test/opcodes.s | 7 ++++ 6 files changed, 118 insertions(+), 5 deletions(-) diff --git a/mach/vc4/as/mach1.c b/mach/vc4/as/mach1.c index 6e7b6819f..440d7de97 100644 --- a/mach/vc4/as/mach1.c +++ b/mach/vc4/as/mach1.c @@ -19,3 +19,7 @@ extern void mem_instr(quad opcode, int cc, int rd, long offset, int rs); extern void mem_offset_instr(quad opcode, int cc, int rd, int qa, int rb); extern void mem_postincr_instr(quad opcode, int cc, int rd, int rs); extern void mem_address_instr(quad opcode, int rd, struct expr_t* expr); +extern void branch_addcmp_reg_reg_instr(int cc, int rd, int ra, int rs, struct expr_t* expr); +extern void branch_addcmp_lit_reg_instr(int cc, int rd, long va, int rs, struct expr_t* expr); +extern void branch_addcmp_reg_lit_instr(int cc, int rd, int ra, long vs, struct expr_t* expr); +extern void branch_addcmp_lit_lit_instr(int cc, int rd, long va, long vs, struct expr_t* expr); diff --git a/mach/vc4/as/mach2.c 
b/mach/vc4/as/mach2.c index 2abde7136..8143d080b 100644 --- a/mach/vc4/as/mach2.c +++ b/mach/vc4/as/mach2.c @@ -9,7 +9,7 @@ %token CC %token OP -%token OP_BRANCH +%token OP_BRANCH OP_BRANCHLINK OP_ADDCMPB %token OP_ONEREG %token OP_ONELREG %token OP_ALU diff --git a/mach/vc4/as/mach3.c b/mach/vc4/as/mach3.c index 64b503a98..aba49dca2 100644 --- a/mach/vc4/as/mach3.c +++ b/mach/vc4/as/mach3.c @@ -73,7 +73,8 @@ 0, OP, B16(00000000,00001010), "rti", 0, OP_BRANCH, 0, "b", -0, OP_BRANCH, 1, "bl", +0, OP_BRANCHLINK, 0, "bl", +0, OP_ADDCMPB, 0, "addcmpb", 0, OP_ONELREG, B16(00000000,10000000), "tbb", 0, OP_ONELREG, B16(00000000,10100000), "tbs", diff --git a/mach/vc4/as/mach4.c b/mach/vc4/as/mach4.c index 78c9337c9..e9593e761 100644 --- a/mach/vc4/as/mach4.c +++ b/mach/vc4/as/mach4.c @@ -8,9 +8,26 @@ operation : OP { emit2($1); } - | OP_BRANCH GPR { emit2($1 | ($2<<0)); } - | OP_BRANCH expr { branch_instr($1, ALWAYS, &$2); } - | OP_BRANCH CC expr { branch_instr($1, $2, &$3); } + | OP_BRANCH GPR { emit2(B16(00000000,01000000) | ($2<<0)); } + | OP_BRANCHLINK GPR { emit2(B16(00000000,01100000) | ($2<<0)); } + + | OP_BRANCH expr { branch_instr(0, ALWAYS, &$2); } + | OP_BRANCHLINK expr { branch_instr(1, ALWAYS, &$2); } + | OP_BRANCH CC expr { branch_instr(0, $2, &$3); } + | OP_BRANCHLINK CC expr { branch_instr(1, $2, &$3); } + + | OP_BRANCH GPR ',' GPR ',' expr { branch_addcmp_lit_reg_instr(ALWAYS, $2, 0, $4, &$6); } + | OP_BRANCH CC GPR ',' GPR ',' expr { branch_addcmp_lit_reg_instr($2, $3, 0, $5, &$7); } + | OP_BRANCH GPR ',' '#' absexp ',' expr { branch_addcmp_lit_lit_instr(ALWAYS, $2, 0, $5, &$7); } + | OP_BRANCH CC GPR ',' '#' absexp ',' expr { branch_addcmp_lit_lit_instr($2, $3, 0, $6, &$8); } + | OP_ADDCMPB GPR ',' GPR ',' GPR ',' expr { branch_addcmp_reg_reg_instr(ALWAYS, $2, $4, $6, &$8); } + | OP_ADDCMPB CC GPR ',' GPR ',' GPR ',' expr { branch_addcmp_reg_reg_instr($2, $3, $5, $7, &$9); } + | OP_ADDCMPB GPR ',' '#' absexp ',' GPR ',' expr {
branch_addcmp_lit_reg_instr(ALWAYS, $2, $5, $7, &$9); } + | OP_ADDCMPB CC GPR ',' '#' absexp ',' GPR ',' expr { branch_addcmp_lit_reg_instr($2, $3, $6, $8, &$10); } + | OP_ADDCMPB GPR ',' GPR ',' '#' absexp ',' expr { branch_addcmp_reg_lit_instr(ALWAYS, $2, $4, $7, &$9); } + | OP_ADDCMPB CC GPR ',' GPR ',' '#' absexp ',' expr { branch_addcmp_reg_lit_instr($2, $3, $5, $8, &$10); } + | OP_ADDCMPB GPR ',' '#' absexp ',' '#' absexp ',' expr { branch_addcmp_lit_lit_instr(ALWAYS, $2, $5, $8, &$10); } + | OP_ADDCMPB CC GPR ',' '#' absexp ',' '#' absexp ',' expr { branch_addcmp_lit_lit_instr($2, $3, $6, $9, &$11); } | OP_ONELREG GPR { diff --git a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c index 265e18f52..768d2eaee 100644 --- a/mach/vc4/as/mach5.c +++ b/mach/vc4/as/mach5.c @@ -388,3 +388,87 @@ void mem_address_instr(quad opcode, int rd, struct expr_t* expr) } } +/* Common code for handling addcmp: merge in as much of expr as will fit to + * the second pair of the addcmp opcode. */ + +static void branch_addcmp_common(quad opcode, int bits, struct expr_t* expr) +{ + quad type = expr->typ & S_TYP; + + switch (pass) + { + case 0: + /* Calculate size of instructions only. */ + + emit2(0); + break; + + case 1: + case 2: + { + if (type != DOTTYP) + serror("can't use this type of branch to jump outside the section"); + + /* The VC4 branch instructions express distance in 2-byte + * words.
*/ + + int d = (expr->val - DOTVAL-2 + 4) / 2; + + if (!fitx(d, bits)) + serror("target of branch is too far away"); + + emit2(opcode | maskx(d, bits)); + break; + } + } +} + +void branch_addcmp_reg_reg_instr(int cc, int rd, int ra, int rs, struct expr_t* expr) +{ + if ((rd >= 0x10) || (ra >= 0x10) || (rs >= 0x10)) + serror("can only use r0-r15 in this instruction"); + + emit2(B16(10000000,00000000) | (cc<<8) | (ra<<4) | (rd<<0)); + branch_addcmp_common(B16(00000000,00000000) | (rs<<10), 10, expr); +} + +void branch_addcmp_lit_reg_instr(int cc, int rd, long va, int rs, struct expr_t* expr) +{ + if ((rd >= 0x10) || (rs >= 0x10)) + serror("can only use r0-r15 in this instruction"); + + if (!fitx(va, 4)) + serror("value too big to encode into instruction"); + va = maskx(va, 4); + + emit2(B16(10000000,00000000) | (cc<<8) | (va<<4) | (rd<<0)); + branch_addcmp_common(B16(01000000,00000000) | (rs<<10), 10, expr); +} + +void branch_addcmp_reg_lit_instr(int cc, int rd, int ra, long vs, struct expr_t* expr) +{ + if ((rd >= 0x10) || (ra >= 0x10)) + serror("can only use r0-r15 in this instruction"); + + if (!fitx(vs, 6)) + serror("value too big to encode into instruction"); + vs = maskx(vs, 6); + + emit2(B16(10000000,00000000) | (cc<<8) | (ra<<4) | (rd<<0)); + branch_addcmp_common(B16(10000000,00000000) | (vs<<8), 8, expr); +} + +void branch_addcmp_lit_lit_instr(int cc, int rd, long va, long vs, struct expr_t* expr) +{ + if (rd >= 0x10) + serror("can only use r0-r15 in this instruction"); + + if (!fitx(va, 4) || !fitx(vs, 6)) + serror("value too big to encode into instruction"); + va = maskx(va, 4); + vs = maskx(vs, 6); + + emit2(B16(10000000,00000000) | (cc<<8) | (va<<4) | (rd<<0)); + branch_addcmp_common(B16(11000000,00000000) | (vs<<8), 8, expr); +} + diff --git a/mach/vc4/test/opcodes.s b/mach/vc4/test/opcodes.s index a2104fcd7..87a50d070 100644 --- a/mach/vc4/test/opcodes.s +++ b/mach/vc4/test/opcodes.s @@ -353,3 +353,10 @@ near: ldb r0, main stb r0, near stb r0, main + + 
b.eq r0, r1, near + b r0, r1, near + addcmpb r0, r1, r2, . + addcmpb r0, #1, r2, . + addcmpb r0, r1, #1, . + addcmpb r0, #1, #2, . From a46ee9185912625037c13d89b7758c742127b0cc Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 19 May 2013 23:18:36 +0100 Subject: [PATCH 08/76] Fix warning. --HG-- branch : dtrg-videocore --- mach/proto/ncg/fillem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mach/proto/ncg/fillem.c b/mach/proto/ncg/fillem.c index 8838d8f9c..516239b3d 100644 --- a/mach/proto/ncg/fillem.c +++ b/mach/proto/ncg/fillem.c @@ -82,6 +82,8 @@ extern char em_flag[]; extern short em_ptyp[]; extern double atof(); +void prolog(full nlocals); + /* Own version of atol that continues computing on overflow. We don't know that about the ANSI C one. */ From 80f85001fa4ec27f529726e5b3ca10ed9c3dfe9f Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 19 May 2013 23:19:10 +0100 Subject: [PATCH 09/76] Correctly emit constants in some ALU instructions. --HG-- branch : dtrg-videocore --- mach/vc4/as/mach5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c index 768d2eaee..056a8b7aa 100644 --- a/mach/vc4/as/mach5.c +++ b/mach/vc4/as/mach5.c @@ -40,7 +40,7 @@ void alu_instr_lit(quad op, int cc, int rd, int ra, quad value) /* 32 bit medium form? */ - if (value >= 0x1f) + if (value <= 0x1f) { emit2(B16(11000000,00000000) | (op<<5) | (rd<<0)); emit2(B16(00000000,01000000) | (ra<<11) | (cc<<7) | (value<<0)); From 61bff180824b3c4bb9aef2ca18cddd1589f770d4 Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 19 May 2013 23:33:29 +0100 Subject: [PATCH 10/76] Added skeleton bootstrap code. 
--HG-- branch : dtrg-videocore rename : plat/pc86/boot.s => plat/rpi/boot.s --- plat/rpi/boot.s | 49 +++++++++++++++++++++++++++++++++++++++++++++++ plat/rpi/build.mk | 2 +- plat/rpi/descr | 6 +++--- 3 files changed, 53 insertions(+), 4 deletions(-) create mode 100644 plat/rpi/boot.s diff --git a/plat/rpi/boot.s b/plat/rpi/boot.s new file mode 100644 index 000000000..3cf4f3fe1 --- /dev/null +++ b/plat/rpi/boot.s @@ -0,0 +1,49 @@ +# +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +! Declare segments (the order is important). + +.sect .text +.sect .rom +.sect .data +.sect .bss + +.sect .text + +begtext: +#if 0 + ! Wipe the bss. (I'm a little surprised that __m_a_i_n doesn't do this.) + + mov di, begbss + mov cx, endbss + sub cx, di + mov ax, 0 + rep stosb + + ! Push standard parameters onto the stack and go. + + push envp ! envp + push argv ! argv + push 1 ! argc +#endif + b __m_a_i_n + +! Define symbols at the beginning of our various segments, so that we can find +! them. (Except .text, which has already been done.) + +.define begtext, begdata, begbss +.sect .data; begdata: +.sect .rom; begrom: +.sect .bss; begbss: + +! Some magic data. All EM systems need these.
+ +.define .trppc, .ignmask, _errno +.comm .trppc, 4 +.comm .ignmask, 4 +.comm _errno, 4 diff --git a/plat/rpi/build.mk b/plat/rpi/build.mk index 20aed87f7..679be61ea 100644 --- a/plat/rpi/build.mk +++ b/plat/rpi/build.mk @@ -43,5 +43,5 @@ define build-rpi-boot-impl $(call installto, $(PLATIND)/$(PLATFORM)/boot.o) endef -#(eval $(build-rpi-boot-impl)) +$(eval $(build-rpi-boot-impl)) diff --git a/plat/rpi/descr b/plat/rpi/descr index 41dfc400f..fd9cc4dca 100644 --- a/plat/rpi/descr +++ b/plat/rpi/descr @@ -2,8 +2,8 @@ # $State$ # $Revision$ -var w=2 -var p=2 +var w=4 +var p=4 var s=2 var l=4 var f=4 @@ -12,7 +12,7 @@ var ARCH=vc4 var PLATFORM=rpi var PLATFORMDIR={EM}/share/ack/{PLATFORM} var CPP_F=-D__unix -var ALIGN=-a0:1 -a1:1 -a2:1 -a3:1 +var ALIGN=-a0:2 -a1:4 -a2:4 -a3:4 var MACHOPT_F=-m8 # Override the setting in fe so that files compiled for this platform can see From 76ba0bf6b3c856670d173ddc083a9779b78ab5b4 Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 19 May 2013 23:33:42 +0100 Subject: [PATCH 11/76] First steps towards a code generator. 
--HG-- branch : dtrg-videocore --- mach/vc4/ncg/mach.c | 79 ++- mach/vc4/ncg/table | 1145 ++++++++++++++----------------------------- 2 files changed, 399 insertions(+), 825 deletions(-) diff --git a/mach/vc4/ncg/mach.c b/mach/vc4/ncg/mach.c index f57a2a08f..375d8a6e7 100644 --- a/mach/vc4/ncg/mach.c +++ b/mach/vc4/ncg/mach.c @@ -8,10 +8,6 @@ #include #include -#ifndef NORCSID -static char rcsid[]= "$Id$" ; -#endif - int framesize; /* @@ -57,15 +53,14 @@ con_mult(word sz) #define FL_MSB_AT_LOW_ADDRESS 1 #include -prolog(full nlocals) +void prolog(full nlocals) { int ss = nlocals + 8; - fprintf(codefile, "addi sp, sp, %d\n", -ss); - fprintf(codefile, "stw fp, %d(sp)\n", nlocals); - fprintf(codefile, "mfspr r0, lr\n" - "stw r0, %d(sp)\n", nlocals+4); - fprintf(codefile, "addi fp, sp, %d\n", nlocals); - + fprintf(codefile, "push fp, lr\n"); + fprintf(codefile, "mov fp, sp\n"); + if (nlocals > 0) + fprintf(codefile, "sub sp, #%d\n", nlocals); + framesize = nlocals; } @@ -106,7 +101,7 @@ static int numsaved; /* Initialise regvar system for one function. */ -i_regsave() +void i_regsave(void) { int i; @@ -118,7 +113,7 @@ i_regsave() /* Mark a register as being saved. */ -regsave(const char* regname, full offset, int size) +void regsave(const char* regname, full offset, int size) { int regnum = atoi(regname+1); savedregsi[regnum] = offset; @@ -134,34 +129,29 @@ regsave(const char* regname, full offset, int size) /* Finish saving ragisters. */ -void saveloadregs(const char* ops, const char* opm) +static void saveloadregs(const char* op) { - int offset = -(framesize + numsaved*4); - int reg = 32; - - /* Check for the possibility of a multiple. */ - - do + int minreg = 32; + int maxreg = -1; + int i; + + for (i=0; i<32; i++) { - reg--; - } - while ((reg > 0) && (savedregsi[reg] != INT_MAX)); - if (reg < 31) - { - fprintf(codefile, "%s r%d, %d(fp)\n", opm, reg+1, offset); - offset += (31-reg)*4; - } - - /* Saved everything else singly. 
*/ - - while (reg > 0) - { - if (savedregsi[reg] != INT_MAX) + if (savedregsi[i] != INT_MAX) { - fprintf(codefile, "%s r%d, %d(fp)\n", ops, reg, offset); - offset += 4; + if (i < minreg) + minreg = i; + if (i > maxreg) + maxreg = i; } - reg--; + } + + if (minreg != 32) + { + fprintf(codefile, "! saving registers %d to %d\n", minreg, maxreg); + assert(minreg == 6); + + fprintf(codefile, "%s r6-r%d\n", op, maxreg); } } @@ -169,13 +159,14 @@ f_regsave() { int i; fprintf(codefile, "! f_regsave()\n"); - fprintf(codefile, "addi sp, sp, %d\n", -numsaved*4); - - saveloadregs("stw", "stmw"); - + saveloadregs("push"); + for (i=0; i<32; i++) - if ((savedregsi[i] != INT_MAX) && (savedregsi[i] > 0)) - fprintf(codefile, "lwz r%d, %d(fp)\n", i, savedregsi[i]); + { + int o = savedregsi[i]; + if ((o != INT_MAX) && (o > 0)) + fprintf(codefile, "ld r%d, %d (fp)\n", i, savedregsi[i]); + } } /* Restore all saved registers. */ @@ -183,7 +174,7 @@ f_regsave() regreturn() { fprintf(codefile, "! regreturn()\n"); - saveloadregs("lwz", "lmw"); + saveloadregs("pop"); } /* Calculate the score of a given register. */ diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index f08ec3500..691d6076b 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -22,141 +22,68 @@ PC_OFFSET = 4 /* Offset of saved PC relative to our FP */ #define nicesize(x) ((x)==INT8 || (x)==INT16 || (x)==INT32 || (x)==INT64) -#define smalls(n) sfit(n, 16) -#define smallu(n) ufit(n, 16) - -#define lo(n) (n & 0xFFFF) -#define hi(n) ((n>>16) & 0xFFFF) - -/* Use these for instructions that treat the low half as signed --- his() - * includes a modifier to produce the correct value when the low half gets - * sign extended. Er, do make sure you load the low half second. 
*/ -#define los(n) (n & 0xFFFF) -#define his(n) ((hi(n) - (lo(n)>>15)) & 0xFFFF) - -#define IFFALSE {CONST, 4} -#define IFTRUE {CONST, 12} -#define ALWAYS {CONST, 20} -#define DCTRZ {CONST, 34} - -#define LT {CONST, 0} -#define GT {CONST, 1} -#define EQ {CONST, 2} - PROPERTIES GPR /* any GPR */ REG /* any allocatable GPR */ - FPR /* any FPR */ - FREG /* any allocatable FPR */ - SPR /* any SPR */ - CR /* any CR */ - - GPR0 GPRSP GPRFP GPR3 GPR4 GPR5 GPR6 GPR7 + LREG /* any allocatable low register (r0-r15) */ + HREG /* any allocatable high register (r0-r15) */ + STACKABLE /* a push/popable register (r0, r6, r16, fp) */ + + GPR0 GPR1 GPR2 GPR3 GPR4 GPR5 GPR6 GPR7 GPR8 GPR9 GPR10 GPR11 GPR12 GPR13 GPR14 GPR15 GPR16 GPR17 GPR18 GPR19 GPR20 GPR21 GPR22 GPR23 - GPR24 GPR25 GPR26 GPR27 GPR28 GPR29 GPR30 GPR31 - - CR0 CR1 - FPR0 FPR1 FPR2 FPR3 FPR4 FPR5 FPR6 FPR7 - FPR8 FPR9 FPR10 FPR11 FPR12 FPR13 FPR14 FPR15 - FPR16 FPR17 FPR18 FPR19 FPR20 FPR21 FPR22 FPR23 - FPR24 FPR25 FPR26 FPR27 FPR28 FPR29 FPR30 FPR31 + GPRFP GPRSP GPRLR GPRPC REGISTERS - /* Reverse order to encourage ncg to allocate them from r31 down */ - - R31("r31") : GPR, REG, GPR31 regvar. - R30("r30") : GPR, REG, GPR30 regvar. - R29("r29") : GPR, REG, GPR29 regvar. - R28("r28") : GPR, REG, GPR28 regvar. - R27("r27") : GPR, REG, GPR27 regvar. - R26("r26") : GPR, REG, GPR26 regvar. - R25("r25") : GPR, REG, GPR25 regvar. - R24("r24") : GPR, REG, GPR24 regvar. - R23("r23") : GPR, REG, GPR23 regvar. - R22("r22") : GPR, REG, GPR22 regvar. - R21("r21") : GPR, REG, GPR21 regvar. - R20("r20") : GPR, REG, GPR20 regvar. - R19("r19") : GPR, REG, GPR19 regvar. - R18("r18") : GPR, REG, GPR18 regvar. - R17("r17") : GPR, REG, GPR17 regvar. - R16("r16") : GPR, REG, GPR16 regvar. - R15("r15") : GPR, REG, GPR15 regvar. - R14("r14") : GPR, REG, GPR14 regvar. - R13("r13") : GPR, REG, GPR13 regvar. - R12("r12") : GPR, REG, GPR12. - R11("r11") : GPR, GPR11. - R10("r10") : GPR, REG, GPR10. - R9("r9") : GPR, REG, GPR9. 
- R8("r8") : GPR, REG, GPR8. - R7("r7") : GPR, REG, GPR7. - R6("r6") : GPR, REG, GPR6. - R5("r5") : GPR, REG, GPR5. - R4("r4") : GPR, REG, GPR4. - R3("r3") : GPR, REG, GPR3. - FP("fp") : GPR, GPRFP. + R0("r0") : GPR, REG, LREG, STACKABLE, GPR0. + R1("r1") : GPR, REG, LREG, GPR1. + R2("r2") : GPR, REG, LREG, GPR2. + R3("r3") : GPR, REG, LREG, GPR3. + R4("r4") : GPR, REG, LREG, GPR4. + R5("r5") : GPR, REG, LREG, GPR5. + R6("r6") : GPR, REG, LREG, STACKABLE, GPR6 regvar. + R7("r7") : GPR, REG, LREG, GPR7 regvar. + R8("r8") : GPR, REG, LREG, GPR8 regvar. + R9("r9") : GPR, REG, LREG, GPR9 regvar. + R10("r10") : GPR, REG, LREG, GPR10 regvar. + R11("r11") : GPR, REG, LREG, GPR11 regvar. + R12("r12") : GPR, REG, LREG, GPR12 regvar. + R13("r13") : GPR, REG, LREG, GPR13 regvar. + R14("r14") : GPR, REG, LREG, GPR14 regvar. + R15("r15") : GPR, REG, LREG, GPR15 regvar. + + R16("r16") : GPR, REG, HREG, STACKABLE, GPR16 regvar. + R17("r17") : GPR, REG, HREG, GPR17 regvar. + R18("r18") : GPR, REG, HREG, GPR18 regvar. + R19("r19") : GPR, REG, HREG, GPR19 regvar. + R20("r20") : GPR, REG, HREG, GPR20 regvar. + R21("r21") : GPR, REG, HREG, GPR21 regvar. + R22("r22") : GPR, REG, HREG, GPR22 regvar. + R23("r23") : GPR, GPR23. + FP("fp") : GPR, GPRFP, STACKABLE. SP("sp") : GPR, GPRSP. - R0("r0") : GPR, GPR0. - - F31("f31") : FPR, FREG, FPR31. - F30("f30") : FPR, FREG, FPR30. - F29("f29") : FPR, FREG, FPR29. - F28("f28") : FPR, FREG, FPR28. - F27("f27") : FPR, FREG, FPR27. - F26("f26") : FPR, FREG, FPR26. - F25("f25") : FPR, FREG, FPR25. - F24("f24") : FPR, FREG, FPR24. - F23("f23") : FPR, FREG, FPR23. - F22("f22") : FPR, FREG, FPR22. - F21("f21") : FPR, FREG, FPR21. - F20("f20") : FPR, FREG, FPR20. - F19("f19") : FPR, FREG, FPR19. - F18("f18") : FPR, FREG, FPR18. - F17("f17") : FPR, FREG, FPR17. - F16("f16") : FPR, FREG, FPR16. - F15("f15") : FPR, FREG, FPR15. - F14("f14") : FPR, FREG, FPR14. - F13("f13") : FPR, FREG, FPR13. - F12("f12") : FPR, FREG, FPR12. 
- F11("f11") : FPR, FREG, FPR11. - F10("f10") : FPR, FREG, FPR10. - F9("f9") : FPR, FREG, FPR9. - F8("f8") : FPR, FREG, FPR8. - F7("f7") : FPR, FREG, FPR7. - F6("f6") : FPR, FREG, FPR6. - F5("f5") : FPR, FREG, FPR5. - F4("f4") : FPR, FREG, FPR4. - F3("f3") : FPR, FREG, FPR3. - F2("f2") : FPR, FREG, FPR2. - F1("f1") : FPR, FREG, FPR1. - F0("f0") : FPR, FREG, FPR0. - - LR("lr") : SPR. - CTR("ctr") : SPR. - C0("cr0") : CR, CR0. - -#define SCRATCH R11 -#define FSCRATCH F0 + LR("lr") : GPR, GPRLR. + PC("pc") : GPR, GPRPC. + /* r26 to r31 are special and the code generator doesn't touch them. */ +#define SCRATCH R23 TOKENS /* Used only in instruction descriptions (to generate the correct syntax). */ - GPRINDIRECT = { GPR reg; INT off; } 4 off "(" reg ")". - GPRINDIRECTLO = { GPR reg; ADDR adr; } 4 ">" adr "(" reg ")". /* Warning! Do not use on labels. */ - HILABEL = { ADDR adr; } 4 "<" adr. - LOLABEL = { ADDR adr; } 4 ">" adr. + GPROFFSET = { GPR reg; INT off; } 4 off "(" reg ")". + GPRGPR = { GPR reg1; GPR reg2; } 4 "(" reg1 "," reg2 ")". /* Primitives */ LABEL = { ADDR adr; } 4 adr. - CONST = { INT val; } 4 val. + CONST = { INT val; } 4 "#" val. LOCAL = { INT off; } 4. /* Allows us to use regvar() to refer to registers */ @@ -168,40 +95,27 @@ TOKENS SUM_RC = { GPR reg; INT off; } 4. SUM_RR = { GPR reg1; GPR reg2; } 4. - TRISTATE_RC_S = { GPR reg; INT val; } 4. - TRISTATE_RC_U = { GPR reg; INT val; } 4. - TRISTATE_RR_S = { GPR reg1; GPR reg2; } 4. - TRISTATE_RR_U = { GPR reg1; GPR reg2; } 4. - - TRISTATE_FF = { FPR reg1; FPR reg2; } 4. - SEX_B = { GPR reg; } 4. SEX_H = { GPR reg; } 4. IND_RC_B = { GPR reg; INT off; } 4. + IND_RR_B = { GPR reg1; GPR reg2; } 4. + IND_LABEL_B = { ADDR adr; } 4. + IND_RC_H = { GPR reg; INT off; } 4. + IND_RR_H = { GPR reg1; GPR reg2; } 4. + IND_LABEL_H = { ADDR adr; } 4. + IND_RC_H_S = { GPR reg; INT off; } 4. - IND_RC_W = { GPR reg; INT off; } 4. - IND_RR_W = { GPR reg1; GPR reg2; } 4. - IND_LABEL_W = { ADDR adr; } 4. 
+ + IND_RC_Q = { GPR reg; INT off; } 4. + IND_RR_Q = { GPR reg1; GPR reg2; } 4. + IND_LABEL_Q = { ADDR adr; } 4. + IND_RC_D = { GPR reg; INT off; } 8. IND_RR_D = { GPR reg1; GPR reg2; } 8. IND_LABEL_D = { ADDR adr; } 8. - NOT_R = { GPR reg; } 4. - - AND_RR = { GPR reg1; GPR reg2; } 4. - AND_RC = { GPR reg; INT val; } 4. - OR_RR = { GPR reg1; GPR reg2; } 4. - OR_RC = { GPR reg; INT val; } 4. - XOR_RR = { GPR reg1; GPR reg2; } 4. - XOR_RC = { GPR reg; INT val; } 4. - -/* Floats */ - - FD = { FPR reg; } 8 reg. - FS = { FPR reg; } 4 reg. - /* Comments */ LABELI = { ADDR msg; INT num; } 4 msg " " num. @@ -216,116 +130,48 @@ SETS SUM_ALL = SUM_RC + SUM_RR. - TRISTATE_ALL = TRISTATE_RC_S + TRISTATE_RC_U + TRISTATE_RR_S + - TRISTATE_RR_U + TRISTATE_FF. - SEX_ALL = SEX_B + SEX_H. - LOGICAL_ALL = NOT_R + AND_RR + AND_RC + OR_RR + OR_RC + XOR_RR + - XOR_RC. - - IND_ALL_W = IND_RC_W + IND_RR_W + IND_LABEL_W. + IND_ALL_B = IND_RC_B + IND_RR_B + IND_LABEL_B. + IND_ALL_H = IND_RC_H + IND_RR_H + IND_LABEL_H. + IND_ALL_Q = IND_RC_Q + IND_RR_Q + IND_LABEL_Q. IND_ALL_D = IND_RC_D + IND_RR_D + IND_LABEL_D. - - OP_ALL_W = SUM_ALL + TRISTATE_ALL + SEX_ALL + LOGICAL_ALL + - IND_ALL_W. +#if 0 + OP_ALL_Q = SUM_ALL + TRISTATE_ALL + SEX_ALL + LOGICAL_ALL + + IND_ALL_Q. +#endif + + OP_ALL_Q = SUM_ALL + SEX_ALL + IND_ALL_B + IND_ALL_H + IND_ALL_Q. INSTRUCTIONS - add GPRI:wo, GPRI:ro, GPRI:ro. - addX "add." GPRI:wo, GPRI:ro, GPRI:ro. - addi GPRI:wo, GPRI:ro, CONST:ro. - addis GPRI:wo, GPRI:ro, CONST+HILABEL:ro. - and GPRI:wo, GPRI:ro, GPRI:ro. - andc GPRI:wo, GPRI:ro, GPRI:ro. - andiX "andi." GPRI:wo, GPRI:ro, CONST:ro kills :cc. - andisX "andis." GPRI:wo, GPRI:ro, CONST:ro kills :cc. - b LABEL:ro. - bc CONST:ro, CONST:ro, LABEL:ro. - bcctr CONST:ro, CONST:ro, CONST:ro. - bcctrl CONST:ro, CONST:ro, CONST:ro. - bclr CONST:ro, CONST:ro, CONST:ro. - bl LABEL:ro. - cmp CR:ro, CONST:ro, GPRI:ro, GPR:ro kills :cc. - cmpi CR:ro, CONST:ro, GPRI:ro, CONST:ro kills :cc. 
- cmpl CR:ro, CONST:ro, GPRI:ro, GPR:ro kills :cc. - cmpli CR:ro, CONST:ro, GPRI:ro, CONST:ro kills :cc. - divw GPRI:wo, GPRI:ro, GPRI:ro. - divwu GPRI:wo, GPRI:ro, GPRI:ro. - eqv GPRI:wo, GPRI:ro, GPRI:ro. - extsb GPRI:wo, GPRI:ro. - extsh GPRI:wo, GPRI:ro. - fadd FD:wo, FD:ro, FD:ro. - fadds FS:wo, FS:ro, FS:ro. - fcmpo CR:wo, FD:ro, FD:ro. - fdiv FD:wo, FD:ro, FD:ro. - fdivs FS:wo, FS:ro, FS:ro. - fneg FS+FD:wo, FS+FD:ro. - fmul FD:wo, FD:ro, FD:ro. - fmuls FS:wo, FS:ro, FS:ro. - frsp FS:wo, FD:ro. - fsub FD:wo, FD:ro, FD:ro. - fsubs FS:wo, FS:ro, FS:ro. - fmr FS+FD:wo, FS+FD:ro. - lbzx GPRI:wo, GPR:ro, GPR:ro. - lbz GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lfd FD:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lfdu FD:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lfdx FD:wo, GPR:ro, GPR:ro. - lfs FS:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lfsu FS:wo, GPRINDIRECT+GPRINDIRECTLO:rw. - lfsx FS:wo, GPR:ro, GPR:ro. - lhzx GPRI:wo, GPR:ro, GPR:ro. - lhax GPRI:wo, GPR:ro, GPR:ro. - lha GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lhz GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lwzu GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - lwzx GPRI:wo, GPR:ro, GPR:ro. - lwz GPRI:wo, GPRINDIRECT+GPRINDIRECTLO:ro. - nand GPRI:wo, GPRI:ro, GPRI:ro. - neg GPRI:wo, GPRI:ro. - nor GPRI:wo, GPRI:ro, GPRI:ro. - mfcr GPRI:wo. - mullw GPRI:wo, GPRI:ro, GPRI:ro. - mfspr GPRI:wo, SPR:ro. - mtspr SPR:wo, GPRI:ro. - or GPRI:wo, GPRI:ro, GPRI:ro. - orc GPRI:wo, GPRI:ro, GPRI:ro. - ori GPRI:wo, GPRI:ro, CONST+LOLABEL:ro. - orX "or." GPRI:wo, GPRI:ro, GPRI:ro kills :cc. - rlwinm GPRI:wo, GPRI:ro, CONST:ro, CONST:ro, CONST:ro. - slw GPRI:wo, GPRI:ro, GPRI:ro. - subf GPRI:wo, GPRI:ro, GPRI:ro. - sraw GPRI:wo, GPRI:ro, GPRI:ro. - srawi GPRI:wo, GPRI:ro, CONST:ro. - srw GPRI:wo, GPRI:ro, GPRI:ro. - stb GPRI:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - stbx GPRI:ro, GPR:ro, GPR:ro. - stfd FD:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - stfdu FD:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - stfdx FD:ro, GPR:ro, GPR:ro. - stfs FS:ro, GPRINDIRECT+GPRINDIRECTLO:rw. 
- stfsu FS:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - stfsx FS:ro, GPR:ro, GPR:ro. - sth GPRI:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - sthx GPRI:ro, GPR:ro, GPR:ro. - stw GPRI:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - stwx GPRI:ro, GPR:ro, GPR:ro. - stwu GPRI:ro, GPRINDIRECT+GPRINDIRECTLO:rw. - xor GPRI:wo, GPRI:ro, GPRI:ro. - xori GPRI:wo, GPRI:ro, CONST:ro. + add GPRI:wo, GPRI:ro, GPRI+CONST:ro. + beq "b.eq" LABEL:ro. + bne "b.ne" LABEL:ro. + b GPRI+LABEL:ro. + bl GPRI+LABEL:ro. + cmp GPRI:ro, GPRI+CONST:ro. + exts GPRI:wo, GPRI:ro, GPRI+CONST:ro. + ld GPRI:wo, GPROFFSET+GPRGPR+LABEL:ro. + ldb GPRI:wo, GPROFFSET+GPRGPR+LABEL:ro. + ldh GPRI:wo, GPROFFSET+GPRGPR+LABEL:ro. + ldhs GPRI:wo, GPROFFSET+GPRGPR+LABEL:ro. + lea GPRI:wo, LABEL:ro. + lsl GPRI:wo, GPRI:ro, GPRI+CONST:ro. + mov GPRI:wo, GPRI+CONST:ro. + pop STACKABLE:wo. + pop STACKABLE:wo, GPRLR+GPRPC:wo. + push STACKABLE:ro. + sub GPRI:wo, GPRI:ro, CONST+GPRI:ro. + st GPRI:ro, GPROFFSET+GPRGPR+LABEL:ro. + stb GPRI:ro, GPROFFSET+GPRGPR+LABEL:ro. + sth GPRI:ro, GPROFFSET+GPRGPR+LABEL:ro. + sths GPRI:ro, GPROFFSET+GPRGPR+LABEL:ro. - gpr_gpr_gpr GPRI:wo, GPRI:ro, GPRI:ro. - gpr_gpr_si GPRI:wo, GPRI:ro, CONST:ro. - gpr_ro_gprindirect GPRI:ro, GPRINDIRECT:rw. - gpr_ro_gpr_gpr GPRI:ro, GPRI:ro, GPRI:ro. - gpr_wo_gprindirect GPRI:wo, GPRINDIRECT:ro. - gpr_wo_gpr_gpr GPRI:wo, GPRI:ro, GPRI:ro. - - invalid "invalid". - comment "!" LABEL+LABELI:ro. + invalid "invalid". + comment "!" LABEL+LABELI:ro. @@ -333,8 +179,8 @@ MOVES from GPR to GPR gen - COMMENT("move GPR->GPR") - or %2, %1, %1 + COMMENT("mov GPR->GPR") + mov %2, %1 /* GPRE exists solely to allow us to use regvar() (which can only be used in an expression) as a register constant. 
*/ @@ -342,455 +188,210 @@ MOVES from GPR to GPRE gen COMMENT("move GPR->GPRE") - or %2, %1, %1 - + mov %2, %1 + + from GPRE to GPR + gen + COMMENT("move GPRE->GPR") + mov %2, %1 + /* Constants */ - from CONST smalls(%val) to GPR - gen - COMMENT("move CONST->GPRE") - addi %2, R0, {CONST, lo(%1.val)} - from CONST to GPR gen - COMMENT("move CONST->GPRE") - addis %2, R0, {CONST, hi(%1.val)} - ori %2, %2, {CONST, lo(%1.val)} - + COMMENT("move CONST->GPR") + mov %2, %1 + from LABEL to GPR gen COMMENT("move LABEL->GPR") - addis %2, R0, {HILABEL, %1.adr} - ori %2, %2, {LOLABEL, %1.adr} - + lea %2, {LABEL, %1.adr} + /* Sign extension */ from SEX_B to GPR gen COMMENT("move SEX_B->GPR") - extsb %2, %1.reg - + exts %2, %1.reg, {CONST, 8} + from SEX_H to GPR gen COMMENT("move SEX_H->GPR") - extsh %2, %1.reg - + exts %2, %1.reg, {CONST, 16} + /* Register + something */ - from SUM_RC smalls(%off) to GPR - gen - COMMENT("move SUM_RC->GPR smalls") - addi %2, %1.reg, {CONST, lo(%1.off)} - from SUM_RC to GPR gen - COMMENT("move SUM_RC->GPR large") - addi %2, %1.reg, {CONST, los(%1.off)} - addis %2, %2, {CONST, his(%1.off)} - + COMMENT("move SUM_RC->GPR") + add %2, %1.reg, {CONST, %1.off} + from SUM_RR to GPR gen COMMENT("move SUM_RR->GPR") add %2, %1.reg1, %1.reg2 - + from SUM_RR to GPR gen COMMENT("move SUM_RR->GPRE") add %2, %1.reg1, %1.reg2 - -/* Read/write byte */ - from IND_RC_B smalls(%off) to GPR - gen - COMMENT("move IND_RC_B->GPR small") - lbz %2, {GPRINDIRECT, %1.reg, %1.off} - +/* Read byte */ + from IND_RC_B to GPR gen - COMMENT("move IND_RC_B->GPR large") - addis SCRATCH, %1.reg, {CONST, his(%1.off)} - lbz %2, {GPRINDIRECT, SCRATCH, los(%1.off)} - - from GPR to IND_RC_B smalls(%off) + COMMENT("move IND_RC_B->GPR") + ldb %2, {GPROFFSET, %1.reg, %1.off} + + from IND_RR_B to GPR gen - COMMENT("move GPR->IND_RC_B small") - stb %1, {GPRINDIRECT, %2.reg, %2.off} - + COMMENT("move IND_RR_B->GPR") + ldb %2, {GPRGPR, %1.reg1, %1.reg2} + + from IND_LABEL_B to GPR + gen + 
COMMENT("move IND_LABEL_B->GPR") + ldb %2, {LABEL, %1.adr} + +/* Write byte */ + from GPR to IND_RC_B gen - COMMENT("move GPR->IND_RC_B large") - addis SCRATCH, %2.reg, {CONST, his(%2.off)} - stb %1, {GPRINDIRECT, SCRATCH, los(%2.off)} - -/* Read/write short */ + COMMENT("move GPR->IND_RC_B") + stb %1, {GPROFFSET, %2.reg, %2.off} - from IND_RC_H smalls(%off) to GPR + from GPR to IND_RR_B gen - COMMENT("move IND_RC_H->GPR small") - lhz %2, {GPRINDIRECT, %1.reg, %1.off} - + COMMENT("move GPR->IND_RR_B") + stb %1, {GPRGPR, %2.reg1, %2.reg2} + + from GPR to IND_LABEL_B + gen + COMMENT("move GPR->IND_LABEL_B") + stb %1, {LABEL, %2.adr} + +/* Read short */ + from IND_RC_H to GPR gen - COMMENT("move IND_RC_H->GPR large") - addis SCRATCH, %1.reg, {CONST, his(%1.off)} - lhz %2, {GPRINDIRECT, SCRATCH, los(%1.off)} - - from IND_RC_H_S smalls(%off) to GPR + COMMENT("move IND_RC_H->GPR") + ldh %2, {GPROFFSET, %1.reg, %1.off} + + from IND_RR_H to GPR gen - COMMENT("move IND_RC_H_S->GPR small") - lha %2, {GPRINDIRECT, %1.reg, %1.off} - - from IND_RC_H_S to GPR + COMMENT("move IND_RR_H->GPR") + ldh %2, {GPRGPR, %1.reg1, %1.reg2} + + from IND_LABEL_H to GPR gen - COMMENT("move IND_RC_H_S->GPR large") - addis SCRATCH, %1.reg, {CONST, his(%1.off)} - lha %2, {GPRINDIRECT, SCRATCH, los(%1.off)} - - from GPR to IND_RC_H smalls(%off) - gen - COMMENT("move GPR->IND_RC_H small") - sth %1, {GPRINDIRECT, %2.reg, %2.off} - + COMMENT("move IND_LABEL_H->GPR") + ldh %2, {LABEL, %1.adr} + +/* Write short */ + from GPR to IND_RC_H gen - COMMENT("move GPR->IND_RC_H large") - addis SCRATCH, %2.reg, {CONST, his(%2.off)} - sth %1, {GPRINDIRECT, SCRATCH, los(%2.off)} + COMMENT("move GPR->IND_RC_H") + sth %1, {GPROFFSET, %2.reg, %2.off} + + from GPR to IND_RR_H + gen + COMMENT("move GPR->IND_RR_H") + sth %1, {GPRGPR, %2.reg1, %2.reg2} + + from GPR to IND_LABEL_H + gen + COMMENT("move GPR->IND_LABEL_H") + sth %1, {LABEL, %2.adr} + +/* Read quad */ + + from IND_RC_Q to GPR + gen + COMMENT("move 
IND_RC_Q->GPR") + ld %2, {GPROFFSET, %1.reg, %1.off} -/* Read word */ - - from IND_RC_W smalls(%off) to GPR + from IND_RR_Q to GPR gen - COMMENT("move IND_RC_W->GPR small") - lwz %2, {GPRINDIRECT, %1.reg, %1.off} - - from IND_RC_W to GPR - gen - COMMENT("move IND_RC_W->GPR large") - addis %2, %1.reg, {CONST, his(%1.off)} - lwz %2, {GPRINDIRECT, %2, los(%1.off)} - - from IND_RR_W to GPR - gen - COMMENT("move IND_RR_W->GPR") - lwzx %2, %1.reg1, %1.reg2 + COMMENT("move IND_RR_Q->GPR") + ld %2, {GPRGPR, %1.reg1, %1.reg2} - from IND_LABEL_W to GPR + from IND_LABEL_Q to GPR gen - COMMENT("move IND_LABEL_W->GPR") - move {LABEL, %1.adr}, SCRATCH - lwz %2, {GPRINDIRECT, SCRATCH, 0} - - from IND_RC_W smalls(%off) to FS - gen - COMMENT("move IND_RC_W->FS small") - lfs %2, {GPRINDIRECT, %1.reg, %1.off} - - from IND_RC_W to FS - gen - COMMENT("move IND_RC_W->FS large") - addis SCRATCH, %1.reg, {CONST, his(%1.off)} - lfs %2, {GPRINDIRECT, SCRATCH, los(%1.off)} + COMMENT("move IND_LABEL_Q->GPR") + ld %2, {LABEL, %1.adr} - from IND_RR_W to FS - gen - COMMENT("move IND_RR_W->FS") - lfsx %2, %1.reg1, %1.reg2 - - from IND_LABEL_W to FS - gen - COMMENT("move IND_LABEL_W->FS") - move {LABEL, %1.adr}, SCRATCH - lfs %2, {GPRINDIRECT, SCRATCH, 0} - -/* Write word */ +/* Write quad */ - from GPR to IND_RC_W smalls(%off) + from GPR to IND_RC_Q gen - COMMENT("move GPR->IND_RC_W small") - stw %1, {GPRINDIRECT, %2.reg, %2.off} - - from GPR to IND_RC_W - gen - COMMENT("move GPR->IND_RC_W large") - addis SCRATCH, %2.reg, {CONST, his(%2.off)} - stw %1, {GPRINDIRECT, SCRATCH, los(%2.off)} + COMMENT("move GPR->IND_RC_Q") + st %1, {GPROFFSET, %2.reg, %2.off} - from GPR to IND_RR_W + from GPR to IND_RR_Q gen - COMMENT("move GPR->IND_RR_W") - stwx %1, %2.reg1, %2.reg2 - - from GPR to IND_LABEL_W - gen - COMMENT("move GPR->IND_LABEL_D") - move {LABEL, %2.adr}, SCRATCH - stw %1, {GPRINDIRECT, SCRATCH, 0} - - from FS to IND_RC_W smalls(%off) - gen - COMMENT("move FS->IND_RC_W small") - stfs %1, 
{GPRINDIRECT, %2.reg, %2.off} - - from FS to IND_RC_W - gen - COMMENT("move FS->IND_RC_W large") - addis SCRATCH, %2.reg, {CONST, his(%2.off)} - stfs %1, {GPRINDIRECT, SCRATCH, los(%2.off)} + COMMENT("move GPR->IND_RR_Q") + st %1, {GPRGPR, %2.reg1, %2.reg2} - from FS to IND_RR_W + from GPR to IND_LABEL_Q gen - COMMENT("move FS->IND_RR_W") - stfsx %1, %2.reg1, %2.reg2 - - from FS to IND_LABEL_W - gen - COMMENT("move FS->IND_LABEL_D") - move {LABEL, %2.adr}, SCRATCH - stfs %1, {GPRINDIRECT, SCRATCH, 0} - -/* Read double */ - - from IND_RC_D smalls(%off) to FD - gen - COMMENT("move IND_RC_D->FD small") - lfd %2, {GPRINDIRECT, %1.reg, %1.off} - - from IND_RC_D to FD - gen - COMMENT("move IND_RC_D->FD large") - addis SCRATCH, %1.reg, {CONST, his(%1.off)} - lfd %2, {GPRINDIRECT, SCRATCH, los(%1.off)} - - from IND_RR_D to FD - gen - COMMENT("move IND_RR_D->FD") - lfdx %2, %1.reg1, %1.reg2 - - from IND_LABEL_D to FD - gen - COMMENT("move IND_LABEL_D->FD") - move {LABEL, %1.adr}, SCRATCH - lfd %2, {GPRINDIRECT, SCRATCH, 0} - -/* Write double */ - - from FD to IND_RC_D smalls(%off) - gen - COMMENT("move FD->IND_RC_D small") - stfd %1, {GPRINDIRECT, %2.reg, %2.off} - - from FD to IND_RC_D - gen - COMMENT("move FD->IND_RC_D large") - addis SCRATCH, %2.reg, {CONST, his(%2.off)} - stfd %1, {GPRINDIRECT, SCRATCH, los(%2.off)} - - from FD to IND_RR_D - gen - COMMENT("move FD->IND_RR_W") - stfdx %1, %2.reg1, %2.reg2 - - from FD to IND_LABEL_D - gen - COMMENT("move FD->IND_LABEL_D") - move {LABEL, %2.adr}, SCRATCH - stfd %1, {GPRINDIRECT, SCRATCH, 0} - -/* Extract condition code field (actually produces (CC&3)<<2) */ - - from CR0 to GPR - gen - COMMENT("move CR0->GPR") - mfcr %2 - rlwinm %2, %2, {CONST, 4}, {CONST, 32-4}, {CONST, 31-2} - -/* Comparisons */ - - from TRISTATE_RR_S to CR0 - gen - cmp %2, {CONST, 0}, %1.reg1, %1.reg2 - - from TRISTATE_RR_U to CR0 - gen - cmpl %2, {CONST, 0}, %1.reg1, %1.reg2 - - from TRISTATE_RC_S to CR0 - gen - COMMENT("move TRISTATE_RC_S->CR0 large") 
- move {CONST, %1.val}, SCRATCH - cmp %2, {CONST, 0}, %1.reg, SCRATCH - - from TRISTATE_RC_U smallu(%val) to CR0 - gen - COMMENT("move TRISTATE_RC_U->CR0 small") - cmpli %2, {CONST, 0}, %1.reg, {CONST, %1.val} - - from TRISTATE_RC_U to CR0 - gen - COMMENT("move TRISTATE_RC_U->CR0") - move {CONST, %1.val}, SCRATCH - cmpl %2, {CONST, 0}, %1.reg, SCRATCH - - from TRISTATE_FF to CR0 - gen - COMMENT("move TRISTATE_FF->CR0") - fcmpo %2, {FD, %1.reg1}, {FD, %1.reg2} - - from GPR to CR0 - gen - COMMENT("move GPR->CR0") - orX SCRATCH, %1, %1 /* alas, can't call test */ - - from TRISTATE_RR_S + TRISTATE_RC_S + TRISTATE_FF to GPR - gen - COMMENT("move TRISTATE_R*_S->GPR") - move %1, C0 - move C0, SCRATCH - move {LABEL, ".tristate_s_table"}, %2 - lwzx %2, %2, SCRATCH - - from TRISTATE_RR_U + TRISTATE_RC_U to GPR - gen - COMMENT("move TRISTATE_R*_U->GPR") - move %1, C0 - move C0, SCRATCH - move {LABEL, ".tristate_u_table"}, %2 - lwzx %2, %2, SCRATCH - -/* Logicals */ - - from NOT_R to GPR - gen - COMMENT("move NOT_R->GPR") - nor %2, %1.reg, %1.reg - - from AND_RR to GPR - gen - COMMENT("move AND_RR->GPR") - and %2, %1.reg1, %1.reg2 - - from AND_RC smallu(%val) to GPR - gen - COMMENT("move AND_RC->GPR small") - andiX %2, %1.reg, {CONST, %1.val} - - from AND_RC to GPR - gen - COMMENT("move AND_RC->GPR") - move {CONST, %1.val}, SCRATCH - and %2, %1.reg, SCRATCH - - from OR_RR to GPR - gen - COMMENT("move OR_RR->GPR") - or %2, %1.reg1, %1.reg2 - - from OR_RC smallu(%val) to GPR - gen - COMMENT("move OR_RC->GPR small") - ori %2, %1.reg, {CONST, %1.val} - - from OR_RC to GPR - gen - COMMENT("move OR_RC->GPR") - move {CONST, %1.val}, SCRATCH - or %2, %1.reg, SCRATCH - - from XOR_RR to GPR - gen - COMMENT("move XOR_RR->GPR") - xor %2, %1.reg1, %1.reg2 - - from XOR_RC smallu(%val) to GPR - gen - COMMENT("move XOR_RC->GPR small") - xori %2, %1.reg, {CONST, %1.val} - - from XOR_RC to GPR - gen - COMMENT("move XOR_RC->GPR") - move {CONST, %1.val}, SCRATCH - xor %2, %1.reg, SCRATCH + 
COMMENT("move GPR->IND_LABEL_Q") + st %1, {LABEL, %2.adr} /* Miscellaneous */ - from OP_ALL_W + LABEL + CONST to GPRE + from CONST + LABEL + GPR + OP_ALL_Q to GPRE gen move %1, %2.reg - + +#if 0 TESTS - + to test GPR gen - orX SCRATCH, %1, %1 + invalid +#endif STACKINGRULES - - from GPR to STACK + + from STACKABLE to STACK gen - COMMENT("stack GPR") - stwu %1, {GPRINDIRECT, SP, 0-4} - - from CONST to STACK - uses REG + COMMENT("stack STACKABLE") + push %1 + + from REG to STACK + uses STACKABLE + gen + COMMENT("stack non-STACKABLE") + move %1, %a + push %a + + from REG to STACK + gen + COMMENT("stack non-STACKABLE, fallback") + sub SP, SP, {CONST, 4} + st %1, {GPROFFSET, SP, 0} + + from CONST + OP_ALL_Q to STACK + uses STACKABLE gen - COMMENT("stack CONST") move %1, %a - stwu %a, {GPRINDIRECT, SP, 0-4} - - from LABEL to STACK - uses REG - gen - COMMENT("stack LABEL") - move %1, {GPRE, %a} - stwu %a, {GPRINDIRECT, SP, 0-4} - - from SEX_B to STACK - gen - COMMENT("stack SEX_B") - extsb SCRATCH, %1.reg - stwu SCRATCH, {GPRINDIRECT, SP, 0-4} - - from SEX_H to STACK - gen - COMMENT("stack SEX_H") - extsh SCRATCH, %1.reg - stwu SCRATCH, {GPRINDIRECT, SP, 0-4} - - from SUM_ALL + TRISTATE_ALL + LOGICAL_ALL to STACK - gen - move %1, {GPRE, SCRATCH} - stwu SCRATCH, {GPRINDIRECT, SP, 0-4} - - from IND_ALL_W to STACK + push %a + + from CONST + OP_ALL_Q to STACK gen + COMMENT("fallback stack") move %1, SCRATCH - stwu SCRATCH, {GPRINDIRECT, SP, 0-4} - - from IND_ALL_D to STACK - gen - move %1, {FD, FSCRATCH} - stfdu {FD, FSCRATCH}, {GPRINDIRECT, SP, 0-8} - - from FD to STACK - gen - COMMENT("stack FD") - stfdu %1, {GPRINDIRECT, SP, 0-8} - - from FS to STACK - gen - COMMENT("stack FS") - stfsu %1, {GPRINDIRECT, SP, 0-4} + sub SP, SP, {CONST, 4} + st SCRATCH, {GPROFFSET, SP, 0} from TOKEN to STACK gen invalid. 
- - + COERCIONS @@ -800,7 +401,14 @@ COERCIONS COMMENT("coerce REG->REG") move %1, %a yields %a - + + from GPRE + uses REG + gen + COMMENT("coerce GPRE->REG") + move %1, %a + yields %a + from CONST uses REG gen @@ -812,83 +420,49 @@ COERCIONS uses REG gen COMMENT("coerce LABEL->REG") - move %1, {GPRE, %a} + move %1, %a yields %a - + from STACK - uses REG + uses STACKABLE gen COMMENT("coerce STACK->REG") - lwz %a, {GPRINDIRECT, SP, 0} - addi SP, SP, {CONST, 4} + pop %a yields %a - + from SEX_B uses REG gen COMMENT("coerce SEX_B->REG") - extsb %a, %1.reg + exts %a, %1.reg, {CONST, 8} yields %a from SEX_H uses REG gen COMMENT("coerce SEX_H->REG") - extsh %a, %1.reg + exts %a, %1.reg, {CONST, 16} yields %a - + +#if 0 from SUM_ALL + TRISTATE_ALL + LOGICAL_ALL uses REG gen move %1, {GPRE, %a} yields %a - from FS - uses FREG - gen - fmr {FS, %a}, %1 - yields {FS, %a} - - from FD - uses FREG - gen - fmr {FD, %a}, %1 - yields {FD, %a} - - from STACK - uses FREG - gen - COMMENT("coerce STACK->FD") - lfd {FD, %a}, {GPRINDIRECT, SP, 0} - addi SP, SP, {CONST, 8} - yields {FD, %a} - - from STACK - uses FREG - gen - COMMENT("coerce STACK->FS") - lfs {FS, %a}, {GPRINDIRECT, SP, 0} - addi SP, SP, {CONST, 4} - yields {FS, %a} - - from IND_ALL_W + from IND_ALL_Q uses REG gen move %1, %a yields %a - - from IND_ALL_W - uses FREG +#endif + from OP_ALL_Q + uses REG gen - move %1, {FS, %a} - yields {FS, %a} - - from IND_ALL_D - uses FREG - gen - move %1, {FD, %a} - yields {FD, %a} - + move %1, %a + yields %a + @@ -900,15 +474,15 @@ PATTERNS yields {CONST, $1} pat dup $1==INT32 /* Duplicate word on top of stack */ - with GPR + with REG yields %1 %1 pat dup $1==INT64 /* Duplicate double-word on top of stack */ - with GPR GPR + with REG REG yields %2 %1 %2 %1 pat exg $1==INT32 /* Exchange top two words on stack */ - with GPR GPR + with REG REG yields %1 %2 pat stl lol $1==$2 /* Store then load local */ @@ -964,11 +538,11 @@ PATTERNS /* nop */ pat loc loc cii $1==INT8 && $2==INT32 /* signed 
char -> signed int */ - with GPR + with REG yields {SEX_B, %1} pat loc loc cii $1==2 && $2==4 /* signed char -> signed short */ - with GPR + with REG yields {SEX_H, %1} @@ -992,13 +566,13 @@ PATTERNS leaving lal $1 loi INT32*2 - + pat stl inreg($1)>0 /* Store to local */ - with CONST + LABEL + GPR + OP_ALL_W + with CONST + LABEL + GPR + OP_ALL_Q kills regvar($1), LOCAL %off==$1 gen move %1, {GPRE, regvar($1)} - + pat stl /* Store to local */ leaving lal $1 @@ -1012,7 +586,7 @@ PATTERNS pat lil inreg($1)>0 /* Load from indirected local */ uses REG gen - lwz %a, {GPRINDIRECT, regvar($1), 0} + ld %a, {GPROFFSET, regvar($1), 0} yields %a pat lil /* Load from indirected local */ @@ -1084,19 +658,21 @@ PATTERNS loc 0 ste $1 +#if 0 pat ine /* Increment external */ uses REG={LABEL, $1}, REG gen - lwz %b, {GPRINDIRECT, %a, 0} + lwz %b, {GPROFFSET, %a, 0} addi %b, %b, {CONST, 1} - stw %b, {GPRINDIRECT, %a, 0} + stw %b, {GPROFFSET, %a, 0} pat dee /* Decrement external */ uses REG={LABEL, $1}, REG gen - lwz %b, {GPRINDIRECT, %a, 0} + lwz %b, {GPROFFSET, %a, 0} addi %b, %b, {CONST, 0-1} - stw %b, {GPRINDIRECT, %a, 0} + stw %b, {GPROFFSET, %a, 0} +#endif @@ -1128,26 +704,18 @@ PATTERNS pat loi $1==INT8 /* Load byte indirect */ with GPR - uses REG - gen - lbz %a, {GPRINDIRECT, %1, 0} - yields %a + yields {IND_RC_B, %1, 0} with SUM_RR - uses reusing %1, REG - gen - lbzx %a, %1.reg1, %1.reg2 - yields %a + yields {IND_RR_B, %1.reg1, %1.reg2} with SUM_RC - uses REG - gen - move {IND_RC_B, %1.reg, %1.off}, %a - yields %a - + yields {IND_RC_B, %1.reg, %1.off} + +#if 0 pat loi loc loc cii $1==INT16 && $2==INT16 && $3==INT32 /* Load half-word indirect and sign extend */ with GPR uses REG gen - lha %a, {GPRINDIRECT, %1, 0} + lha %a, {GPROFFSET, %1, 0} yields %a with SUM_RR uses reusing %1, REG @@ -1164,7 +732,7 @@ PATTERNS with GPR uses REG gen - lhz %a, {GPRINDIRECT, %1, 0} + lhz %a, {GPROFFSET, %1, 0} yields %a with SUM_RR uses reusing %1, REG @@ -1176,18 +744,20 @@ PATTERNS gen move 
{IND_RC_H, %1.reg, %1.off}, %a yields %a +#endif - pat loi $1==INT32 /* Load word indirect */ + pat loi $1==INT32 /* Load quad indirect */ with GPR - yields {IND_RC_W, %1, 0} + yields {IND_RC_Q, %1, 0} with SUM_RC - yields {IND_RC_W, %1.reg, %1.off} + yields {IND_RC_Q, %1.reg, %1.off} with SUM_RR - yields {IND_RR_W, %1.reg1, %1.reg2} + yields {IND_RR_Q, %1.reg1, %1.reg2} with LABEL - yields {IND_LABEL_W, %1.adr} + yields {IND_LABEL_Q, %1.adr} - pat loi $1==INT64 /* Load double-word indirect */ +#if 0 + pat loi $1==INT64 /* Load double-quad indirect */ with GPR yields {IND_RC_D, %1, 0} with SUM_RC @@ -1196,6 +766,7 @@ PATTERNS yields {IND_RR_D, %1.reg1, %1.reg2} with LABEL yields {IND_LABEL_D, %1.adr} +#endif pat loi /* Load arbitrary size */ leaving @@ -1203,7 +774,7 @@ PATTERNS los INT32 pat los /* Load arbitrary size */ - with GPR3 GPR4 STACK + with GPR0 GPR1 STACK kills ALL gen bl {LABEL, ".los"} @@ -1211,57 +782,64 @@ PATTERNS pat sti $1==INT8 /* Store byte indirect */ with GPR GPR gen - stb %2, {GPRINDIRECT, %1, 0} + move %2, {IND_RC_B, %1, 0} with SUM_RR GPR gen - stbx %2, %1.reg1, %1.reg2 + move %2, {IND_RR_B, %1.reg1, %1.reg2} with SUM_RC GPR gen move %2, {IND_RC_B, %1.reg, %1.off} with GPR SEX_B gen - stb %2.reg, {GPRINDIRECT, %1, 0} + move %2.reg, {IND_RC_B, %1, 0} with SUM_RR SEX_B gen - stbx %2.reg, %1.reg1, %1.reg2 + move %2.reg, {IND_RR_B, %1.reg1, %1.reg2} with SUM_RC SEX_B gen move %2.reg, {IND_RC_B, %1.reg, %1.off} + with LABEL GPR + gen + move %2, {IND_LABEL_B, %1.adr} pat sti $1==INT16 /* Store half-word indirect */ with GPR GPR gen - sth %2, {GPRINDIRECT, %1, 0} + move %2, {IND_RC_H, %1, 0} with SUM_RR GPR gen - sthx %2, %1.reg1, %1.reg2 + move %2, {IND_RR_H, %1.reg1, %1.reg2} with SUM_RC GPR gen move %2, {IND_RC_H, %1.reg, %1.off} with GPR SEX_H gen - sth %2.reg, {GPRINDIRECT, %1, 0} + move %2.reg, {IND_RC_H, %1, 0} with SUM_RR SEX_H gen - sthx %2.reg, %1.reg1, %1.reg2 + move %2.reg, {IND_RR_H, %1.reg1, %1.reg2} with SUM_RC SEX_H gen move 
%2.reg, {IND_RC_H, %1.reg, %1.off} + with LABEL GPR + gen + move %2, {IND_LABEL_H, %1.adr} - pat sti $1==INT32 /* Store word indirect */ - with GPR GPR+FS + pat sti $1==INT32 /* Store quad indirect */ + with GPR GPR gen - move %2, {IND_RC_W, %1, 0} - with SUM_RR GPR+FS + move %2, {IND_RC_Q, %1, 0} + with SUM_RR GPR gen - move %2, {IND_RR_W, %1.reg1, %1.reg2} - with SUM_RC GPR+FS + move %2, {IND_RR_Q, %1.reg1, %1.reg2} + with SUM_RC GPR gen - move %2, {IND_RC_W, %1.reg, %1.off} - with LABEL GPR+FS + move %2, {IND_RC_Q, %1.reg, %1.off} + with LABEL GPR gen - move %2, {IND_LABEL_W, %1.adr} + move %2, {IND_LABEL_Q, %1.adr} +#if 0 pat sti $1==INT64 /* Store double-word indirect */ with GPR FD gen @@ -1274,12 +852,12 @@ PATTERNS move %2, {IND_RC_D, %1.reg, %1.off} with GPR GPR GPR gen - stw %2, {GPRINDIRECT, %1, 0} - stw %3, {GPRINDIRECT, %1, 4} + stw %2, {GPROFFSET, %1, 0} + stw %3, {GPROFFSET, %1, 4} with SUM_RC GPR GPR gen - move %2, {IND_RC_W, %1.reg, %1.off} - move %3, {IND_RC_W, %1.reg, %1.off+4} + move %2, {IND_RC_Q, %1.reg, %1.off} + move %3, {IND_RC_Q, %1.reg, %1.off+4} with LABEL FD gen move %2, {IND_LABEL_D, %1.adr} @@ -1295,6 +873,7 @@ PATTERNS kills ALL gen bl {LABEL, ".sts"} +#endif @@ -1351,7 +930,7 @@ PATTERNS /* Word arithmetic */ - pat adi $1==4 /* Add word (second + top) */ + pat adi $1==INT32 /* Add word (second + top) */ with REG REG yields {SUM_RR, %1, %2} with CONST REG @@ -1362,7 +941,8 @@ PATTERNS yields {SUM_RC, %2.reg, %2.off+%1.val} with CONST LABEL yields {LABEL, %2.adr+%1.val} - + +#if 0 pat sbi $1==4 /* Subtract word (second - top) */ with REG REG uses reusing %2, REG @@ -1504,19 +1084,16 @@ PATTERNS with STACK gen bl {LABEL, ".com"} +#endif pat sli $1==4 /* Shift left (second << top) */ - with CONST GPR + with CONST+GPR GPR uses reusing %2, REG gen - rlwinm %a, %2, {CONST, (%1.val & 0x1F)}, {CONST, 0}, {CONST, 31-(%1.val & 0x1F)} + lsl %a, %2, %1 yields %a - with GPR GPR - uses reusing %2, REG - gen - slw %a, %2, %1 - yields %a - + +#if 0 
pat sri $1==4 /* Shift right signed (second >> top) */ with CONST GPR uses reusing %2, REG @@ -1664,34 +1241,38 @@ PATTERNS move {LABEL, ".tge_table"}, %a lwzx %a, %a, SCRATCH yields %a - + +#endif /* Simple branches */ pat zeq /* Branch if signed top == 0 */ - with TRISTATE_ALL+GPR STACK + with GPR STACK gen - move %1, C0 - bc IFTRUE, EQ, {LABEL, $1} + cmp %1, {CONST, 0} + beq {LABEL, $1} pat beq - leaving - cmi INT32 - zeq $1 - - pat zne /* Branch if signed top != 0 */ - with TRISTATE_ALL+GPR STACK + with GPR GPR STACK gen - move %1, C0 - bc IFFALSE, EQ, {LABEL, $1} + cmp %1, %2 + beq {LABEL, $1} + + pat zne /* Branch if signed top != 0 */ + with GPR STACK + gen + cmp %1, {CONST, 0} + bne {LABEL, $1} pat bne - leaving - cmi INT32 - zne $1 - + with GPR GPR STACK + gen + cmp %1, %2 + bne {LABEL, $1} + +#if 0 pat zgt /* Branch if signed top > 0 */ with TRISTATE_ALL+GPR STACK gen @@ -1735,8 +1316,10 @@ PATTERNS leaving cmi INT32 zle $1 +#endif +#if 0 /* Compare and jump */ pat cmi /* Signed tristate compare */ @@ -1780,17 +1363,13 @@ PATTERNS kills ALL gen labeldef $1 - - pat bra topeltsize($1)==4 /* Unconditional jump with TOS GPRister */ - with GPR3 STACK - gen - b {LABEL, $1} - - pat bra topeltsize($1)!=4 /* Unconditional jump without TOS GPRister */ +#endif + + pat bra /* Unconditional jump */ with STACK gen b {LABEL, $1} - + /* Miscellaneous */ @@ -1805,49 +1384,52 @@ PATTERNS with GPR STACK kills ALL gen - mtspr CTR, %1 - bcctrl ALWAYS, {CONST, 0}, {CONST, 0} - + bl %1 + pat lfr $1==INT32 /* Load function result, word */ - yields R3 + yields R0 pat lfr $1==INT64 /* Load function result, double-word */ - yields R4 R3 - + yields R0 R1 + pat ret $1==0 /* Return from procedure */ gen return - b {LABEL, ".ret"} - + mov SP, FP + pop FP, PC + pat ret $1==INT32 /* Return from procedure, word */ - with GPR3 + with GPR0 gen return - b {LABEL, ".ret"} + mov SP, FP + pop FP, PC pat ret $1==INT64 /* Return from procedure, double-word */ - with GPR3 GPR4 + with GPR0 GPR1 gen 
return - b {LABEL, ".ret"} + mov SP, FP + pop FP, PC +#if 0 pat blm /* Block move constant length */ with GPR GPR STACK uses REG gen move {CONST, $1}, %a - stwu %a, {GPRINDIRECT, SP, 0-4} - stwu %2, {GPRINDIRECT, SP, 0-4} - stwu %1, {GPRINDIRECT, SP, 0-4} + stwu %a, {GPROFFSET, SP, 0-4} + stwu %2, {GPROFFSET, SP, 0-4} + stwu %1, {GPROFFSET, SP, 0-4} bl {LABEL, "_memmove"} addi SP, SP, {CONST, 12} pat bls /* Block move variable length */ with GPR GPR GPR STACK gen - stwu %1, {GPRINDIRECT, SP, 0-4} - stwu %3, {GPRINDIRECT, SP, 0-4} - stwu %2, {GPRINDIRECT, SP, 0-4} + stwu %1, {GPROFFSET, SP, 0-4} + stwu %3, {GPROFFSET, SP, 0-4} + stwu %2, {GPROFFSET, SP, 0-4} bl {LABEL, "_memmove"} addi SP, SP, {CONST, 12} @@ -1913,7 +1495,7 @@ PATTERNS with GPR uses reusing %1, REG gen - lwz %a, {GPRINDIRECT, %1, FP_OFFSET} + lwz %a, {GPROFFSET, %1, FP_OFFSET} yields %a pat lpb /* Convert FP to argument address */ @@ -1929,9 +1511,9 @@ PATTERNS uses REG gen move {LABEL, $1}, %a - move {IND_RC_W, %a, 8}, FP - move {IND_RC_W, %a, 4}, SP - move {IND_RC_W, %a, 0}, %a + move {IND_RC_Q, %a, 8}, FP + move {IND_RC_Q, %a, 4}, SP + move {IND_RC_Q, %a, 0}, %a mtspr CTR, %a bcctr ALWAYS, {CONST, 0}, {CONST, 0} @@ -1952,6 +1534,7 @@ PATTERNS gen wspec {CONST, $1} +#endif #endif pat lor $1==0 /* Load FP */ @@ -1983,17 +1566,17 @@ PATTERNS pat str $1==2 /* Store HP */ leaving ste ".reghp" - + pat ass /* Adjust stack by variable amount */ with CONST gen - move {SUM_RC, SP, %1.val}, {GPRE, SP} + move {SUM_RC, SP, %1.val}, SP with GPR gen - move {SUM_RR, SP, %1}, {GPRE, SP} + move {SUM_RR, SP, %1}, SP pat asp /* Adjust stack by constant amount */ leaving loc $1 ass - + From 11890026db9f4a2b3978284fa493d383020015a9 Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 19 May 2013 23:34:42 +0100 Subject: [PATCH 12/76] Push/pop are the right way round. Don't corrupt short-form ALU instructions. Correct encoding of push/pop register ranges. 
--HG-- branch : dtrg-videocore --- mach/vc4/as/mach3.c | 4 ++-- mach/vc4/as/mach5.c | 31 +++++++++++++++++-------------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/mach/vc4/as/mach3.c b/mach/vc4/as/mach3.c index aba49dca2..f47c024dd 100644 --- a/mach/vc4/as/mach3.c +++ b/mach/vc4/as/mach3.c @@ -133,8 +133,8 @@ 0, OP_MISCL, B16(11000100,10000000), "divs", 0, OP_MISCL, B16(11000100,11100000), "divu", -0, OP_STACK, B16(00000010,00000000), "push", -0, OP_STACK, B16(00000010,10000000), "pop", +0, OP_STACK, B16(00000010,10000000), "push", +0, OP_STACK, B16(00000010,00000000), "pop", 0, OP_MEM, B8(00000000), "ld", 0, OP_MEM, B8(00000001), "st", diff --git a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c index 056a8b7aa..6314ed1f3 100644 --- a/mach/vc4/as/mach5.c +++ b/mach/vc4/as/mach5.c @@ -13,7 +13,7 @@ void alu_instr_reg(quad op, int cc, int rd, int ra, int rb) { /* Can we use short form? */ - if ((cc == ALWAYS) && (ra == rd)) + if ((cc == ALWAYS) && (ra == rd) && (ra < 0x10) && (rb < 0x10)) { emit2(B16(01000000,00000000) | (op<<8) | (rb<<4) | (rd<<0)); return; @@ -32,7 +32,7 @@ void alu_instr_lit(quad op, int cc, int rd, int ra, quad value) /* 16 bit short form? 
*/ if ((cc == ALWAYS) && !(op & 1) && (value <= 0x1f) && (ra == rd) && - !(ra & 0x10)) + (ra < 0x10)) { emit2(B16(01100000,00000000) | (op<<8) | (value<<4) | (rd<<0)); return; @@ -162,6 +162,7 @@ void branch_instr(int bl, int cc, struct expr_t* expr) void stack_instr(quad opcode, int loreg, int hireg, int extrareg) { int b; + int m; switch (loreg) { @@ -172,15 +173,13 @@ void stack_instr(quad opcode, int loreg, int hireg, int extrareg) case 26: /* lr */ extrareg = 26; - hireg = 31; - loreg = 0; + hireg = loreg = -1; b = 0; break; case 31: /* pc */ extrareg = 31; - hireg = 31; - loreg = 0; + hireg = loreg = -1; b = 0; break; @@ -189,23 +188,27 @@ void stack_instr(quad opcode, int loreg, int hireg, int extrareg) } if (opcode & 0x0080) - { - /* Pop */ - if (extrareg == 26) - serror("cannot pop lr"); - } - else { /* Push */ if (extrareg == 31) serror("cannot push pc"); } + else + { + /* Pop */ + if (extrareg == 26) + serror("cannot pop lr"); + } if (hireg < loreg) serror("invalid register range"); - emit2(opcode | (b<<5) | (hireg<<0) | - ((extrareg != -1) ? 0x0100 : 0)); + if (hireg == -1) + m = 31; + else + m = hireg - loreg; + + emit2(opcode | (b<<5) | (m<<0) | ((extrareg != -1) ? 0x0100 : 0)); } /* Memory operations where the offset is a fixed value (including zero). */ From 5082b2a5d71832284679a91607b53309d5e17642 Mon Sep 17 00:00:00 2001 From: David Given Date: Mon, 20 May 2013 19:56:33 +0100 Subject: [PATCH 13/76] Add lea instruction. Fix dependency issues. 
--HG-- branch : dtrg-videocore --- mach/proto/as/build.mk | 2 ++ mach/vc4/as/mach1.c | 2 ++ mach/vc4/as/mach2.c | 1 + mach/vc4/as/mach3.c | 1 + mach/vc4/as/mach4.c | 3 +++ mach/vc4/as/mach5.c | 23 +++++++++++++++++++++++ 6 files changed, 32 insertions(+) diff --git a/mach/proto/as/build.mk b/mach/proto/as/build.mk index 64eaab1e9..791611f3d 100644 --- a/mach/proto/as/build.mk +++ b/mach/proto/as/build.mk @@ -26,7 +26,9 @@ define build-as-impl $(eval CLEANABLES += $(OBJDIR)/$D/preprocessed-comm2.y) $(OBJDIR)/$D/preprocessed-comm2.y: mach/proto/as/comm2.y $(CPPANSI) \ + mach/$(ARCH)/as/mach1.c \ mach/$(ARCH)/as/mach2.c \ + mach/$(ARCH)/as/mach3.c \ mach/$(ARCH)/as/mach4.c @echo PREPROCESS $$@ @mkdir -p $$(dir $$@) diff --git a/mach/vc4/as/mach1.c b/mach/vc4/as/mach1.c index 440d7de97..de164610e 100644 --- a/mach/vc4/as/mach1.c +++ b/mach/vc4/as/mach1.c @@ -23,3 +23,5 @@ extern void branch_addcmp_reg_reg_instr(int cc, int rd, int ra, int rs, struct e extern void branch_addcmp_lit_reg_instr(int cc, int rd, long va, int rs, struct expr_t* expr); extern void branch_addcmp_reg_lit_instr(int cc, int rd, int ra, long vs, struct expr_t* expr); extern void branch_addcmp_lit_lit_instr(int cc, int rd, long va, long vs, struct expr_t* expr); +extern void lea_stack_instr(int rd, long va, int rs); +extern void lea_address_instr(int rd, struct expr_t* expr); \ No newline at end of file diff --git a/mach/vc4/as/mach2.c b/mach/vc4/as/mach2.c index 8143d080b..4c02efb39 100644 --- a/mach/vc4/as/mach2.c +++ b/mach/vc4/as/mach2.c @@ -18,5 +18,6 @@ %token OP_MISC %token OP_MISCL %token OP_STACK +%token OP_LEA diff --git a/mach/vc4/as/mach3.c b/mach/vc4/as/mach3.c index f47c024dd..72c0c2a88 100644 --- a/mach/vc4/as/mach3.c +++ b/mach/vc4/as/mach3.c @@ -145,3 +145,4 @@ 0, OP_MEM, B8(00000110), "ldhs", 0, OP_MEM, B8(00000111), "sths", +0, OP_LEA, 0, "lea", \ No newline at end of file diff --git a/mach/vc4/as/mach4.c b/mach/vc4/as/mach4.c index e9593e761..73cbea8e4 100644 --- 
a/mach/vc4/as/mach4.c +++ b/mach/vc4/as/mach4.c @@ -71,5 +71,8 @@ operation | OP_MEM CC GPR ',' '(' GPR ')' '+' '+' { mem_postincr_instr($1, $2, $3, $6); } | OP_MEM GPR ',' expr { mem_address_instr($1, $2, &$4); } + + | OP_LEA GPR ',' absexp '(' GPR ')' { lea_stack_instr($2, $4, $6); } + | OP_LEA GPR ',' expr { lea_address_instr($2, &$4); } ; diff --git a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c index 6314ed1f3..d6f71e7fb 100644 --- a/mach/vc4/as/mach5.c +++ b/mach/vc4/as/mach5.c @@ -475,3 +475,26 @@ void branch_addcmp_lit_lit_instr(int cc, int rd, long va, long vs, struct expr_t branch_addcmp_common(B16(11000000,00000000) | (vs<<8), 8, expr); } +/* lea, where the source is relative to the stack. */ + +void lea_stack_instr(int rd, long va, int rs) +{ + if (rs != 25) + serror("source register must be sp"); + + if (!fitx(va, 6)) + serror("offset too big to encode in instruction"); + va = maskx(va, 6); + + emit2(B16(00010000,00000000) | (rd<<0) | (va<<5)); +} + +/* lea, where the source is an address. */ + +void lea_address_instr(int rd, struct expr_t* expr) +{ + newrelo(expr->typ, RELOVC4); + emit2(B16(11100101,00000000) | (rd<<0)); + emit4(expr->val); +} + From 970f2bae6222b98d825af151f2d467f5893a8590 Mon Sep 17 00:00:00 2001 From: David Given Date: Mon, 20 May 2013 22:35:12 +0100 Subject: [PATCH 14/76] Major revamp to simplify and use 2op instructions. Better code. Now looks like it may work one day. 
--HG-- branch : dtrg-videocore --- mach/vc4/ncg/table | 656 +++++++++++++-------------------------------- 1 file changed, 181 insertions(+), 475 deletions(-) diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index 691d6076b..3dc18a678 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -28,43 +28,34 @@ PROPERTIES GPR /* any GPR */ REG /* any allocatable GPR */ - LREG /* any allocatable low register (r0-r15) */ - HREG /* any allocatable high register (r0-r15) */ STACKABLE /* a push/popable register (r0, r6, r16, fp) */ GPR0 GPR1 GPR2 GPR3 GPR4 GPR5 GPR6 GPR7 GPR8 GPR9 GPR10 GPR11 GPR12 GPR13 GPR14 GPR15 GPR16 GPR17 GPR18 GPR19 GPR20 GPR21 GPR22 GPR23 - GPRFP GPRSP GPRLR GPRPC + GPRGP GPRFP GPRSP GPRLR GPRPC REGISTERS - R0("r0") : GPR, REG, LREG, STACKABLE, GPR0. - R1("r1") : GPR, REG, LREG, GPR1. - R2("r2") : GPR, REG, LREG, GPR2. - R3("r3") : GPR, REG, LREG, GPR3. - R4("r4") : GPR, REG, LREG, GPR4. - R5("r5") : GPR, REG, LREG, GPR5. - R6("r6") : GPR, REG, LREG, STACKABLE, GPR6 regvar. - R7("r7") : GPR, REG, LREG, GPR7 regvar. - R8("r8") : GPR, REG, LREG, GPR8 regvar. - R9("r9") : GPR, REG, LREG, GPR9 regvar. - R10("r10") : GPR, REG, LREG, GPR10 regvar. - R11("r11") : GPR, REG, LREG, GPR11 regvar. - R12("r12") : GPR, REG, LREG, GPR12 regvar. - R13("r13") : GPR, REG, LREG, GPR13 regvar. - R14("r14") : GPR, REG, LREG, GPR14 regvar. - R15("r15") : GPR, REG, LREG, GPR15 regvar. + R0("r0") : GPR, REG, STACKABLE, GPR0. + R1("r1") : GPR, REG, GPR1. + R2("r2") : GPR, REG, GPR2. + R3("r3") : GPR, REG, GPR3. + R4("r4") : GPR, REG, GPR4. + R5("r5") : GPR, REG, GPR5. + R6("r6") : GPR, REG, STACKABLE, GPR6 regvar. + R7("r7") : GPR, REG, GPR7 regvar. + R8("r8") : GPR, REG, GPR8 regvar. + R9("r9") : GPR, REG, GPR9 regvar. + R10("r10") : GPR, REG, GPR10 regvar. + R11("r11") : GPR, REG, GPR11 regvar. + R12("r12") : GPR, REG, GPR12 regvar. + R13("r13") : GPR, REG, GPR13 regvar. + R14("r14") : GPR, REG, GPR14 regvar. + GP("r15") : GPR, GPRGP. 
- R16("r16") : GPR, REG, HREG, STACKABLE, GPR16 regvar. - R17("r17") : GPR, REG, HREG, GPR17 regvar. - R18("r18") : GPR, REG, HREG, GPR18 regvar. - R19("r19") : GPR, REG, HREG, GPR19 regvar. - R20("r20") : GPR, REG, HREG, GPR20 regvar. - R21("r21") : GPR, REG, HREG, GPR21 regvar. - R22("r22") : GPR, REG, HREG, GPR22 regvar. - R23("r23") : GPR, GPR23. + R23("r23") : GPR. FP("fp") : GPR, GPRFP, STACKABLE. SP("sp") : GPR, GPRSP. LR("lr") : GPR, GPRLR. @@ -84,94 +75,61 @@ TOKENS LABEL = { ADDR adr; } 4 adr. CONST = { INT val; } 4 "#" val. - LOCAL = { INT off; } 4. /* Allows us to use regvar() to refer to registers */ GPRE = { GPR reg; } 4 reg. -/* Expression partial results */ - - SUM_RC = { GPR reg; INT off; } 4. - SUM_RR = { GPR reg1; GPR reg2; } 4. - - SEX_B = { GPR reg; } 4. - SEX_H = { GPR reg; } 4. - - IND_RC_B = { GPR reg; INT off; } 4. - IND_RR_B = { GPR reg1; GPR reg2; } 4. - IND_LABEL_B = { ADDR adr; } 4. - - IND_RC_H = { GPR reg; INT off; } 4. - IND_RR_H = { GPR reg1; GPR reg2; } 4. - IND_LABEL_H = { ADDR adr; } 4. - - IND_RC_H_S = { GPR reg; INT off; } 4. - - IND_RC_Q = { GPR reg; INT off; } 4. - IND_RR_Q = { GPR reg1; GPR reg2; } 4. - IND_LABEL_Q = { ADDR adr; } 4. - - IND_RC_D = { GPR reg; INT off; } 8. - IND_RR_D = { GPR reg1; GPR reg2; } 8. - IND_LABEL_D = { ADDR adr; } 8. - -/* Comments */ - - LABELI = { ADDR msg; INT num; } 4 msg " " num. +/* The results of comparisons. */ + TRISTATE_RC_S = { GPR reg; INT val; } 4. + TRISTATE_RC_U = { GPR reg; INT val; } 4. + TRISTATE_RR_S = { GPR reg1; GPR reg2; } 4. + TRISTATE_RR_U = { GPR reg1; GPR reg2; } 4. SETS - TOKEN = LABEL + CONST + LOCAL. + TOKEN = LABEL + CONST. GPRI = GPR + GPRE. - - SUM_ALL = SUM_RC + SUM_RR. - - SEX_ALL = SEX_B + SEX_H. - - IND_ALL_B = IND_RC_B + IND_RR_B + IND_LABEL_B. - IND_ALL_H = IND_RC_H + IND_RR_H + IND_LABEL_H. - IND_ALL_Q = IND_RC_Q + IND_RR_Q + IND_LABEL_Q. - IND_ALL_D = IND_RC_D + IND_RR_D + IND_LABEL_D. 
-#if 0 - OP_ALL_Q = SUM_ALL + TRISTATE_ALL + SEX_ALL + LOGICAL_ALL + - IND_ALL_Q. -#endif - - OP_ALL_Q = SUM_ALL + SEX_ALL + IND_ALL_B + IND_ALL_H + IND_ALL_Q. INSTRUCTIONS add GPRI:wo, GPRI:ro, GPRI+CONST:ro. + add GPRI:rw, GPRI+CONST:ro. beq "b.eq" LABEL:ro. bne "b.ne" LABEL:ro. + bgt "b.gt" LABEL:ro. + bgt "b.gt" LABEL:ro. + bhi "b.hi" LABEL:ro. b GPRI+LABEL:ro. bl GPRI+LABEL:ro. - cmp GPRI:ro, GPRI+CONST:ro. + cmp GPRI:ro, GPRI+CONST:ro kills :cc. exts GPRI:wo, GPRI:ro, GPRI+CONST:ro. + exts GPRI:rw, GPRI+CONST:ro. ld GPRI:wo, GPROFFSET+GPRGPR+LABEL:ro. ldb GPRI:wo, GPROFFSET+GPRGPR+LABEL:ro. ldh GPRI:wo, GPROFFSET+GPRGPR+LABEL:ro. ldhs GPRI:wo, GPROFFSET+GPRGPR+LABEL:ro. lea GPRI:wo, LABEL:ro. + lsl GPRI:rw, GPRI+CONST:ro. lsl GPRI:wo, GPRI:ro, GPRI+CONST:ro. mov GPRI:wo, GPRI+CONST:ro. + neg GPRI:rw, GPRI+CONST:ro. pop STACKABLE:wo. pop STACKABLE:wo, GPRLR+GPRPC:wo. push STACKABLE:ro. sub GPRI:wo, GPRI:ro, CONST+GPRI:ro. + sub GPRI:rw, GPRI+CONST:ro. st GPRI:ro, GPROFFSET+GPRGPR+LABEL:ro. stb GPRI:ro, GPROFFSET+GPRGPR+LABEL:ro. sth GPRI:ro, GPROFFSET+GPRGPR+LABEL:ro. sths GPRI:ro, GPROFFSET+GPRGPR+LABEL:ro. invalid "invalid". - comment "!" LABEL+LABELI:ro. @@ -185,173 +143,33 @@ MOVES /* GPRE exists solely to allow us to use regvar() (which can only be used in an expression) as a register constant. 
*/ - from GPR to GPRE - gen - COMMENT("move GPR->GPRE") - mov %2, %1 - from GPRE to GPR gen - COMMENT("move GPRE->GPR") mov %2, %1 /* Constants */ from CONST to GPR gen - COMMENT("move CONST->GPR") mov %2, %1 from LABEL to GPR gen - COMMENT("move LABEL->GPR") lea %2, {LABEL, %1.adr} - -/* Sign extension */ - - from SEX_B to GPR - gen - COMMENT("move SEX_B->GPR") - exts %2, %1.reg, {CONST, 8} - - from SEX_H to GPR - gen - COMMENT("move SEX_H->GPR") - exts %2, %1.reg, {CONST, 16} - -/* Register + something */ - - from SUM_RC to GPR - gen - COMMENT("move SUM_RC->GPR") - add %2, %1.reg, {CONST, %1.off} - - from SUM_RR to GPR - gen - COMMENT("move SUM_RR->GPR") - add %2, %1.reg1, %1.reg2 - - from SUM_RR to GPR - gen - COMMENT("move SUM_RR->GPRE") - add %2, %1.reg1, %1.reg2 - -/* Read byte */ - - from IND_RC_B to GPR - gen - COMMENT("move IND_RC_B->GPR") - ldb %2, {GPROFFSET, %1.reg, %1.off} - - from IND_RR_B to GPR - gen - COMMENT("move IND_RR_B->GPR") - ldb %2, {GPRGPR, %1.reg1, %1.reg2} - - from IND_LABEL_B to GPR - gen - COMMENT("move IND_LABEL_B->GPR") - ldb %2, {LABEL, %1.adr} - -/* Write byte */ - - from GPR to IND_RC_B - gen - COMMENT("move GPR->IND_RC_B") - stb %1, {GPROFFSET, %2.reg, %2.off} - - from GPR to IND_RR_B - gen - COMMENT("move GPR->IND_RR_B") - stb %1, {GPRGPR, %2.reg1, %2.reg2} - - from GPR to IND_LABEL_B - gen - COMMENT("move GPR->IND_LABEL_B") - stb %1, {LABEL, %2.adr} - -/* Read short */ - - from IND_RC_H to GPR - gen - COMMENT("move IND_RC_H->GPR") - ldh %2, {GPROFFSET, %1.reg, %1.off} - - from IND_RR_H to GPR - gen - COMMENT("move IND_RR_H->GPR") - ldh %2, {GPRGPR, %1.reg1, %1.reg2} - - from IND_LABEL_H to GPR - gen - COMMENT("move IND_LABEL_H->GPR") - ldh %2, {LABEL, %1.adr} - -/* Write short */ - - from GPR to IND_RC_H - gen - COMMENT("move GPR->IND_RC_H") - sth %1, {GPROFFSET, %2.reg, %2.off} - - from GPR to IND_RR_H - gen - COMMENT("move GPR->IND_RR_H") - sth %1, {GPRGPR, %2.reg1, %2.reg2} - - from GPR to IND_LABEL_H - gen - COMMENT("move 
GPR->IND_LABEL_H") - sth %1, {LABEL, %2.adr} - -/* Read quad */ - - from IND_RC_Q to GPR - gen - COMMENT("move IND_RC_Q->GPR") - ld %2, {GPROFFSET, %1.reg, %1.off} - - from IND_RR_Q to GPR - gen - COMMENT("move IND_RR_Q->GPR") - ld %2, {GPRGPR, %1.reg1, %1.reg2} - - from IND_LABEL_Q to GPR - gen - COMMENT("move IND_LABEL_Q->GPR") - ld %2, {LABEL, %1.adr} - -/* Write quad */ - - from GPR to IND_RC_Q - gen - COMMENT("move GPR->IND_RC_Q") - st %1, {GPROFFSET, %2.reg, %2.off} - - from GPR to IND_RR_Q - gen - COMMENT("move GPR->IND_RR_Q") - st %1, {GPRGPR, %2.reg1, %2.reg2} - - from GPR to IND_LABEL_Q - gen - COMMENT("move GPR->IND_LABEL_Q") - st %1, {LABEL, %2.adr} + sub %2, GP /* Miscellaneous */ - from CONST + LABEL + GPR + OP_ALL_Q to GPRE + from CONST+LABEL+GPR+GPRE to GPRE gen move %1, %2.reg -#if 0 TESTS to test GPR gen - invalid -#endif + cmp %1, {CONST, 0} @@ -359,54 +177,48 @@ STACKINGRULES from STACKABLE to STACK gen - COMMENT("stack STACKABLE") push %1 - from REG to STACK + from GPR to STACK uses STACKABLE gen - COMMENT("stack non-STACKABLE") move %1, %a push %a - from REG to STACK + from GPR to STACK gen - COMMENT("stack non-STACKABLE, fallback") sub SP, SP, {CONST, 4} st %1, {GPROFFSET, SP, 0} - from CONST + OP_ALL_Q to STACK + from GPRE to STACK + uses STACKABLE + gen + move %1, %a + push %a + + from GPRE to STACK + gen + sub SP, {CONST, 4} + st %1, {GPROFFSET, SP, 0} + + from TOKEN to STACK uses STACKABLE gen move %1, %a push %a - from CONST + OP_ALL_Q to STACK + from TOKEN to STACK gen COMMENT("fallback stack") move %1, SCRATCH sub SP, SP, {CONST, 4} st SCRATCH, {GPROFFSET, SP, 0} - - from TOKEN to STACK - gen - invalid. 
COERCIONS - from REG - uses REG - gen - COMMENT("coerce REG->REG") - move %1, %a - yields %a - from GPRE - uses REG - gen - COMMENT("coerce GPRE->REG") - move %1, %a + uses reusing %1, REG=%1 yields %a from CONST @@ -430,40 +242,6 @@ COERCIONS pop %a yields %a - from SEX_B - uses REG - gen - COMMENT("coerce SEX_B->REG") - exts %a, %1.reg, {CONST, 8} - yields %a - - from SEX_H - uses REG - gen - COMMENT("coerce SEX_H->REG") - exts %a, %1.reg, {CONST, 16} - yields %a - -#if 0 - from SUM_ALL + TRISTATE_ALL + LOGICAL_ALL - uses REG - gen - move %1, {GPRE, %a} - yields %a - - from IND_ALL_Q - uses REG - gen - move %1, %a - yields %a -#endif - from OP_ALL_Q - uses REG - gen - move %1, %a - yields %a - - PATTERNS @@ -473,26 +251,28 @@ PATTERNS pat loc /* Load constant */ yields {CONST, $1} - pat dup $1==INT32 /* Duplicate word on top of stack */ - with REG + pat dup $1<=INT32 /* Duplicate word on top of stack */ + with GPR yields %1 %1 - + pat dup $1==INT64 /* Duplicate double-word on top of stack */ - with REG REG + with GPR GPR yields %2 %1 %2 %1 pat exg $1==INT32 /* Exchange top two words on stack */ - with REG REG + with GPR GPR yields %1 %2 - + +#if 0 pat stl lol $1==$2 /* Store then load local */ leaving - dup 4 + dup INT32 stl $1 +#endif pat lal sti lal loi $1==$3 && $2==$4 /* Store then load local, of a different size */ leaving - dup INT32 + dup $2 lal $1 sti $2 @@ -537,30 +317,39 @@ PATTERNS pat loc loc cui $1==INT16 && $2==INT32 /* unsigned short -> signed int */ /* nop */ - pat loc loc cii $1==INT8 && $2==INT32 /* signed char -> signed int */ + pat loc loc cii $1==INT8 && $2>INT8 /* signed char -> anything */ with REG - yields {SEX_B, %1} + uses reusing %1, REG=%1 + gen + exts %a, {CONST, 8} + yields %a - pat loc loc cii $1==2 && $2==4 /* signed char -> signed short */ + pat loc loc cii $1==INT16 && $2>INT16 /* signed short -> anything */ with REG - yields {SEX_H, %1} - + uses reusing %1, REG=%1 + gen + exts %a, {CONST, 16} + yields %a - /* Local variables */ 
pat lal /* Load address of local */ - yields {SUM_RC, FP, $1} + uses REG + gen + add %a, FP, {CONST, $1} + sub %a, GP + yields %a pat lol inreg($1)>0 /* Load from local */ - yields {LOCAL, $1} - - pat lol /* Load from local */ - leaving - lal $1 - loi INT32 + yields {GPRE, regvar($1)} + + pat lol /* Load quad from local */ + uses REG + gen + ld %a, {GPROFFSET, FP, $1} + yields %a pat ldl /* Load double-word from local */ leaving @@ -568,16 +357,16 @@ PATTERNS loi INT32*2 pat stl inreg($1)>0 /* Store to local */ - with CONST + LABEL + GPR + OP_ALL_Q - kills regvar($1), LOCAL %off==$1 + with CONST+GPRI + kills regvar($1) gen move %1, {GPRE, regvar($1)} pat stl /* Store to local */ - leaving - lal $1 - sti INT32 - + with GPRI + gen + st %1, {GPROFFSET, FP, $1} + pat sdl /* Store double-word to local */ leaving lal $1 @@ -704,11 +493,15 @@ PATTERNS pat loi $1==INT8 /* Load byte indirect */ with GPR - yields {IND_RC_B, %1, 0} - with SUM_RR - yields {IND_RR_B, %1.reg1, %1.reg2} - with SUM_RC - yields {IND_RC_B, %1.reg, %1.off} + uses reusing %1, REG + gen + ldb %a, {GPRGPR, %1, GP} + yields %a + with GPRE + uses reusing %1.reg, REG + gen + ldb %a, {GPRGPR, %1.reg, GP} + yields %a #if 0 pat loi loc loc cii $1==INT16 && $2==INT16 && $3==INT32 /* Load half-word indirect and sign extend */ @@ -748,13 +541,11 @@ PATTERNS pat loi $1==INT32 /* Load quad indirect */ with GPR - yields {IND_RC_Q, %1, 0} - with SUM_RC - yields {IND_RC_Q, %1.reg, %1.off} - with SUM_RR - yields {IND_RR_Q, %1.reg1, %1.reg2} - with LABEL - yields {IND_LABEL_Q, %1.adr} + uses reusing %1, REG + gen + add %a, %1, GP + ld %a, {GPROFFSET, %a, 0} + yields %a #if 0 pat loi $1==INT64 /* Load double-quad indirect */ @@ -780,64 +571,26 @@ PATTERNS bl {LABEL, ".los"} pat sti $1==INT8 /* Store byte indirect */ - with GPR GPR + with GPR GPRI gen - move %2, {IND_RC_B, %1, 0} - with SUM_RR GPR + stb %2, {GPRGPR, %1, GP} + with GPRE GPRI gen - move %2, {IND_RR_B, %1.reg1, %1.reg2} - with SUM_RC GPR - gen - move %2, 
{IND_RC_B, %1.reg, %1.off} - with GPR SEX_B - gen - move %2.reg, {IND_RC_B, %1, 0} - with SUM_RR SEX_B - gen - move %2.reg, {IND_RR_B, %1.reg1, %1.reg2} - with SUM_RC SEX_B - gen - move %2.reg, {IND_RC_B, %1.reg, %1.off} - with LABEL GPR - gen - move %2, {IND_LABEL_B, %1.adr} + stb %2, {GPRGPR, %1.reg, GP} pat sti $1==INT16 /* Store half-word indirect */ with GPR GPR + uses REG gen - move %2, {IND_RC_H, %1, 0} - with SUM_RR GPR - gen - move %2, {IND_RR_H, %1.reg1, %1.reg2} - with SUM_RC GPR - gen - move %2, {IND_RC_H, %1.reg, %1.off} - with GPR SEX_H - gen - move %2.reg, {IND_RC_H, %1, 0} - with SUM_RR SEX_H - gen - move %2.reg, {IND_RR_H, %1.reg1, %1.reg2} - with SUM_RC SEX_H - gen - move %2.reg, {IND_RC_H, %1.reg, %1.off} - with LABEL GPR - gen - move %2, {IND_LABEL_H, %1.adr} + add %a, %1, GP + sth %2, {GPROFFSET, %a, 0} pat sti $1==INT32 /* Store quad indirect */ with GPR GPR + uses REG gen - move %2, {IND_RC_Q, %1, 0} - with SUM_RR GPR - gen - move %2, {IND_RR_Q, %1.reg1, %1.reg2} - with SUM_RC GPR - gen - move %2, {IND_RC_Q, %1.reg, %1.off} - with LABEL GPR - gen - move %2, {IND_LABEL_Q, %1.adr} + add %a, %1, GP + st %2, {GPROFFSET, %a, 0} #if 0 pat sti $1==INT64 /* Store double-word indirect */ @@ -931,38 +684,32 @@ PATTERNS /* Word arithmetic */ pat adi $1==INT32 /* Add word (second + top) */ - with REG REG - yields {SUM_RR, %1, %2} - with CONST REG - yields {SUM_RC, %2, %1.val} - with REG CONST - yields {SUM_RC, %1, %2.val} - with CONST SUM_RC - yields {SUM_RC, %2.reg, %2.off+%1.val} - with CONST LABEL - yields {LABEL, %2.adr+%1.val} + with GPRI+CONST GPRI + uses reusing %2, REG=%2 + gen + add %a, %1 + yields %a + with GPRI GPRI+CONST + uses reusing %1, REG=%1 + gen + add %a, %2 + yields %a + + pat sbi $1==INT32 /* Subtract word (second - top) */ + with GPRI+CONST GPRI + uses reusing %2, REG=%2 + gen + sub %a, %1 + yields %a + + pat ngi $1==INT32 /* Negate word */ + with GPRI + uses reusing %1, REG=%1 + gen + neg %a, %a + yields %a #if 0 - pat sbi $1==4 /* 
Subtract word (second - top) */ - with REG REG - uses reusing %2, REG - gen - subf %a, %1, %2 - yields %a - with CONST REG - yields {SUM_RC, %2, 0-%1.val} - with CONST SUM_RC - yields {SUM_RC, %2.reg, %2.off-%1.val} - with CONST LABEL - yields {LABEL, %2.adr+(0-%1.val)} - - pat ngi $1==4 /* Negate word */ - with REG - uses reusing %1, REG - gen - neg %a, %1 - yields %a - pat mli $1==4 /* Multiply word (second * top) */ with REG REG uses reusing %2, REG @@ -1087,10 +834,10 @@ PATTERNS #endif pat sli $1==4 /* Shift left (second << top) */ - with CONST+GPR GPR - uses reusing %2, REG + with CONST+GPRI GPRI + uses reusing %2, REG=%2 gen - lsl %a, %2, %1 + lsl %a, %1 yields %a #if 0 @@ -1248,79 +995,40 @@ PATTERNS /* Simple branches */ - pat zeq /* Branch if signed top == 0 */ - with GPR STACK - gen - cmp %1, {CONST, 0} - beq {LABEL, $1} - - pat beq - with GPR GPR STACK - gen - cmp %1, %2 - beq {LABEL, $1} - - pat zne /* Branch if signed top != 0 */ - with GPR STACK + proc anyz example zeq + with GPRI STACK gen cmp %1, {CONST, 0} - bne {LABEL, $1} + beq[1] {LABEL, $1} - pat bne - with GPR GPR STACK + pat zeq call anyz("b.eq") /* Branch if signed top == 0 */ + pat zne call anyz("b.ne") /* Branch if signed top != 0 */ + pat zgt call anyz("b.gt") /* Branch if signed top > 0 */ + pat zlt call anyz("b.lt") /* Branch if signed top > 0 */ + + proc anyb example beq + with GPR+CONST GPRI STACK gen - cmp %1, %2 - bne {LABEL, $1} + cmp %2, %1 + beq[1] {LABEL, $1} + + pat beq call anyz("b.eq") /* Branch if signed second == top */ + pat bne call anyz("b.ne") /* Branch if signed second != top */ + pat bgt call anyz("b.gt") /* Branch if signed second > top */ + + proc anycmpb example cmu zeq + with GPR+CONST GPRI STACK + gen + cmp %2, %1 + beq[1] {LABEL, $2} + + pat cmu zgt call anycmpb("b.hi") /* Branch if unsigned second > top */ + pat cmu zlt call anycmpb("b.lo") /* Branch if unsigned second < top */ + pat cmu zge call anycmpb("b.hs") /* Branch if unsigned second >= top */ + pat cmu 
zle call anycmpb("b.ls") /* Branch if unsigned second <= top */ + #if 0 - pat zgt /* Branch if signed top > 0 */ - with TRISTATE_ALL+GPR STACK - gen - move %1, C0 - bc IFTRUE, GT, {LABEL, $1} - - pat bgt - leaving - cmi INT32 - zgt $1 - - pat zge /* Branch if signed top >= 0 */ - with TRISTATE_ALL+GPR STACK - gen - move %1, C0 - bc IFFALSE, LT, {LABEL, $1} - - pat bge - leaving - cmi INT32 - zge $1 - - pat zlt /* Branch if signed top < 0 */ - with TRISTATE_ALL+GPR STACK - gen - move %1, C0 - bc IFTRUE, LT, {LABEL, $1} - - pat blt - leaving - cmi INT32 - zlt $1 - - pat zle /* Branch if signed top >= 0 */ - with TRISTATE_ALL+GPR STACK - gen - move %1, C0 - bc IFFALSE, GT, {LABEL, $1} - - pat ble - leaving - cmi INT32 - zle $1 -#endif - - -#if 0 -/* Compare and jump */ pat cmi /* Signed tristate compare */ with CONST GPR @@ -1333,6 +1041,7 @@ PATTERNS yields {TRISTATE_RC_U, %2, %1.val} with GPR GPR yields {TRISTATE_RR_U, %2, %1} +#endif pat cmp /* Compare pointers */ leaving @@ -1342,7 +1051,7 @@ PATTERNS leaving cmi INT32 - +#if 0 /* Other branching and labelling */ @@ -1554,27 +1263,24 @@ PATTERNS loe ".reghp" pat str $1==0 /* Store FP */ - with GPR + with GPRI gen - move %1, FP - + sub FP, %1, GP + pat str $1==1 /* Store SP */ - with GPR + with GPRI gen - move %1, SP - + sub SP, %1, GP + pat str $1==2 /* Store HP */ leaving ste ".reghp" pat ass /* Adjust stack by variable amount */ - with CONST - gen - move {SUM_RC, SP, %1.val}, SP - with GPR - gen - move {SUM_RR, SP, %1}, SP - + with CONST+GPRI + gen + add SP, %1 + pat asp /* Adjust stack by constant amount */ leaving loc $1 From e5341e41674fc43b820200980a535f551adcfc9e Mon Sep 17 00:00:00 2001 From: David Given Date: Mon, 20 May 2013 22:41:27 +0100 Subject: [PATCH 15/76] Remember to clear ackflags on reset. 
--HG-- branch : dtrg-videocore --- first/core.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/first/core.mk b/first/core.mk index d2e32fc68..b50549da3 100644 --- a/first/core.mk +++ b/first/core.mk @@ -3,6 +3,7 @@ define reset $(eval o :=) $(eval s :=) $(eval cflags :=) + $(eval ackflags :=) $(eval ldflags :=) $(eval objdir :=) endef From d6565f4d5b2317f37b9aa671986ce1ce944547a0 Mon Sep 17 00:00:00 2001 From: David Given Date: Mon, 20 May 2013 23:27:31 +0100 Subject: [PATCH 16/76] Fix typo. --HG-- branch : dtrg-videocore --- mach/vc4/as/mach3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mach/vc4/as/mach3.c b/mach/vc4/as/mach3.c index 72c0c2a88..0ae247226 100644 --- a/mach/vc4/as/mach3.c +++ b/mach/vc4/as/mach3.c @@ -54,7 +54,7 @@ 0, CC, 2, ".cs", 0, CC, 2, ".lo", 0, CC, 3, ".cc", -0, CC, 3, ".hg", +0, CC, 3, ".hs", 0, CC, 4, ".mi", 0, CC, 5, ".pl", 0, CC, 6, ".vs", From 6cdea73e849242e913a5c9599385088169e680ac Mon Sep 17 00:00:00 2001 From: David Given Date: Mon, 20 May 2013 23:27:45 +0100 Subject: [PATCH 17/76] Add a lot more opcodes. --HG-- branch : dtrg-videocore --- mach/vc4/ncg/table | 319 ++++++++++++++++++++------------------------- 1 file changed, 143 insertions(+), 176 deletions(-) diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index 3dc18a678..2168afb9b 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -13,6 +13,8 @@ INT8 = 1 /* Size of values */ INT16 = 2 INT32 = 4 INT64 = 8 +FLOAT32 = 4 +FLOAT64 = 8 FP_OFFSET = 0 /* Offset of saved FP relative to our FP */ PC_OFFSET = 4 /* Offset of saved PC relative to our FP */ @@ -100,6 +102,7 @@ INSTRUCTIONS add GPRI:wo, GPRI:ro, GPRI+CONST:ro. add GPRI:rw, GPRI+CONST:ro. + and GPRI:rw, GPRI+CONST:ro. beq "b.eq" LABEL:ro. bne "b.ne" LABEL:ro. bgt "b.gt" LABEL:ro. @@ -108,6 +111,9 @@ INSTRUCTIONS b GPRI+LABEL:ro. bl GPRI+LABEL:ro. cmp GPRI:ro, GPRI+CONST:ro kills :cc. + divs GPRI:wo, GPRI:ro, GPRI+CONST:ro. + divu GPRI:wo, GPRI:ro, GPRI+CONST:ro. 
+ eor GPRI:rw, GPRI+CONST:ro. exts GPRI:wo, GPRI:ro, GPRI+CONST:ro. exts GPRI:rw, GPRI+CONST:ro. ld GPRI:wo, GPROFFSET+GPRGPR+LABEL:ro. @@ -118,7 +124,9 @@ INSTRUCTIONS lsl GPRI:rw, GPRI+CONST:ro. lsl GPRI:wo, GPRI:ro, GPRI+CONST:ro. mov GPRI:wo, GPRI+CONST:ro. + mul GPRI:rw, GPRI+CONST:ro. neg GPRI:rw, GPRI+CONST:ro. + or GPRI:rw, GPRI+CONST:ro. pop STACKABLE:wo. pop STACKABLE:wo, GPRLR+GPRPC:wo. push STACKABLE:ro. @@ -547,17 +555,14 @@ PATTERNS ld %a, {GPROFFSET, %a, 0} yields %a -#if 0 pat loi $1==INT64 /* Load double-quad indirect */ - with GPR - yields {IND_RC_D, %1, 0} - with SUM_RC - yields {IND_RC_D, %1.reg, %1.off} - with SUM_RR - yields {IND_RR_D, %1.reg1, %1.reg2} - with LABEL - yields {IND_LABEL_D, %1.adr} -#endif + with GPRI + uses reusing %1, REG, REG + gen + add %a, %1, GP + ld %b, {GPROFFSET, %a, 4} + ld %a, {GPROFFSET, %a, 0} + yields %a %b pat loi /* Load arbitrary size */ leaving @@ -565,7 +570,7 @@ PATTERNS los INT32 pat los /* Load arbitrary size */ - with GPR0 GPR1 STACK + with STACK kills ALL gen bl {LABEL, ".los"} @@ -592,29 +597,13 @@ PATTERNS add %a, %1, GP st %2, {GPROFFSET, %a, 0} -#if 0 - pat sti $1==INT64 /* Store double-word indirect */ - with GPR FD + pat sti $1==INT64 /* Store double-quad indirect */ + with GPR GPR + uses REG gen - move %2, {IND_RC_D, %1, 0} - with SUM_RR FD - gen - move %2, {IND_RR_D, %1.reg1, %1.reg2} - with SUM_RC FD - gen - move %2, {IND_RC_D, %1.reg, %1.off} - with GPR GPR GPR - gen - stw %2, {GPROFFSET, %1, 0} - stw %3, {GPROFFSET, %1, 4} - with SUM_RC GPR GPR - gen - move %2, {IND_RC_Q, %1.reg, %1.off} - move %3, {IND_RC_Q, %1.reg, %1.off+4} - with LABEL FD - gen - move %2, {IND_LABEL_D, %1.adr} - + add %a, %1, GP + st %1, {GPROFFSET, %a, 0} + st %2, {GPROFFSET, %a, 4} pat sti /* Store arbitrary size */ leaving @@ -622,12 +611,11 @@ PATTERNS sts INT32 pat sts /* Load arbitrary size */ - with GPR3 GPR4 STACK + with STACK kills ALL gen bl {LABEL, ".sts"} -#endif - + /* Arithmetic wrappers */ @@ -709,6 +697,65 
@@ PATTERNS neg %a, %a yields %a + pat and $1==INT32 /* AND word */ + with GPRI+CONST GPRI + uses reusing %2, REG=%2 + gen + and %a, %1 + yields %a + with GPRI GPRI+CONST + uses reusing %1, REG=%1 + gen + and %a, %2 + yields %a + + pat ior $1==INT32 /* OR word */ + with GPRI+CONST GPRI + uses reusing %2, REG=%2 + gen + or %a, %1 + yields %a + with GPRI GPRI+CONST + uses reusing %1, REG=%1 + gen + or %a, %2 + yields %a + + pat xor $1==INT32 /* XOR word */ + with GPRI+CONST GPRI + uses reusing %2, REG=%2 + gen + eor %a, %1 + yields %a + with GPRI GPRI+CONST + uses reusing %1, REG=%1 + gen + eor %a, %2 + yields %a + + pat dvi $1==INT32 /* Divide word (second / top) */ + with GPRI GPRI + uses reusing %2, REG + gen + divs %a, %2, %1 + yields %a + + pat dvu $1==INT32 /* Divide unsigned word (second / top) */ + with GPRI GPRI + uses reusing %2, REG + gen + divu %a, %2, %1 + yields %a + + pat rmu $1==INT32 /* Remainder unsigned word (second % top) */ + with GPRI GPRI + uses REG + gen + divu %a, %2, %1 + mul %a, %1 + sub %a, %2 + yields %a + #if 0 pat mli $1==4 /* Multiply word (second * top) */ with REG REG @@ -717,20 +764,6 @@ PATTERNS mullw %a, %2, %1 yields %a - pat dvi $1==4 /* Divide word (second / top) */ - with REG REG - uses reusing %2, REG - gen - divw %a, %2, %1 - yields %a - - pat dvu $1==4 /* Divide unsigned word (second / top) */ - with REG REG - uses reusing %2, REG - gen - divwu %a, %2, %1 - yields %a - pat rmi $1==4 /* Remainder word (second % top) */ with REG REG uses REG @@ -740,56 +773,11 @@ PATTERNS subf %a, %a, %2 yields %a - pat rmu $1==4 /* Remainder unsigned word (second % top) */ - with REG REG - uses REG - gen - divwu %a, %2, %1 - mullw %a, %a, %1 - subf %a, %a, %2 - yields %a - - pat and $1==4 /* AND word */ - with GPR NOT_R - uses reusing %1, REG - gen - andc %a, %1, %2.reg - yields %a - with NOT_R GPR - uses reusing %1, REG - gen - andc %a, %2, %1.reg - yields %a - with GPR GPR - yields {AND_RR, %1, %2} - with GPR CONST - yields {AND_RC, %1, 
%2.val} - with CONST GPR - yields {AND_RC, %2, %1.val} - pat and !defined($1) /* AND set */ with STACK gen bl {LABEL, ".and"} - pat ior $1==4 /* OR word */ - with GPR NOT_R - uses reusing %1, REG - gen - orc %a, %1, %2.reg - yields %a - with NOT_R GPR - uses reusing %2, REG - gen - orc %a, %2, %1.reg - yields %a - with GPR GPR - yields {OR_RR, %1, %2} - with GPR CONST - yields {OR_RC, %1, %2.val} - with CONST GPR - yields {OR_RC, %2, %1.val} - pat ior !defined($1) /* OR set */ with STACK gen @@ -924,72 +912,27 @@ PATTERNS with GPR3 STACK gen bl {LABEL, ".inn"} +#endif /* Boolean resolutions */ - pat teq /* top = (top == 0) */ - with TRISTATE_ALL + GPR - uses reusing %1, REG + proc anyt example teq + with GPRI + uses reusing %1, REG=%1 gen - move %1, C0 - move C0, SCRATCH - move {LABEL, ".teq_table"}, %a - lwzx %a, %a, SCRATCH - yields %a - - pat tne /* top = (top != 0) */ - with TRISTATE_ALL + GPR - uses reusing %1, REG - gen - move %1, C0 - move C0, SCRATCH - move {LABEL, ".tne_table"}, %a - lwzx %a, %a, SCRATCH - yields %a - - pat tlt /* top = (top < 0) */ - with TRISTATE_ALL + GPR - uses reusing %1, REG - gen - move %1, C0 - move C0, SCRATCH - move {LABEL, ".tlt_table"}, %a - lwzx %a, %a, SCRATCH - yields %a - - pat tle /* top = (top <= 0) */ - with TRISTATE_ALL + GPR - uses reusing %1, REG - gen - move %1, C0 - move C0, SCRATCH - move {LABEL, ".tle_table"}, %a - lwzx %a, %a, SCRATCH - yields %a - - pat tgt /* top = (top > 0) */ - with TRISTATE_ALL + GPR - uses reusing %1, REG - gen - move %1, C0 - move C0, SCRATCH - move {LABEL, ".tgt_table"}, %a - lwzx %a, %a, SCRATCH + cmp %1, {CONST, 0} + mov %a, {CONST, 0} + add[1] %a, {CONST, 1} yields %a - pat tge /* top = (top >= 0) */ - with TRISTATE_ALL + GPR - uses reusing %1, REG - gen - move %1, C0 - move C0, SCRATCH - move {LABEL, ".tge_table"}, %a - lwzx %a, %a, SCRATCH - yields %a - -#endif + pat cmu teq call anyt("add.eq") /* top = (top == 0) */ + pat cmu tne call anyt("add.ne") /* top = (top != 0) */ + pat cmu 
tlt call anyt("add.lo") /* top = unsigned (top < 0) */ + pat cmu tle call anyt("add.ls") /* top = unsigned (top <= 0) */ + pat cmu tgt call anyt("add.hi") /* top = unsigned (top > 0) */ + pat cmu tge call anyt("add.hs") /* top = unsigned (top >= 0) */ @@ -1004,7 +947,9 @@ PATTERNS pat zeq call anyz("b.eq") /* Branch if signed top == 0 */ pat zne call anyz("b.ne") /* Branch if signed top != 0 */ pat zgt call anyz("b.gt") /* Branch if signed top > 0 */ - pat zlt call anyz("b.lt") /* Branch if signed top > 0 */ + pat zlt call anyz("b.lt") /* Branch if signed top < 0 */ + pat zge call anyz("b.ge") /* Branch if signed top >= 0 */ + pat zle call anyz("b.le") /* Branch if signed top <= 0 */ proc anyb example beq with GPR+CONST GPRI STACK @@ -1015,6 +960,9 @@ PATTERNS pat beq call anyz("b.eq") /* Branch if signed second == top */ pat bne call anyz("b.ne") /* Branch if signed second != top */ pat bgt call anyz("b.gt") /* Branch if signed second > top */ + pat bge call anyz("b.ge") /* Branch if signed second >= top */ + pat blt call anyz("b.lt") /* Branch if signed second < top */ + pat ble call anyz("b.le") /* Branch if signed second <= top */ proc anycmpb example cmu zeq with GPR+CONST GPRI STACK @@ -1050,7 +998,22 @@ PATTERNS pat cms $1==INT32 /* Compare blocks (word sized) */ leaving cmi INT32 - + + proc anycmf64 example teq + with STACK + uses REG + gen + bl {LABEL, ".cmf8"} + mov %a, {CONST, 0} + add[1] %a, {CONST, 1} + yields %a + + pat cmf tlt $1==FLOAT64 call anyt("add.lo") /* top = unsigned (top < 0) */ + pat cmf tle $1==FLOAT64 call anyt("add.ls") /* top = unsigned (top <= 0) */ + pat cmf tgt $1==FLOAT64 call anyt("add.hi") /* top = unsigned (top > 0) */ + pat cmf tge $1==FLOAT64 call anyt("add.hs") /* top = unsigned (top >= 0) */ + + #if 0 @@ -1121,18 +1084,19 @@ PATTERNS mov SP, FP pop FP, PC -#if 0 pat blm /* Block move constant length */ - with GPR GPR STACK + with GPRI GPRI STACK uses REG gen - move {CONST, $1}, %a - stwu %a, {GPROFFSET, SP, 0-4} - stwu %2, 
{GPROFFSET, SP, 0-4} - stwu %1, {GPROFFSET, SP, 0-4} + sub SP, {CONST, 12} + mov %a, {CONST, $1} + st %1, {GPROFFSET, SP, 0} + st %2, {GPROFFSET, SP, 4} + st %a, {GPROFFSET, SP, 8} bl {LABEL, "_memmove"} - addi SP, SP, {CONST, 12} + add SP, {CONST, 12} +#if 0 pat bls /* Block move variable length */ with GPR GPR GPR STACK gen @@ -1141,16 +1105,17 @@ PATTERNS stwu %2, {GPROFFSET, SP, 0-4} bl {LABEL, "_memmove"} addi SP, SP, {CONST, 12} +#endif pat csa /* Array-lookup switch */ - with GPR3 GPR4 STACK + with STACK gen - b {LABEL, ".csa"} + bl {LABEL, ".csa"} pat csb /* Table-lookup switch */ - with GPR3 GPR4 STACK + with STACK gen - b {LABEL, ".csb"} + bl {LABEL, ".csb"} @@ -1179,7 +1144,7 @@ PATTERNS ste ".ignmask" pat trp /* Raise EM trap */ - with GPR3 + with GPR0 gen bl {LABEL, ".trap"} @@ -1204,7 +1169,8 @@ PATTERNS with GPR uses reusing %1, REG gen - lwz %a, {GPROFFSET, %1, FP_OFFSET} + ld %a, {GPROFFSET, %1, FP_OFFSET} + sub %a, GP yields %a pat lpb /* Convert FP to argument address */ @@ -1217,15 +1183,17 @@ PATTERNS lpb pat gto /* longjmp */ - uses REG + uses REG, REG gen move {LABEL, $1}, %a - move {IND_RC_Q, %a, 8}, FP - move {IND_RC_Q, %a, 4}, SP - move {IND_RC_Q, %a, 0}, %a - mtspr CTR, %a - bcctr ALWAYS, {CONST, 0}, {CONST, 0} - + ld %b, {GPROFFSET, %a, 8} + add FP, %b, GP + ld %b, {GPROFFSET, %a, 4} + add SP, %b, GP + ld %b, {GPROFFSET, %a, 0} + add %b, GP + b %b + #if 0 pat gto /* longjmp */ @@ -1243,7 +1211,6 @@ PATTERNS gen wspec {CONST, $1} -#endif #endif pat lor $1==0 /* Load FP */ From 877e06ed890a9d17146079c09ef34beb2f4ebd7d Mon Sep 17 00:00:00 2001 From: David Given Date: Tue, 21 May 2013 18:16:30 +0100 Subject: [PATCH 18/76] Lots more opcodes including float support. Define float and double to be the same thing (as the VC4 seems not to have double-precision float support). 
--HG-- branch : dtrg-videocore --- mach/vc4/ncg/table | 445 ++++++++++++++++++++++++++------------------- plat/rpi/build.mk | 1 + plat/rpi/descr | 2 +- 3 files changed, 262 insertions(+), 186 deletions(-) diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index 2168afb9b..368c6177e 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -9,12 +9,9 @@ EM_WSIZE = 4 EM_PSIZE = 4 EM_BSIZE = 8 /* two words saved in call frame */ -INT8 = 1 /* Size of values */ -INT16 = 2 -INT32 = 4 -INT64 = 8 -FLOAT32 = 4 -FLOAT64 = 8 +BYTE = 1 /* Size of values */ +WORD = 2 +QUAD = 4 FP_OFFSET = 0 /* Offset of saved FP relative to our FP */ PC_OFFSET = 4 /* Offset of saved PC relative to our FP */ @@ -22,7 +19,7 @@ PC_OFFSET = 4 /* Offset of saved PC relative to our FP */ #define COMMENT(n) /* noop */ -#define nicesize(x) ((x)==INT8 || (x)==INT16 || (x)==INT32 || (x)==INT64) +#define nicesize(x) ((x)==BYTE || (x)==WORD || (x)==QUAD) @@ -103,6 +100,7 @@ INSTRUCTIONS add GPRI:wo, GPRI:ro, GPRI+CONST:ro. add GPRI:rw, GPRI+CONST:ro. and GPRI:rw, GPRI+CONST:ro. + asr GPRI:rw, GPRI+CONST:ro. beq "b.eq" LABEL:ro. bne "b.ne" LABEL:ro. bgt "b.gt" LABEL:ro. @@ -116,6 +114,11 @@ INSTRUCTIONS eor GPRI:rw, GPRI+CONST:ro. exts GPRI:wo, GPRI:ro, GPRI+CONST:ro. exts GPRI:rw, GPRI+CONST:ro. + fadd GPRI:wo, GPRI:ro, GPRI:ro. + fcmp GPRI:wo, GPRI:ro, GPRI:ro. + fdiv GPRI:wo, GPRI:ro, GPRI:ro. + fmul GPRI:wo, GPRI:ro, GPRI:ro. + fsub GPRI:wo, GPRI:ro, GPRI:ro. ld GPRI:wo, GPROFFSET+GPRGPR+LABEL:ro. ldb GPRI:wo, GPROFFSET+GPRGPR+LABEL:ro. ldh GPRI:wo, GPROFFSET+GPRGPR+LABEL:ro. @@ -123,6 +126,7 @@ INSTRUCTIONS lea GPRI:wo, LABEL:ro. lsl GPRI:rw, GPRI+CONST:ro. lsl GPRI:wo, GPRI:ro, GPRI+CONST:ro. + lsr GPRI:rw, GPRI+CONST:ro. mov GPRI:wo, GPRI+CONST:ro. mul GPRI:rw, GPRI+CONST:ro. neg GPRI:rw, GPRI+CONST:ro. 
@@ -189,9 +193,9 @@ STACKINGRULES from GPR to STACK uses STACKABLE - gen - move %1, %a - push %a + gen + move %1, %a + push %a from GPR to STACK gen @@ -200,9 +204,9 @@ STACKINGRULES from GPRE to STACK uses STACKABLE - gen - move %1, %a - push %a + gen + move %1, %a + push %a from GPRE to STACK gen @@ -222,6 +226,7 @@ STACKINGRULES sub SP, SP, {CONST, 4} st SCRATCH, {GPROFFSET, SP, 0} + COERCIONS @@ -259,22 +264,22 @@ PATTERNS pat loc /* Load constant */ yields {CONST, $1} - pat dup $1<=INT32 /* Duplicate word on top of stack */ - with GPR + pat dup $1<=QUAD /* Duplicate word on top of stack */ + with GPRI yields %1 %1 - pat dup $1==INT64 /* Duplicate double-word on top of stack */ - with GPR GPR - yields %2 %1 %2 %1 - - pat exg $1==INT32 /* Exchange top two words on stack */ - with GPR GPR + pat dup $1<=(2*QUAD) /* Duplicate word pair on top of stack */ + with GPRI GPRI + yields %1 %2 %1 %2 + + pat exg $1==QUAD /* Exchange top two words on stack */ + with GPRI GPRI yields %1 %2 #if 0 pat stl lol $1==$2 /* Store then load local */ leaving - dup INT32 + dup QUAD stl $1 #endif @@ -298,7 +303,7 @@ PATTERNS loc $2 cii - pat loc loc cii loc loc cii $2==INT32 && $5==INT32 && $4<$2 /* madness, generated by the C compiler */ + pat loc loc cii loc loc cii $2==QUAD && $5==QUAD && $4<$2 /* madness, generated by the C compiler */ leaving loc $4 loc $5 @@ -319,20 +324,20 @@ PATTERNS pat loc loc cui $1==$2 /* unsigned X -> signed X */ /* nop */ - pat loc loc cui $1==INT8 && $2==INT32 /* unsigned char -> signed int */ + pat loc loc cui $1==BYTE && $2==QUAD /* unsigned char -> signed int */ /* nop */ - pat loc loc cui $1==INT16 && $2==INT32 /* unsigned short -> signed int */ + pat loc loc cui $1==WORD && $2==QUAD /* unsigned short -> signed int */ /* nop */ - pat loc loc cii $1==INT8 && $2>INT8 /* signed char -> anything */ + pat loc loc cii $1==BYTE && $2>BYTE /* signed char -> anything */ with REG uses reusing %1, REG=%1 gen exts %a, {CONST, 8} yields %a - pat loc loc cii 
$1==INT16 && $2>INT16 /* signed short -> anything */ + pat loc loc cii $1==WORD && $2>WORD /* signed short -> anything */ with REG uses reusing %1, REG=%1 gen @@ -362,7 +367,7 @@ PATTERNS pat ldl /* Load double-word from local */ leaving lal $1 - loi INT32*2 + loi QUAD*2 pat stl inreg($1)>0 /* Store to local */ with CONST+GPRI @@ -378,7 +383,7 @@ PATTERNS pat sdl /* Store double-word to local */ leaving lal $1 - sti INT32*2 + sti QUAD*2 pat lil inreg($1)>0 /* Load from indirected local */ uses REG @@ -389,12 +394,12 @@ PATTERNS pat lil /* Load from indirected local */ leaving lol $1 - loi INT32 + loi QUAD pat sil /* Save to indirected local */ leaving lol $1 - sti INT32 + sti QUAD pat stl lol $1==$2 /* Save then load (generated by C compiler) */ leaving @@ -433,22 +438,12 @@ PATTERNS pat loe /* Load word external */ leaving lae $1 - loi INT32 + loi QUAD pat ste /* Store word external */ leaving lae $1 - sti INT32 - - pat lde /* Load double-word external */ - leaving - lae $1 - loi INT64 - - pat sde /* Store double-word external */ - leaving - lae $1 - sti INT64 + sti QUAD pat zre /* Zero external */ leaving @@ -478,28 +473,19 @@ PATTERNS pat lof /* Load word offsetted */ leaving adp $1 - loi INT32 + loi QUAD - pat ldf /* Load double-word offsetted */ - leaving - adp $1 - loi INT64 pat stf /* Store word offsetted */ leaving adp $1 - sti INT32 + sti QUAD - pat sdf /* Store double-word offsetted */ - leaving - adp $1 - sti INT64 - /* Loads and stores */ - pat loi $1==INT8 /* Load byte indirect */ + pat loi $1==BYTE /* Load byte indirect */ with GPR uses reusing %1, REG gen @@ -512,7 +498,7 @@ PATTERNS yields %a #if 0 - pat loi loc loc cii $1==INT16 && $2==INT16 && $3==INT32 /* Load half-word indirect and sign extend */ + pat loi loc loc cii $1==WORD && $2==WORD && $3==QUAD /* Load half-word indirect and sign extend */ with GPR uses REG gen @@ -529,7 +515,7 @@ PATTERNS move {IND_RC_H_S, %1.reg, %1.off}, %a yields %a - pat loi $1==INT16 /* Load half-word indirect */ + 
pat loi $1==WORD /* Load half-word indirect */ with GPR uses REG gen @@ -547,7 +533,7 @@ PATTERNS yields %a #endif - pat loi $1==INT32 /* Load quad indirect */ + pat loi $1==QUAD /* Load quad indirect */ with GPR uses reusing %1, REG gen @@ -555,19 +541,10 @@ PATTERNS ld %a, {GPROFFSET, %a, 0} yields %a - pat loi $1==INT64 /* Load double-quad indirect */ - with GPRI - uses reusing %1, REG, REG - gen - add %a, %1, GP - ld %b, {GPROFFSET, %a, 4} - ld %a, {GPROFFSET, %a, 0} - yields %a %b - pat loi /* Load arbitrary size */ leaving loc $1 - los INT32 + los QUAD pat los /* Load arbitrary size */ with STACK @@ -575,7 +552,7 @@ PATTERNS gen bl {LABEL, ".los"} - pat sti $1==INT8 /* Store byte indirect */ + pat sti $1==BYTE /* Store byte indirect */ with GPR GPRI gen stb %2, {GPRGPR, %1, GP} @@ -583,32 +560,24 @@ PATTERNS gen stb %2, {GPRGPR, %1.reg, GP} - pat sti $1==INT16 /* Store half-word indirect */ + pat sti $1==WORD /* Store half-word indirect */ with GPR GPR uses REG gen add %a, %1, GP sth %2, {GPROFFSET, %a, 0} - pat sti $1==INT32 /* Store quad indirect */ + pat sti $1==QUAD /* Store quad indirect */ with GPR GPR uses REG gen add %a, %1, GP st %2, {GPROFFSET, %a, 0} - pat sti $1==INT64 /* Store double-quad indirect */ - with GPR GPR - uses REG - gen - add %a, %1, GP - st %1, {GPROFFSET, %a, 0} - st %2, {GPROFFSET, %a, 4} - pat sti /* Store arbitrary size */ leaving loc $1 - sts INT32 + sts QUAD pat sts /* Load arbitrary size */ with STACK @@ -671,7 +640,7 @@ PATTERNS /* Word arithmetic */ - pat adi $1==INT32 /* Add word (second + top) */ + pat adi $1==QUAD /* Add word (second + top) */ with GPRI+CONST GPRI uses reusing %2, REG=%2 gen @@ -683,21 +652,21 @@ PATTERNS add %a, %2 yields %a - pat sbi $1==INT32 /* Subtract word (second - top) */ + pat sbi $1==QUAD /* Subtract word (second - top) */ with GPRI+CONST GPRI uses reusing %2, REG=%2 gen sub %a, %1 yields %a - pat ngi $1==INT32 /* Negate word */ + pat ngi $1==QUAD /* Negate word */ with GPRI uses reusing %1, 
REG=%1 gen neg %a, %a yields %a - pat and $1==INT32 /* AND word */ + pat and $1==QUAD /* AND word */ with GPRI+CONST GPRI uses reusing %2, REG=%2 gen @@ -709,7 +678,7 @@ PATTERNS and %a, %2 yields %a - pat ior $1==INT32 /* OR word */ + pat ior $1==QUAD /* OR word */ with GPRI+CONST GPRI uses reusing %2, REG=%2 gen @@ -721,7 +690,7 @@ PATTERNS or %a, %2 yields %a - pat xor $1==INT32 /* XOR word */ + pat xor $1==QUAD /* XOR word */ with GPRI+CONST GPRI uses reusing %2, REG=%2 gen @@ -733,21 +702,21 @@ PATTERNS eor %a, %2 yields %a - pat dvi $1==INT32 /* Divide word (second / top) */ + pat dvi $1==QUAD /* Divide word (second / top) */ with GPRI GPRI uses reusing %2, REG gen divs %a, %2, %1 yields %a - pat dvu $1==INT32 /* Divide unsigned word (second / top) */ + pat dvu $1==QUAD /* Divide unsigned word (second / top) */ with GPRI GPRI uses reusing %2, REG gen divu %a, %2, %1 yields %a - pat rmu $1==INT32 /* Remainder unsigned word (second % top) */ + pat rmu $1==QUAD /* Remainder unsigned word (second % top) */ with GPRI GPRI uses REG gen @@ -796,7 +765,7 @@ PATTERNS gen bl {LABEL, ".xor"} - pat com $1==INT32 /* NOT word */ + pat com $1==QUAD /* NOT word */ with AND_RR uses REG gen @@ -828,60 +797,50 @@ PATTERNS lsl %a, %1 yields %a -#if 0 pat sri $1==4 /* Shift right signed (second >> top) */ - with CONST GPR - uses reusing %2, REG + with CONST+GPRI GPRI + uses reusing %2, REG=%2 gen - srawi %a, %2, {CONST, %1.val & 0x1F} - yields %a - with GPR GPR - uses reusing %2, REG - gen - sraw %a, %2, %1 + asr %2, %1 yields %a pat sru $1==4 /* Shift right unsigned (second >> top) */ - with CONST GPR - uses reusing %2, REG + with CONST+GPRI GPRI + uses reusing %2, REG=%2 gen - rlwinm %a, %2, {CONST, 32-(%1.val & 0x1F)}, {CONST, (%1.val & 0x1F)}, {CONST, 31} - yields %a - with GPR GPR - uses reusing %2, REG - gen - srw %a, %2, %1 + lsr %2, %1 yields %a +#if 0 /* Arrays */ - pat aar $1==INT32 /* Index array */ + pat aar $1==QUAD /* Index array */ with GPR3 GPR4 GPR5 gen bl 
{LABEL, ".aar4"} yields R3 - pat lae lar $2==INT32 && nicesize(rom($1, 3)) /* Load array */ + pat lae lar $2==QUAD && nicesize(rom($1, 3)) /* Load array */ leaving lae $1 - aar INT32 + aar QUAD loi rom($1, 3) - pat lar $1==INT32 /* Load array */ + pat lar $1==QUAD /* Load array */ with GPR3 GPR4 GPR5 STACK kills ALL gen bl {LABEL, ".lar4"} - pat lae sar $2==INT32 && nicesize(rom($1, 3)) /* Store array */ + pat lae sar $2==QUAD && nicesize(rom($1, 3)) /* Store array */ leaving lae $1 - aar INT32 + aar QUAD sti rom($1, 3) - pat sar $1==INT32 /* Store array */ + pat sar $1==QUAD /* Store array */ with GPR3 GPR4 GPR5 STACK kills ALL gen @@ -895,8 +854,8 @@ PATTERNS pat set defined($1) /* Create word with set bit */ leaving loc 1 - exg INT32 - sli INT32 + exg QUAD + sli QUAD pat set !defined($1) /* Create structure with set bit (variable) */ with GPR3 GPR4 STACK @@ -905,8 +864,8 @@ PATTERNS pat inn defined($1) /* Test for set bit */ leaving - set INT32 - and INT32 + set QUAD + and QUAD pat inn !defined($1) /* Test for set bit (variable) */ with GPR3 STACK @@ -918,21 +877,59 @@ PATTERNS /* Boolean resolutions */ - proc anyt example teq - with GPRI - uses reusing %1, REG=%1 + proc cm_t example teq + with GPRI GPRI + uses reusing %1, REG gen - cmp %1, {CONST, 0} - mov %a, {CONST, 0} - add[1] %a, {CONST, 1} + cmp %1, %2 + mov %a, {CONST, 0} + add[1] %a, {CONST, 1} yields %a - pat cmu teq call anyt("add.eq") /* top = (top == 0) */ - pat cmu tne call anyt("add.ne") /* top = (top != 0) */ - pat cmu tlt call anyt("add.lo") /* top = unsigned (top < 0) */ - pat cmu tle call anyt("add.ls") /* top = unsigned (top <= 0) */ - pat cmu tgt call anyt("add.hi") /* top = unsigned (top > 0) */ - pat cmu tge call anyt("add.hs") /* top = unsigned (top >= 0) */ + pat cmu teq call cm_t("add.eq") /* top = (second == top) */ + pat cmu tne call cm_t("add.ne") /* top = (second != top) */ + pat cmu tlt call cm_t("add.lo") /* top = unsigned (second < top) */ + pat cmu tle call cm_t("add.ls") /* top 
= unsigned (second <= top) */ + pat cmu tgt call cm_t("add.hi") /* top = unsigned (second > top) */ + pat cmu tge call cm_t("add.hs") /* top = unsigned (second >= top) */ + pat cmi teq call cm_t("add.eq") /* top = (second == top) */ + pat cmi tne call cm_t("add.ne") /* top = (second != top) */ + pat cmi tlt call cm_t("add.lt") /* top = signed (second < top) */ + pat cmi tle call cm_t("add.le") /* top = signed (second <= top) */ + pat cmi tgt call cm_t("add.gt") /* top = signed (second > top) */ + pat cmi tge call cm_t("add.ge") /* top = signed (second >= top) */ + + proc cmf_t example teq + with GPRI GPRI + uses reusing %1, REG + gen + fcmp %a, %1, %2 + mov %a, {CONST, 0} + add[1] %a, {CONST, 1} + yields %a + + pat cmf teq call cmf_t("add.eq") /* top = float (second == top) */ + pat cmf tne call cmf_t("add.ne") /* top = float (second != top) */ + pat cmf tlt call cmf_t("add.lo") /* top = float (second < top) */ + pat cmf tle call cmf_t("add.ls") /* top = float (second <= top) */ + pat cmf tgt call cmf_t("add.hi") /* top = float (second > top) */ + pat cmf tge call cmf_t("add.hs") /* top = float (second >= top) */ + + proc fallback_t example teq + with GPRI + uses reusing %1, REG + gen + cmp %1, {CONST, 0} + mov %a, {CONST, 0} + add[1] %a, {CONST, 1} + yields %a + + pat teq call fallback_t("add.eq") /* top = float (top == 0) */ + pat tne call fallback_t("add.ne") /* top = float (top != 0) */ + pat tlt call fallback_t("add.lo") /* top = float (top < 0) */ + pat tle call fallback_t("add.ls") /* top = float (top <= 0) */ + pat tgt call fallback_t("add.hi") /* top = float (top > 0) */ + pat tge call fallback_t("add.hs") /* top = float (top >= 0) */ @@ -964,16 +961,29 @@ PATTERNS pat blt call anyz("b.lt") /* Branch if signed second < top */ pat ble call anyz("b.le") /* Branch if signed second <= top */ - proc anycmpb example cmu zeq + proc cmu_z example cmu zeq with GPR+CONST GPRI STACK gen cmp %2, %1 beq[1] {LABEL, $2} - pat cmu zgt call anycmpb("b.hi") /* Branch if
unsigned second > top */ - pat cmu zlt call anycmpb("b.lo") /* Branch if unsigned second < top */ - pat cmu zge call anycmpb("b.hs") /* Branch if unsigned second >= top */ - pat cmu zle call anycmpb("b.ls") /* Branch if unsigned second <= top */ + pat cmu zgt call cmu_z("b.hi") /* Branch if unsigned second > top */ + pat cmu zlt call cmu_z("b.lo") /* Branch if unsigned second < top */ + pat cmu zge call cmu_z("b.hs") /* Branch if unsigned second >= top */ + pat cmu zle call cmu_z("b.ls") /* Branch if unsigned second <= top */ + + proc cmf_z example cmu zeq + with GPRI GPRI STACK + gen + fcmp %2, %2, %1 + beq[1] {LABEL, $2} + + pat cmf zeq call cmf_z("b.eq") /* Branch if float second == top */ + pat cmf zne call cmf_z("b.ne") /* Branch if float second != top */ + pat cmf zgt call cmf_z("b.gt") /* Branch if float second > top */ + pat cmf zlt call cmf_z("b.lt") /* Branch if float second < top */ + pat cmf zge call cmf_z("b.ge") /* Branch if float second >= top */ + pat cmf zle call cmf_z("b.le") /* Branch if float second <= top */ #if 0 @@ -993,54 +1003,58 @@ PATTERNS pat cmp /* Compare pointers */ leaving - cmu INT32 + cmu QUAD - pat cms $1==INT32 /* Compare blocks (word sized) */ + pat cms $1==QUAD /* Compare blocks (word sized) */ leaving - cmi INT32 - - proc anycmf64 example teq - with STACK - uses REG - gen - bl {LABEL, ".cmf8"} - mov %a, {CONST, 0} - add[1] %a, {CONST, 1} - yields %a - - pat cmf tlt $1==FLOAT64 call anyt("add.lo") /* top = unsigned (top < 0) */ - pat cmf tle $1==FLOAT64 call anyt("add.ls") /* top = unsigned (top <= 0) */ - pat cmf tgt $1==FLOAT64 call anyt("add.hi") /* top = unsigned (top > 0) */ - pat cmf tge $1==FLOAT64 call anyt("add.hs") /* top = unsigned (top >= 0) */ + cmi QUAD + -#if 0 /* Other branching and labelling */ - pat lab topeltsize($1)==4 && !fallthrough($1) +#if 0 + pat lab topeltsize($1)<=4 && !fallthrough($1) gen labeldef $1 - yields R3 + yields R0 - pat lab topeltsize($1)==4 && fallthrough($1) - with GPR3 - gen - labeldef 
$1 - yields %1 + pat lab topeltsize($1)<=4 && fallthrough($1) + with GPR0 + gen + labeldef $1 + yields %1 - pat lab topeltsize($1)!=4 + pat lab topeltsize($1)>4 with STACK - kills ALL - gen - labeldef $1 + kills ALL + gen + labeldef $1 + + pat bra topeltsize($1)<=4 /* Unconditional jump with TOS register */ + with GPR0 STACK + gen + b {LABEL, $1} + + pat bra topeltsize($1)>4 /* Unconditional jump without TOS register */ + with STACK + gen + b {LABEL, $1} #endif - pat bra /* Unconditional jump */ + pat lab with STACK - gen - b {LABEL, $1} + kills ALL + gen + labeldef $1 + + pat bra + with STACK + gen + b {LABEL, $1} + @@ -1058,31 +1072,27 @@ PATTERNS gen bl %1 - pat lfr $1==INT32 /* Load function result, word */ + pat lfr $1==QUAD /* Load function result, word */ yields R0 - pat lfr $1==INT64 /* Load function result, double-word */ - yields R0 R1 - pat ret $1==0 /* Return from procedure */ gen return - mov SP, FP - pop FP, PC + mov SP, FP + pop FP, PC - pat ret $1==INT32 /* Return from procedure, word */ + pat ret $1<=QUAD /* Return from procedure, word */ with GPR0 - gen - return - mov SP, FP - pop FP, PC - - pat ret $1==INT64 /* Return from procedure, double-word */ - with GPR0 GPR1 - gen - return - mov SP, FP - pop FP, PC + gen + return + mov SP, FP + pop FP, PC + with STACK + gen + pop R0 + return + mov SP, FP + pop FP, PC pat blm /* Block move constant length */ with GPRI GPRI STACK @@ -1253,3 +1263,68 @@ PATTERNS loc $1 ass + + +/* Floating point */ + + pat ngf /* Negate float */ + leaving + loc 0 + exg QUAD + sbf QUAD + + proc simple_f example adf + with GPRI GPRI + uses reusing %1, REG + gen + fadd[1] %a, %2, %1 + yields %a + + pat adf call simple_f("fadd") /* Float subtract (second + top) */ + pat sbf call simple_f("fsub") /* Float subtract (second - top) */ + pat mlf call simple_f("fmul") /* Float multiply (second * top) */ + pat dvf call simple_f("fdiv") /* Float divide (second / top) */ + + pat loc loc cff $1==$2 && $1==QUAD /* Convert float to float */ 
+ leaving + nop + + pat loc loc cfi $1==$2 && $1==QUAD /* Convert float -> integer */ + with GPR0 + gen + bl {LABEL, ".cfi"} + yields R0 + + pat loc loc cfu $1==$2 && $1==QUAD /* Convert float -> unsigned */ + with GPR0 + gen + bl {LABEL, ".cfu"} + yields R0 + + pat loc loc cif $1==$2 && $1==QUAD /* Convert integer -> float */ + with GPR0 + gen + bl {LABEL, ".cif"} + yields R0 + + pat loc loc cuf $1==$2 && $1==QUAD /* Convert unsigned -> float */ + with GPR0 + gen + bl {LABEL, ".cuf"} + yields R0 + + pat fef /* Split float */ + with GPR0 + kills GPR1 + gen + bl {LABEL, ".fef"} + yields R0 R1 + + pat fif /* Multiply float and split (?) */ + with GPRI GPRI + kills GPR0, GPR1 + gen + fmul R0, %2, %1 + bl {LABEL, ".fef"} + yields R0 R1 + diff --git a/plat/rpi/build.mk b/plat/rpi/build.mk index 679be61ea..9716eb058 100644 --- a/plat/rpi/build.mk +++ b/plat/rpi/build.mk @@ -12,6 +12,7 @@ OPTIMISATION := -O D := plat/rpi/ platform-headers := \ + unistd.h \ ack/config.h platform-libsys := \ diff --git a/plat/rpi/descr b/plat/rpi/descr index fd9cc4dca..23862840d 100644 --- a/plat/rpi/descr +++ b/plat/rpi/descr @@ -7,7 +7,7 @@ var p=4 var s=2 var l=4 var f=4 -var d=8 +var d=4 var ARCH=vc4 var PLATFORM=rpi var PLATFORMDIR={EM}/share/ack/{PLATFORM} From 92817a6ad7beb7c4565a8c6d51fde04f47f47dd3 Mon Sep 17 00:00:00 2001 From: David Given Date: Tue, 21 May 2013 19:18:11 +0100 Subject: [PATCH 19/76] Allow platforms to specify size of long double; separate alignment from size.
--HG-- branch : dtrg-videocore --- lib/descr/fe | 2 +- plat/cpm/descr | 8 ++++++++ plat/linux386/descr | 14 +++++++++++--- plat/linux68k/descr | 14 +++++++++++--- plat/linuxppc/descr | 14 +++++++++++--- plat/pc86/descr | 8 ++++++++ plat/rpi/descr | 16 ++++++++++++---- 7 files changed, 62 insertions(+), 14 deletions(-) diff --git a/lib/descr/fe b/lib/descr/fe index a8d610362..4e85c64b3 100644 --- a/lib/descr/fe +++ b/lib/descr/fe @@ -84,7 +84,7 @@ name cem -DEM_SSIZE={s} -DEM_LSIZE={l} -DEM_FSIZE={f} -DEM_DSIZE={d}) \ -D_EM_WSIZE={w} -D_EM_PSIZE={p} \ -D_EM_SSIZE={s} -D_EM_LSIZE={l} -D_EM_FSIZE={f} -D_EM_DSIZE={d} \ - -Vw{w}.{w}i{w}.{w}p{p}.{w}f{f}.{w}s{s}.{s}l{l}.{w}d{d}.{w} \ + -Vw{w}.{wa}i{w}.{wa}p{p}.{pa}f{f}.{fa}s{s}.{sa}l{l}.{la}d{d}.{da}x{x}.{xa} \ {CC_ALIGN?} \ {CEM_F?} {LFLAG?} < > callname acc diff --git a/plat/cpm/descr b/plat/cpm/descr index a72f50276..2d626295a 100644 --- a/plat/cpm/descr +++ b/plat/cpm/descr @@ -3,11 +3,19 @@ # $Revision$ var w=2 +var wa=1 var p=2 +var pa=1 var s=2 +var sa=1 var l=4 +var la=1 var f=4 +var fa=1 var d=8 +var da=1 +var x=8 +var xa=1 var ARCH=i80 var PLATFORM=cpm var PLATFORMDIR={EM}/share/ack/{PLATFORM} diff --git a/plat/linux386/descr b/plat/linux386/descr index ff2b26dfb..3bdd9d768 100644 --- a/plat/linux386/descr +++ b/plat/linux386/descr @@ -3,11 +3,19 @@ # $Revision$ var w=4 -var p=4 +var wa=4 +var p={w} +var pa={w} var s=2 -var l=4 -var f=4 +var sa={s} +var l={w} +var la={w} +var f={w} +var fa={w} var d=8 +var da={d} +var x=8 +var xa={x} var ARCH=i386 var PLATFORM=linux386 var PLATFORMDIR={EM}/share/ack/{PLATFORM} diff --git a/plat/linux68k/descr b/plat/linux68k/descr index 14f973d52..997ec7154 100644 --- a/plat/linux68k/descr +++ b/plat/linux68k/descr @@ -3,11 +3,19 @@ # $Revision: 1.1 $ var w=4 -var p=4 +var wa=4 +var p={w} +var pa={w} var s=2 -var l=4 -var f=4 +var sa={s} +var l={w} +var la={w} +var f={w} +var fa={w} var d=8 +var da={d} +var x=8 +var xa={x} var ARCH=m68020 var PLATFORM=linux68k var 
PLATFORMDIR={EM}/share/ack/{PLATFORM} diff --git a/plat/linuxppc/descr b/plat/linuxppc/descr index 3311f24c6..69b00fbc5 100644 --- a/plat/linuxppc/descr +++ b/plat/linuxppc/descr @@ -3,11 +3,19 @@ # $Revision: 1.1 $ var w=4 -var p=4 +var wa=4 +var p={w} +var pa={w} var s=2 -var l=4 -var f=4 +var sa={s} +var l={w} +var la={w} +var f={w} +var fa={w} var d=8 +var da={d} +var x=8 +var xa={x} var ARCH=powerpc var PLATFORM=linuxppc var PLATFORMDIR={EM}/share/ack/{PLATFORM} diff --git a/plat/pc86/descr b/plat/pc86/descr index a3f11fc4f..c282e9cc7 100644 --- a/plat/pc86/descr +++ b/plat/pc86/descr @@ -3,11 +3,19 @@ # $Revision$ var w=2 +var wa=1 var p=2 +var pa=1 var s=2 +var sa=1 var l=4 +var la=1 var f=4 +var fa=1 var d=8 +var da=1 +var x=8 +var xa=1 var ARCH=i86 var PLATFORM=pc86 var PLATFORMDIR={EM}/share/ack/{PLATFORM} diff --git a/plat/rpi/descr b/plat/rpi/descr index 23862840d..6daff9606 100644 --- a/plat/rpi/descr +++ b/plat/rpi/descr @@ -3,11 +3,19 @@ # $Revision$ var w=4 -var p=4 +var wa=4 +var p={w} +var pa={w} var s=2 -var l=4 -var f=4 -var d=4 +var sa={s} +var l={w} +var la={w} +var f={w} +var fa={w} +var d={w} +var da={w} +var x={w} +var xa={w} var ARCH=vc4 var PLATFORM=rpi var PLATFORMDIR={EM}/share/ack/{PLATFORM} From 1312fe298b0df5f5a8860c6acdb6020f8f74d024 Mon Sep 17 00:00:00 2001 From: David Given Date: Tue, 21 May 2013 20:05:26 +0100 Subject: [PATCH 20/76] Now compiles (incorrectly) the entire libc, libpc, libm2 and libbasic! 
--HG-- branch : dtrg-videocore --- mach/vc4/ncg/table | 220 ++++++++++++++++++++++++++------------ plat/rpi/include/unistd.h | 20 ++++ 2 files changed, 173 insertions(+), 67 deletions(-) diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index 368c6177e..fd9a106e8 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -293,7 +293,8 @@ PATTERNS leaving dup 4 ste $1 - + + /* Type conversions */ @@ -450,22 +451,30 @@ PATTERNS loc 0 ste $1 -#if 0 pat ine /* Increment external */ - uses REG={LABEL, $1}, REG - gen - lwz %b, {GPROFFSET, %a, 0} - addi %b, %b, {CONST, 1} - stw %b, {GPROFFSET, %a, 0} - + leaving + lae $1 + dup QUAD + loi QUAD + inc + sti QUAD + pat dee /* Decrement external */ - uses REG={LABEL, $1}, REG - gen - lwz %b, {GPROFFSET, %a, 0} - addi %b, %b, {CONST, 0-1} - stw %b, {GPROFFSET, %a, 0} -#endif - + leaving + lae $1 + dup QUAD + loi QUAD + dec + sti QUAD + + pat lde /* Load external */ + uses REG, REG + gen + lea %a, {LABEL, $1} + ld %b, {GPROFFSET, %a, 4} + ld %a, {GPROFFSET, %a, 0} + yields %b %a + /* Structures */ @@ -474,13 +483,30 @@ PATTERNS leaving adp $1 loi QUAD - - + + pat ldf /* Load double offsetted */ + with GPRI + uses reusing %1, REG=%1, REG + gen + add %a, GP + ld %b, {GPROFFSET, %a, $1+4} + ld %a, {GPROFFSET, %a, $1+0} + yields %a %b + pat stf /* Store word offsetted */ leaving adp $1 sti QUAD - + + pat sdf /* Store double offsetted */ + with GPRI GPRI GPRI + uses reusing %3, REG=%3 + gen + add %a, GP + st %1, {GPROFFSET, %a, $1+0} + st %2, {GPROFFSET, %a, $1+4} + + /* Loads and stores */ @@ -659,6 +685,54 @@ PATTERNS sub %a, %1 yields %a + pat mli $1==QUAD /* Multiply word (second * top) */ + with GPRI+CONST GPRI + uses reusing %2, REG=%2 + gen + mul %a, %1 + yields %a + with GPRI GPRI+CONST + uses reusing %1, REG=%1 + gen + mul %a, %2 + yields %a + + pat mlu + leaving + mli $1 + + pat dvi $1==QUAD /* Divide word (second / top) */ + with GPRI GPRI + uses reusing %2, REG + gen + divs %a, %2, %1 + yields %a + + pat dvu $1==QUAD 
/* Divide unsigned word (second / top) */ + with GPRI GPRI + uses reusing %2, REG + gen + divu %a, %2, %1 + yields %a + + pat rmu $1==QUAD /* Remainder unsigned word (second % top) */ + with GPRI GPRI + uses REG + gen + divu %a, %2, %1 + mul %a, %1 + sub %a, %2 + yields %a + + pat rmi $1==QUAD /* Remainder signed word (second % top) */ + with GPRI GPRI + uses REG + gen + divs %a, %2, %1 + mul %a, %1 + sub %a, %2 + yields %a + pat ngi $1==QUAD /* Negate word */ with GPRI uses reusing %1, REG=%1 @@ -725,6 +799,15 @@ PATTERNS sub %a, %2 yields %a + pat rmi $1==QUAD /* Remainder signed word (second % top) */ + with GPRI GPRI + uses REG + gen + divs %a, %2, %1 + mul %a, %1 + sub %a, %2 + yields %a + #if 0 pat mli $1==4 /* Multiply word (second * top) */ with REG REG @@ -733,15 +816,7 @@ PATTERNS mullw %a, %2, %1 yields %a - pat rmi $1==4 /* Remainder word (second % top) */ - with REG REG - uses REG - gen - divw %a, %2, %1 - mullw %a, %a, %1 - subf %a, %a, %2 - yields %a - + pat and !defined($1) /* AND set */ with STACK gen @@ -810,30 +885,41 @@ PATTERNS gen lsr %2, %1 yields %a -#if 0 - + /* Arrays */ pat aar $1==QUAD /* Index array */ - with GPR3 GPR4 GPR5 + with STACK + uses GPR0 + gen + bl {LABEL, ".aar4stack"} + yields R0 + with GPR0 GPR1 GPR2 + uses GPR0 gen bl {LABEL, ".aar4"} - yields R3 - + yields R0 + pat lae lar $2==QUAD && nicesize(rom($1, 3)) /* Load array */ leaving lae $1 aar QUAD loi rom($1, 3) - + pat lar $1==QUAD /* Load array */ - with GPR3 GPR4 GPR5 STACK - kills ALL + with STACK + uses GPR0 + gen + bl {LABEL, ".lar4stack"} + yields R0 + with GPR0 GPR1 GPR2 + uses GPR0 gen bl {LABEL, ".lar4"} - + yields R0 + pat lae sar $2==QUAD && nicesize(rom($1, 3)) /* Store array */ leaving lae $1 @@ -841,11 +927,15 @@ PATTERNS sti rom($1, 3) pat sar $1==QUAD /* Store array */ - with GPR3 GPR4 GPR5 STACK - kills ALL + with STACK + uses GPR0 + gen + bl {LABEL, ".sar4stack"} + yields R0 + with GPR0 GPR1 GPR2 + uses GPR0 gen bl {LABEL, ".sar4"} - @@ -854,25 +944,30 
@@ PATTERNS pat set defined($1) /* Create word with set bit */ leaving loc 1 - exg QUAD - sli QUAD - + exg $1 + sli $1 + pat set !defined($1) /* Create structure with set bit (variable) */ - with GPR3 GPR4 STACK + with STACK gen bl {LABEL, ".set"} - + pat inn defined($1) /* Test for set bit */ leaving set QUAD and QUAD - + pat inn !defined($1) /* Test for set bit (variable) */ - with GPR3 STACK + with GPR0 STACK gen bl {LABEL, ".inn"} -#endif - + yields R0 + + pat ior !defined($1) /* Or two sets */ + with STACK + gen + bl {LABEL, ".ior"} + /* Boolean resolutions */ @@ -1095,28 +1190,16 @@ PATTERNS pop FP, PC pat blm /* Block move constant length */ - with GPRI GPRI STACK - uses REG - gen - sub SP, {CONST, 12} - mov %a, {CONST, $1} - st %1, {GPROFFSET, SP, 0} - st %2, {GPROFFSET, SP, 4} - st %a, {GPROFFSET, SP, 8} - bl {LABEL, "_memmove"} - add SP, {CONST, 12} - -#if 0 + leaving + loc $1 + bls + pat bls /* Block move variable length */ - with GPR GPR GPR STACK + with STACK + kills ALL gen - stwu %1, {GPROFFSET, SP, 0-4} - stwu %3, {GPROFFSET, SP, 0-4} - stwu %2, {GPROFFSET, SP, 0-4} bl {LABEL, "_memmove"} - addi SP, SP, {CONST, 12} -#endif - + pat csa /* Array-lookup switch */ with STACK gen @@ -1328,3 +1411,6 @@ PATTERNS bl {LABEL, ".fef"} yields R0 R1 + pat zrf /* Load a floating zero */ + leaving + loc 0 diff --git a/plat/rpi/include/unistd.h b/plat/rpi/include/unistd.h index ddd8739d3..4cbf43c58 100644 --- a/plat/rpi/include/unistd.h +++ b/plat/rpi/include/unistd.h @@ -9,12 +9,32 @@ #define _UNISTD_H #include +#include /* Types */ typedef int pid_t; typedef int mode_t; +typedef long suseconds_t; + +/* Time handling. 
*/ + +struct timeval +{ + time_t tv_sec; + suseconds_t tv_usec; +}; + +struct timezone +{ + int tz_minuteswest; + int tz_dsttime; +}; /* obsolete, unused */ + +extern int gettimeofday(struct timeval* tv, struct timezone* tz); +extern int settimeofday(const struct timeval* tv, const struct timezone* tz); + /* Constants for file access (open and friends) */ enum From 5e9102955c72f98b50919db1d85e842bd3a5c756 Mon Sep 17 00:00:00 2001 From: David Given Date: Tue, 21 May 2013 23:17:30 +0100 Subject: [PATCH 21/76] Reworked VC4 relocations and some of the instruction encoding to be actually correct. Now generating what could be real code! --HG-- branch : dtrg-videocore --- mach/vc4/as/mach5.c | 68 ++++++++++++++----- mach/vc4/libem/dummy.s | 5 ++ plat/rpi/boot.s | 16 +++++ util/led/relocate.c | 151 +++++++++++++++++++++++++++++++---------- 4 files changed, 187 insertions(+), 53 deletions(-) diff --git a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c index d6f71e7fb..b1ce314bc 100644 --- a/mach/vc4/as/mach5.c +++ b/mach/vc4/as/mach5.c @@ -7,6 +7,11 @@ #define maskx(v, x) (v & ((1<<(x))-1)) +static void toobig(void) +{ + serror("offset too big to encode into instruction"); +} + /* Assemble an ALU instruction where rb is a register. */ void alu_instr_reg(quad op, int cc, int rd, int ra, int rb) @@ -90,6 +95,7 @@ void misc_instr_lit(quad op, int cc, int rd, int ra, quad value) void branch_instr(int bl, int cc, struct expr_t* expr) { + quad pc = DOTVAL; quad type = expr->typ & S_TYP; /* Sanity checking. */ @@ -114,7 +120,7 @@ void branch_instr(int bl, int cc, struct expr_t* expr) /* The VC4 branch instructions express distance in 2-byte * words. */ - int d = (expr->val - DOTVAL) / 2; + int d = (expr->val - pc) / 2; /* We now know the worst case for the instruction layout. 
At * this point we can emit the instructions, which may shrink @@ -126,7 +132,7 @@ void branch_instr(int bl, int cc, struct expr_t* expr) * close enough to the program counter, we can use a short- * form instruction. */ - if ((d >= -128) && (d < 127)) + if (fitx(d, 7)) { emit2(B16(00011000,00000000) | (cc<<7) | (d&0x7f)); break; @@ -136,19 +142,29 @@ void branch_instr(int bl, int cc, struct expr_t* expr) /* Absolute addresses and references to other sections * need the full 32 bits. */ - newrelo(expr->typ, RELOVC4 | RELPC); + newrelo(expr->typ, RELOVC4|RELPC); if (bl) { - quad v = d & 0x07ffffff; - quad hiv = v >> 23; - quad lov = v & 0x007fffff; + quad v, hiv, lov; + + if (!fitx(d, 27)) + toobig(); + + v = maskx(d, 27); + hiv = v >> 23; + lov = v & 0x007fffff; emit2(B16(10010000,10000000) | (lov>>16) | (hiv<<8)); emit2(B16(00000000,00000000) | (lov&0xffff)); } else { - quad v = d & 0x007fffff; + quad v; + + if (!fitx(d, 23)) + toobig(); + + v = maskx(d, 23); emit2(B16(10010000,00000000) | (cc<<8) | (v>>16)); emit2(B16(00000000,00000000) | (v&0xffff)); } @@ -334,6 +350,8 @@ void mem_postincr_instr(quad opcode, int cc, int rd, int rs) void mem_address_instr(quad opcode, int rd, struct expr_t* expr) { + static const char sizes[] = {4, 2, 1, 2}; + int size = sizes[opcode]; quad type = expr->typ & S_TYP; /* Sanity checking. */ @@ -354,10 +372,7 @@ void mem_address_instr(quad opcode, int rd, struct expr_t* expr) case 1: case 2: { - /* The VC4 branch instructions express distance in 2-byte - * words. */ - - int d = (expr->val - DOTVAL) / 2; + int d = expr->val - DOTVAL; /* We now know the worst case for the instruction layout. At * this point we can emit the instructions, which may shrink @@ -365,24 +380,30 @@ void mem_address_instr(quad opcode, int rd, struct expr_t* expr) if (type == DOTTYP) { + int scaledd = d/size; + /* This is a reference to an address within this section. If * it's close enough to the program counter, we can use a * shorter instruction. 
*/ - if (fitx(d, 16)) + if (fitx(scaledd, 16)) { emit2(B16(10101010,00000000) | (opcode<<5) | (rd<<0)); - emit2(d); + emit2(scaledd); return; } } /* Otherwise we need the full 48 bits. */ - if (!fitx(d, 27)) - serror("offset too big to encode into instruction"); + newrelo(expr->typ, RELOVC4|RELPC); - newrelo(expr->typ, RELOVC4 | RELPC); + /* VC4 relocations store the PC-relative delta into the + * destination section in the instruction data. The linker will + * massage this, and scale it appropriately. */ + + if (!fitx(d, 27)) + toobig(); emit2(B16(11100111,00000000) | (opcode<<5) | (rd<<0)); emit4((31<<27) | maskx(d, 27)); @@ -493,8 +514,19 @@ void lea_stack_instr(int rd, long va, int rs) void lea_address_instr(int rd, struct expr_t* expr) { - newrelo(expr->typ, RELOVC4); + quad pc = DOTVAL; + quad type = expr->typ & S_TYP; + + if (type == S_ABS) + serror("can't use absolute addresses here"); + + newrelo(expr->typ, RELOVC4|RELPC); + + /* VC4 relocations store the PC-relative delta into the + * destination section in the instruction data. The linker will + * massage this, and scale it appropriately. */ + emit2(B16(11100101,00000000) | (rd<<0)); - emit4(expr->val); + emit4(expr->val - pc); } diff --git a/mach/vc4/libem/dummy.s b/mach/vc4/libem/dummy.s index 4edaa030f..fdbcc4c38 100644 --- a/mach/vc4/libem/dummy.s +++ b/mach/vc4/libem/dummy.s @@ -7,3 +7,8 @@ */ #include "videocore.h" + +.define __dummy +.sect .data +__dummy: + diff --git a/plat/rpi/boot.s b/plat/rpi/boot.s index 3cf4f3fe1..b848e65c4 100644 --- a/plat/rpi/boot.s +++ b/plat/rpi/boot.s @@ -16,6 +16,10 @@ .sect .text begtext: + lea r15, begtext + st sp, .returnsp + st lr, .returnlr + #if 0 ! Wipe the bss. (I'm a little suprised that __m_a_i_n doesn't do this.) @@ -33,6 +37,12 @@ begtext: #endif b __m_a_i_n +.define __exit +__exit: + ld sp, .returnsp + ld lr, .returnlr + b lr + ! Define symbols at the beginning of our various segments, so that we can find ! them. 
(Except .text, which has already been done.) @@ -47,3 +57,9 @@ begtext: .comm .trppc, 4 .comm .ignmask, 4 .comm _errno, 4 + +! We store the stack pointer and return address on entry so that we can +! cleanly exit. + +.comm .returnsp, 4 +.comm .returnlr, 4 diff --git a/util/led/relocate.c b/util/led/relocate.c index f44a34b96..3cc9ff904 100644 --- a/util/led/relocate.c +++ b/util/led/relocate.c @@ -8,6 +8,7 @@ static char rcsid[] = "$Id$"; #include #include +#include #include #include "out.h" #include "const.h" @@ -44,6 +45,65 @@ static long read4(char* addr, int type) return ((long)word1 << (2 * WIDTH)) + word0; } +/* VideoCore 4 fixups are complex as we need to patch the instruction in + * one of several different ways (depending on what the instruction is). + */ + +static long get_vc4_valu(char* addr) +{ + uint16_t opcode = read2(addr, 0); + + if ((opcode & 0xff00) == 0xe700) + { + /* ld rd, $+o: [1110 0111 ww 0 d:5] [11111 o:27] + * st rd, $+o: [1110 0111 ww 1 d:5] [11111 o:27] + */ + + int32_t value = read4(addr+2, 0); + value &= 0x07ffffff; + value = value<<5>>5; + return value; + } + + if ((opcode & 0xf080) == 0x9000) + { + /* b $+o*2: [1001 cccc 0ooo oooo] [oooo oooo oooo oooo] + * Yes, big-endian (the first 16 bits is the MSB). + */ + + uint32_t value = read4(addr, RELWR); + value &= 0x007fffff; + value = value<<9>>9; + value *= 2; + return value; + } + + if ((opcode & 0xf080) == 0x9080) + { + /* bl $+o*2: [1001 oooo 1ooo oooo] [oooo oooo oooo oooo] + * Yes, big-endian (the first 16 bits is the MSB). + * (Note that o is split.) 
+ */ + + int32_t value = read4(addr, RELWR); + int32_t lov = value & 0x007fffff; + int32_t hiv = value & 0x0f000000; + value = lov | (hiv>>1); + value = value<<5>>5; + value *= 2; + return value; + } + + if ((opcode & 0xffe0) == 0xe500) + { + /* lea: [1110 0101 000 d:5] [o:32] */ + + return read4(addr+2, 0); + } + + assert(0 && "unrecognised VC4 instruction"); +} + /* * The bits in type indicate how many bytes the value occupies and what * significance should be attributed to each byte. @@ -65,21 +125,7 @@ getvalu(addr, type) case RELOH2: return read2(addr, type) << 16; case RELOVC4: - { - long i = read4(addr, type); - if (i & 0x00800000) - { - /* Branch instruction. */ - return (i<<9)>>9; - } - else - { - /* Branch-link instruction. */ - long hi = (i<<4)>>28; - long lo = (i & 0x007fffff); - return lo | (hi<<23); - } - } + return get_vc4_valu(addr); default: fatal("bad relocation size"); } @@ -123,6 +169,60 @@ static void write4(long valu, char* addr, int type) } } +/* VideoCore 4 fixups are complex as we need to patch the instruction in + * one of several different ways (depending on what the instruction is). + */ + +static void put_vc4_valu(char* addr, long value) +{ + uint16_t opcode = read2(addr, 0); + + if ((opcode & 0xff00) == 0xe700) + { + /* ld rd, o, (pc): [1110 0111 ww 0 d:5] [11111 o:27] + * st rd, o, (pc): [1110 0111 ww 1 d:5] [11111 o:27] + */ + + uint32_t v = read4(addr+2, 0); + v &= 0xf8000000; + v |= value & 0x07ffffff; + write4(v, addr+2, 0); + } + else if ((opcode & 0xf080) == 0x9000) + { + /* b dest: [1001 cccc 0ooo oooo] [oooo oooo oooo oooo] + * Yes, big-endian (the first 16 bits is the MSB). + */ + + uint32_t v = read4(addr, RELWR); + v &= 0xff800000; + v |= (value/2) & 0x007fffff; + write4(v, addr, RELWR); + } + else if ((opcode & 0xf080) == 0x9080) + { + /* bl dest: [1001 oooo 1ooo oooo] [oooo oooo oooo oooo] + * Yes, big-endian (the first 16 bits is the MSB). + * (Note that o is split.) 
+ */ + + uint32_t v = read4(addr, RELWR); + uint32_t lovalue = (value/2) & 0x007fffff; + uint32_t hivalue = (value/2) & 0x07800000; + v &= 0xf0800000; + v |= lovalue | (hivalue<<1); + write4(v, addr, RELWR); + } + else if ((opcode & 0xffe0) == 0xe500) + { + /* lea: [1110 0101 000 d:5] [o:32] */ + + write4(value, addr+2, 0); + } + else + assert(0 && "unrecognised VC4 instruction"); +} + /* * The bits in type indicate how many bytes the value occupies and what * significance should be attributed to each byte. @@ -156,27 +256,8 @@ putvalu(valu, addr, type) write2(valu>>16, addr, type); break; case RELOVC4: - { - long i = read4(addr, type); - if (i & 0x00800000) - { - /* Branch instruction. */ - unsigned v = (valu/2) & 0x007fffff; - i &= ~0x007fffff; - i |= v; - } - else - { - /* Branch-link instruction. */ - unsigned v = (valu/2) & 0x07ffffff; - unsigned hiv = v >> 23; - unsigned lov = v & 0x007fffff; - i &= ~0x0f7fffff; - i |= (lov>>16) | (hiv<<24); - } - write4(i, addr, type); + put_vc4_valu(addr, valu); break; - } default: fatal("bad relocation size"); } From f904465e9c2dbc1cb5aec0e4c0ad5e375d3043ed Mon Sep 17 00:00:00 2001 From: David Given Date: Wed, 22 May 2013 00:16:16 +0100 Subject: [PATCH 22/76] Fix signedness problem that was showing up on ARM. --HG-- branch : dtrg-videocore --- mach/vc4/as/mach5.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c index b1ce314bc..f3671bf64 100644 --- a/mach/vc4/as/mach5.c +++ b/mach/vc4/as/mach5.c @@ -5,6 +5,8 @@ * See the file 'Copying' in the root of the distribution for the full text. */ +#include <stdint.h> + #define maskx(v, x) (v & ((1<<(x))-1)) static void toobig(void) @@ -120,7 +122,7 @@ void branch_instr(int bl, int cc, struct expr_t* expr) /* The VC4 branch instructions express distance in 2-byte * words. */ - int d = (expr->val - pc) / 2; + int d = ((int32_t)expr->val - (int32_t)pc) / 2; /* We now know the worst case for the instruction layout.
At * this point we can emit the instructions, which may shrink From 72542288cd95f93740d88fec17380ef3e0129cd2 Mon Sep 17 00:00:00 2001 From: David Given Date: Wed, 22 May 2013 00:16:59 +0100 Subject: [PATCH 23/76] Adjust bootstrap code to build kernels that work with the mailbox test app. --HG-- branch : dtrg-videocore --- plat/rpi/boot.s | 50 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/plat/rpi/boot.s b/plat/rpi/boot.s index b848e65c4..f5c8bafd4 100644 --- a/plat/rpi/boot.s +++ b/plat/rpi/boot.s @@ -15,27 +15,47 @@ .sect .text +#define gp r15 + begtext: - lea r15, begtext + ! Set up system registers. + + lea gp, begtext st sp, .returnsp st lr, .returnlr -#if 0 + ! The GPU kernel code will load parameters into r0-r5. Save them + ! so that the user code can access them. + + sub r0, gp ! pointer + sub r1, gp ! pointer + sub r2, gp ! pointer + sub r3, gp ! pointer + ! r4-r5 are not pointers and don't need adjusting + push r0-r5 + sub r0, sp, gp + st r0, _gpu_parameters + ! Wipe the bss. (I'm a little suprised that __m_a_i_n doesn't do this.) - mov di, begbss - mov cx, endbss - sub cx, di - mov ax, 0 - rep stosb + lea r0, begbss + lea r1, endbss + mov r2, #0 +_1: + stb r2, (r0) + addcmpb.lt r2, #1, r1, _1 ! Push standard parameters onto the stack and go. - push envp ! envp - push argv ! argv - push 1 ! argc -#endif - b __m_a_i_n + mov r0, #0 + push r0 ! envp + push r0 ! argv + push r0 ! argc + + ! Call the language startup code. + + bl __m_a_i_n + ! Fall through to __exit if this returns. .define __exit __exit: @@ -63,3 +83,9 @@ __exit: .comm .returnsp, 4 .comm .returnlr, 4 + +! User pointer to the GPU kernel parameter block. + +.define _gpu_parameters +.comm _gpu_parameters, 4 + From 08c433422449f50a22fa3a82b4d33c062166f045 Mon Sep 17 00:00:00 2001 From: David Given Date: Wed, 22 May 2013 00:44:04 +0100 Subject: [PATCH 24/76] Typo fixes. 
--HG-- branch : dtrg-videocore --- mach/vc4/as/mach4.c | 4 ++-- mach/vc4/ncg/table | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/mach/vc4/as/mach4.c b/mach/vc4/as/mach4.c index 73cbea8e4..4c87209ee 100644 --- a/mach/vc4/as/mach4.c +++ b/mach/vc4/as/mach4.c @@ -22,8 +22,8 @@ operation | OP_BRANCH CC GPR ',' '#' absexp ',' expr { branch_addcmp_lit_lit_instr($2, 0, $3, $6, &$8); } | OP_ADDCMPB GPR ',' GPR ',' GPR ',' expr { branch_addcmp_reg_reg_instr(ALWAYS, $2, $4, $6, &$8); } | OP_ADDCMPB CC GPR ',' GPR ',' GPR ',' expr { branch_addcmp_reg_reg_instr($2, $3, $5, $7, &$9); } - | OP_ADDCMPB GPR ',' '#' absexp ',' GPR ',' expr { branch_addcmp_reg_reg_instr(ALWAYS, $2, $5, $7, &$9); } - | OP_ADDCMPB CC GPR ',' '#' absexp ',' GPR ',' expr { branch_addcmp_reg_reg_instr($2, $3, $6, $8, &$10); } + | OP_ADDCMPB GPR ',' '#' absexp ',' GPR ',' expr { branch_addcmp_lit_reg_instr(ALWAYS, $2, $5, $7, &$9); } + | OP_ADDCMPB CC GPR ',' '#' absexp ',' GPR ',' expr { branch_addcmp_lit_reg_instr($2, $3, $6, $8, &$10); } | OP_ADDCMPB GPR ',' GPR ',' '#' absexp ',' expr { branch_addcmp_reg_lit_instr(ALWAYS, $2, $4, $7, &$9); } | OP_ADDCMPB CC GPR ',' GPR ',' '#' absexp ',' expr { branch_addcmp_reg_lit_instr($2, $3, $5, $8, &$10); } | OP_ADDCMPB GPR ',' '#' absexp ',' '#' absexp ',' expr { branch_addcmp_lit_lit_instr(ALWAYS, $2, $5, $8, &$10); } diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index fd9a106e8..c8dbcb77c 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -1044,17 +1044,17 @@ PATTERNS pat zle call anyz("b.le") /* Branch if signed top <= 0 */ proc anyb example beq - with GPR+CONST GPRI STACK + with GPRI+CONST GPRI STACK gen cmp %2, %1 beq[1] {LABEL, $1} - pat beq call anyz("b.eq") /* Branch if signed second == top */ - pat bne call anyz("b.ne") /* Branch if signed second != top */ - pat bgt call anyz("b.gt") /* Branch if signed second > top */ - pat bge call anyz("b.ge") /* Branch if signed second >= top */ - pat blt call 
anyz("b.lt") /* Branch if signed second < top */ - pat ble call anyz("b.le") /* Branch if signed second <= top */ + pat beq call anyb("b.eq") /* Branch if signed second == top */ + pat bne call anyb("b.ne") /* Branch if signed second != top */ + pat bgt call anyb("b.gt") /* Branch if signed second > top */ + pat bge call anyb("b.ge") /* Branch if signed second >= top */ + pat blt call anyb("b.lt") /* Branch if signed second < top */ + pat ble call anyb("b.le") /* Branch if signed second <= top */ proc cmu_z example cmu zeq with GPR+CONST GPRI STACK From 6b5316dcfa8e39d589e0a85e2f3e8063111245c8 Mon Sep 17 00:00:00 2001 From: David Given Date: Wed, 22 May 2013 00:52:58 +0100 Subject: [PATCH 25/76] More typo fixes. --HG-- branch : dtrg-videocore --- plat/rpi/boot.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plat/rpi/boot.s b/plat/rpi/boot.s index f5c8bafd4..e190f2ae3 100644 --- a/plat/rpi/boot.s +++ b/plat/rpi/boot.s @@ -43,7 +43,7 @@ begtext: mov r2, #0 _1: stb r2, (r0) - addcmpb.lt r2, #1, r1, _1 + addcmpb.lt r0, #1, r1, _1 ! Push standard parameters onto the stack and go. From 0068952bd1f0230cab4602bae6b863996ba0c872 Mon Sep 17 00:00:00 2001 From: David Given Date: Wed, 22 May 2013 09:47:22 +0100 Subject: [PATCH 26/76] Build the Raspberry Pi code. 
--HG-- branch : dtrg-videocore --- Makefile | 4 ++++ README | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e203d3243..dbac9cabd 100644 --- a/Makefile +++ b/Makefile @@ -54,6 +54,8 @@ CFLAGS += \ LDFLAGS += +ACKFLAGS = -Ih + all: installables .DELETE_ON_ERROR: @@ -101,6 +103,7 @@ include mach/i386/build.mk include mach/i86/build.mk include mach/m68020/build.mk # include mach/powerpc/build.mk +include mach/vc4/build.mk include plat/build.mk include plat/pc86/build.mk @@ -108,6 +111,7 @@ include plat/cpm/build.mk include plat/linux386/build.mk include plat/linux68k/build.mk # include plat/linuxppc/build.mk +include plat/rpi/build.mk .PHONY: installables installables: $(INSTALLABLES) diff --git a/README b/README index d6a56b050..52abe73be 100644 --- a/README +++ b/README @@ -32,6 +32,7 @@ pc86 produces bootable floppy disk images for 8086 PCs linux386 produces ELF executables for PC Linux systems linux68k produces ELF executables for m68020 Linux systems cpm produces i80 CP/M .COM files +rpi produces Raspberry Pi GPU binaries INSTALLATION @@ -124,7 +125,7 @@ GOTCHAS There are some things you should be aware of. -- Look at plat/<PLATFORMNAME>/README for information about the two supported +- Look at plat/<PLATFORMNAME>/README for information about the supported platforms. - The library support is fairly limited; for C, it's at roughly the ANSI C From 9e556d8b7ba8f8eb8ff731677935189a0c52af7b Mon Sep 17 00:00:00 2001 From: David Given Date: Wed, 22 May 2013 10:55:13 +0100 Subject: [PATCH 27/76] Optimise direct access to globals, and incrementing and decrementing locals.
--HG-- branch : dtrg-videocore --- mach/vc4/ncg/table | 103 ++++++++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 43 deletions(-) diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index c8dbcb77c..a82fc49fb 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -276,12 +276,10 @@ PATTERNS with GPRI GPRI yields %1 %2 -#if 0 pat stl lol $1==$2 /* Store then load local */ leaving dup QUAD stl $1 -#endif pat lal sti lal loi $1==$3 && $2==$4 /* Store then load local, of a different size */ leaving @@ -352,8 +350,8 @@ PATTERNS pat lal /* Load address of local */ uses REG gen - add %a, FP, {CONST, $1} - sub %a, GP + sub %a, FP, GP + add %a, {CONST, $1} yields %a pat lol inreg($1)>0 /* Load from local */ @@ -367,8 +365,8 @@ PATTERNS pat ldl /* Load double-word from local */ leaving - lal $1 - loi QUAD*2 + lol $1 + QUAD*0 + lol $1 + QUAD*1 pat stl inreg($1)>0 /* Store to local */ with CONST+GPRI @@ -383,8 +381,8 @@ PATTERNS pat sdl /* Store double-word to local */ leaving - lal $1 - sti QUAD*2 + stl $1 + QUAD*1 + stl $1 + QUAD*0 pat lil inreg($1)>0 /* Load from indirected local */ uses REG @@ -404,26 +402,36 @@ PATTERNS pat stl lol $1==$2 /* Save then load (generated by C compiler) */ leaving - dup 4 + dup QUAD stl $1 - pat zrl /* Zero local */ + pat zrl /* Zero local */ leaving loc 0 stl $1 - pat inl /* Increment local */ + pat inl inreg($1)>0 /* Increment local in register */ + kills regvar($1) + gen + add {GPRE, regvar($1)}, {CONST, 1} + + pat inl inreg($1)<=0 /* Increment local */ leaving lol $1 loc 1 - adi 4 + adi QUAD stl $1 - pat del /* Decrement local */ + pat del inreg($1)>0 /* Decrement local in register */ + kills regvar($1) + gen + sub {GPRE, regvar($1)}, {CONST, 1} + + pat del inreg($1)<=0 /* Decrement local */ leaving lol $1 loc 1 - sbi 4 + sbi QUAD stl $1 @@ -512,6 +520,11 @@ PATTERNS /* Loads and stores */ pat loi $1==BYTE /* Load byte indirect */ + with LABEL + uses REG + gen + ldb %a, %1 + yields %a with GPR uses reusing 
%1, REG gen @@ -523,51 +536,46 @@ PATTERNS ldb %a, {GPRGPR, %1.reg, GP} yields %a -#if 0 - pat loi loc loc cii $1==WORD && $2==WORD && $3==QUAD /* Load half-word indirect and sign extend */ - with GPR + pat loi loc loc cii $1==WORD && $2==WORD && $3==QUAD /* Load short indirect and sign extend */ + with LABEL uses REG gen - lha %a, {GPROFFSET, %1, 0} + ldhs %a, %1 yields %a - with SUM_RR + with GPRI uses reusing %1, REG gen - lhax %a, %1.reg1, %1.reg2 - yields %a - with SUM_RC - uses REG - gen - move {IND_RC_H_S, %1.reg, %1.off}, %a + add %a, %1, GP + ldhs %a, {GPROFFSET, %a, 0} yields %a - pat loi $1==WORD /* Load half-word indirect */ - with GPR + pat loi $1==WORD /* Load short indirect */ + with LABEL uses REG gen - lhz %a, {GPROFFSET, %1, 0} + ldh %a, %1 yields %a - with SUM_RR + with GPRI uses reusing %1, REG gen - lhzx %a, %1.reg1, %1.reg2 + add %a, %1, GP + ldh %a, {GPROFFSET, %a, 0} yields %a - with SUM_RC - uses REG - gen - move {IND_RC_H, %1.reg, %1.off}, %a - yields %a -#endif - + pat loi $1==QUAD /* Load quad indirect */ - with GPR + with LABEL + uses REG + gen + ld %a, %1 + yields %a + with GPRI uses reusing %1, REG gen add %a, %1, GP ld %a, {GPROFFSET, %a, 0} yields %a - pat loi /* Load arbitrary size */ + pat loi !nicesize($1) /* Load arbitrary size */ leaving loc $1 los QUAD @@ -579,6 +587,9 @@ PATTERNS bl {LABEL, ".los"} pat sti $1==BYTE /* Store byte indirect */ + with LABEL GPRI + gen + stb %2, %1 with GPR GPRI gen stb %2, {GPRGPR, %1, GP} @@ -587,15 +598,21 @@ PATTERNS stb %2, {GPRGPR, %1.reg, GP} pat sti $1==WORD /* Store half-word indirect */ - with GPR GPR - uses REG + with LABEL GPRI + gen + sth %2, %1 + with GPRI GPRI + uses reusing %1, REG gen add %a, %1, GP sth %2, {GPROFFSET, %a, 0} pat sti $1==QUAD /* Store quad indirect */ - with GPR GPR - uses REG + with LABEL GPRI + gen + st %2, %1 + with GPRI GPRI + uses reusing %1, REG gen add %a, %1, GP st %2, {GPROFFSET, %a, 0} From cdce394b6c4fd85c53e015cf26f36deecddb4bd3 Mon Sep 17 00:00:00 
2001 From: David Given Date: Wed, 22 May 2013 11:02:46 +0100 Subject: [PATCH 28/76] Generate ld rd, (rs) instructions properly. --HG-- branch : dtrg-videocore --- mach/vc4/as/mach5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c index f3671bf64..53ec3464c 100644 --- a/mach/vc4/as/mach5.c +++ b/mach/vc4/as/mach5.c @@ -254,7 +254,7 @@ void mem_instr(quad opcode, int cc, int rd, long offset, int rs) if ((rs < 0x10) && (rd < 0x10) && (offset == 0)) { - emit2(B16(00001000,00000000) | (opcode<<8) | (rs<<4) | (rd<<4)); + emit2(B16(00001000,00000000) | (opcode<<8) | (rs<<4) | (rd<<0)); return; } From b5e5df4a63b8e5c8269faa881d4a1d8492aa8c7b Mon Sep 17 00:00:00 2001 From: David Given Date: Wed, 22 May 2013 18:25:30 +0100 Subject: [PATCH 29/76] Allocate a user stack to run code in. --HG-- branch : dtrg-videocore --- plat/rpi/boot.s | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/plat/rpi/boot.s b/plat/rpi/boot.s index e190f2ae3..9e54d3292 100644 --- a/plat/rpi/boot.s +++ b/plat/rpi/boot.s @@ -16,6 +16,12 @@ .sect .text #define gp r15 +#define STACKSIZE 16*1024 + +! MAIN ENTRY POINT +! +! When running as a kernel, our parameters are passed in in r0-r5, so +! the startup sequence mustn't disturb these. begtext: ! Set up system registers. @@ -24,27 +30,28 @@ begtext: st sp, .returnsp st lr, .returnlr - ! The GPU kernel code will load parameters into r0-r5. Save them - ! so that the user code can access them. + ! Wipe the bss. (I'm a little suprised that __m_a_i_n doesn't do this.) + + lea r6, begbss + lea r7, endbss + mov r8, #0 +_1: + stb r8, (r6) + addcmpb.lt r6, #1, r7, _1 + ! Set up the new stack and save the kernel parameters to it. + + lea sp, .stack + STACKSIZE sub r0, gp ! pointer sub r1, gp ! pointer sub r2, gp ! pointer sub r3, gp ! pointer ! r4-r5 are not pointers and don't need adjusting push r0-r5 + sub r0, sp, gp st r0, _gpu_parameters - ! 
Wipe the bss. (I'm a little suprised that __m_a_i_n doesn't do this.) - - lea r0, begbss - lea r1, endbss - mov r2, #0 -_1: - stb r2, (r0) - addcmpb.lt r0, #1, r1, _1 - ! Push standard parameters onto the stack and go. mov r0, #0 @@ -89,3 +96,7 @@ __exit: .define _gpu_parameters .comm _gpu_parameters, 4 +! User stack. + +.comm .stack, STACKSIZE + From 7537c85e0a09e628b092b808245b2b7e5b7b5e45 Mon Sep 17 00:00:00 2001 From: David Given Date: Wed, 22 May 2013 21:37:48 +0100 Subject: [PATCH 30/76] Generate adds instructions when a shift-and-add is seen (useful for array indexing). --HG-- branch : dtrg-videocore --- mach/vc4/ncg/table | 82 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 58 insertions(+), 24 deletions(-) diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index a82fc49fb..65c47e4ed 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -99,6 +99,11 @@ INSTRUCTIONS add GPRI:wo, GPRI:ro, GPRI+CONST:ro. add GPRI:rw, GPRI+CONST:ro. + adds2 GPRI:rw, GPRI+CONST:ro. + adds4 GPRI:rw, GPRI+CONST:ro. + adds8 GPRI:rw, GPRI+CONST:ro. + adds16 GPRI:rw, GPRI+CONST:ro. + adds256 GPRI:rw, GPRI:rw, GPRI:ro. and GPRI:rw, GPRI+CONST:ro. asr GPRI:rw, GPRI+CONST:ro. beq "b.eq" LABEL:ro. 
@@ -632,16 +637,16 @@ PATTERNS /* Arithmetic wrappers */ - pat ads $1==4 /* Add var to pointer */ + pat ads /* Add var to pointer */ leaving adi $1 - pat sbs $1==4 /* Subtract var from pointer */ + pat sbs /* Subtract var from pointer */ leaving sbi $1 pat adp /* Add constant to pointer */ leaving loc $1 - adi 4 + adi QUAD pat adu /* Add unsigned */ leaving @@ -654,21 +659,21 @@ PATTERNS pat inc /* Add 1 */ leaving loc 1 - adi 4 + adi QUAD pat dec /* Subtract 1 */ leaving loc 1 - sbi 4 + sbi QUAD - pat loc mlu $2==2 /* Unsigned multiply by constant */ + pat loc mlu /* Unsigned multiply by constant */ leaving loc $1 - mli 4 + mli QUAD pat mlu /* Unsigned multiply by var */ leaving - mli $1 + mli QUAD pat loc slu /* Shift left unsigned by constant amount */ leaving @@ -905,6 +910,51 @@ PATTERNS +/* Special arithmetic */ + + pat loc sli adi $1==1 && $2==QUAD && $3==QUAD /* Shift and add (second + top<<1) */ + with GPRI+CONST GPRI + uses reusing %2, REG=%2 + gen + adds2 %a, %1 + yields %a + + pat loc sli adi $1==2 && $2==QUAD && $3==QUAD /* Shift and add (second + top<<2) */ + with GPRI+CONST GPRI + uses reusing %2, REG=%2 + gen + adds4 %a, %1 + yields %a + + pat loc sli adi $1==3 && $2==QUAD && $3==QUAD /* Shift and add (second + top<<3) */ + with GPRI+CONST GPRI + uses reusing %2, REG=%2 + gen + adds8 %a, %1 + yields %a + + pat loc sli adi $1==4 && $2==QUAD && $3==QUAD /* Shift and add (second + top<<4) */ + with GPRI+CONST GPRI + uses reusing %2, REG=%2 + gen + adds16 %a, %1 + yields %a + + pat loc sli adi $1==8 && $2==QUAD && $3==QUAD /* Shift and add (second + top<<8) */ + with GPRI GPRI + uses reusing %2, REG + gen + adds256 %a, %2, %1 + yields %a + + pat loc sli ads + leaving + loc $1 + sli $2 + adi $3 + + + /* Arrays */ pat aar $1==QUAD /* Index array */ @@ -1097,22 +1147,6 @@ PATTERNS pat cmf zge call cmf_z("b.ge") /* Branch if float second >= top */ pat cmf zle call cmf_z("b.le") /* Branch if float second <= top */ - -#if 0 - - pat cmi /* Signed tristate 
compare */ - with CONST GPR - yields {TRISTATE_RC_S, %2, %1.val} - with GPR GPR - yields {TRISTATE_RR_S, %2, %1} - - pat cmu /* Unsigned tristate compare */ - with CONST GPR - yields {TRISTATE_RC_U, %2, %1.val} - with GPR GPR - yields {TRISTATE_RR_U, %2, %1} -#endif - pat cmp /* Compare pointers */ leaving cmu QUAD From 8f338f9b44af5f4fa24abf70f5fd4ce8861fc256 Mon Sep 17 00:00:00 2001 From: David Given Date: Wed, 22 May 2013 22:46:10 +0100 Subject: [PATCH 31/76] Now actually runs on real hardware. --HG-- branch : dtrg-videocore --- plat/rpi/boot.s | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/plat/rpi/boot.s b/plat/rpi/boot.s index 9e54d3292..43811a6ad 100644 --- a/plat/rpi/boot.s +++ b/plat/rpi/boot.s @@ -24,13 +24,8 @@ ! the startup sequence mustn't disturb these. begtext: - ! Set up system registers. - - lea gp, begtext - st sp, .returnsp - st lr, .returnlr - - ! Wipe the bss. (I'm a little suprised that __m_a_i_n doesn't do this.) + ! Wipe the bss. This must happen absolutely first, because we need + ! to store the old system registers into it. lea r6, begbss lea r7, endbss @@ -39,15 +34,32 @@ _1: stb r8, (r6) addcmpb.lt r6, #1, r7, _1 + ! Set up system registers. + + lea gp, begtext + st fp, .returnfp + st sp, .returnsp + st lr, .returnlr + ! Set up the new stack and save the kernel parameters to it. - lea sp, .stack + STACKSIZE + lea sp, .stack + STACKSIZE - 6*4 + sub r0, gp ! pointer + st r0, 0 (sp) + sub r1, gp ! pointer + st r1, 4 (sp) + sub r2, gp ! pointer + st r2, 8 (sp) + sub r3, gp ! pointer + st r3, 12 (sp) + ! r4-r5 are not pointers and don't need adjusting - push r0-r5 + st r4, 16 (sp) + st r5, 20 (sp) sub r0, sp, gp st r0, _gpu_parameters @@ -62,10 +74,12 @@ _1: ! Call the language startup code. bl __m_a_i_n + ! Fall through to __exit if this returns. .define __exit __exit: + ld fp, .returnfp ld sp, .returnsp ld lr, .returnlr b lr @@ -88,6 +102,7 @@ __exit: ! 
We store the stack pointer and return address on entry so that we can ! cleanly exit. +.comm .returnfp, 4 .comm .returnsp, 4 .comm .returnlr, 4 From 6cbe6e1c4ee590cc8aa33ef3e098a0bd7d3c120f Mon Sep 17 00:00:00 2001 From: David Given Date: Wed, 22 May 2013 23:12:48 +0100 Subject: [PATCH 32/76] Better treatment of sign extension. --HG-- branch : dtrg-videocore --- mach/vc4/ncg/table | 150 +++++++++++++++++++++++++-------------------- 1 file changed, 83 insertions(+), 67 deletions(-) diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index 65c47e4ed..3c96d711c 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -79,6 +79,12 @@ TOKENS GPRE = { GPR reg; } 4 reg. +/* Sign extended values. */ + + /* The size refers to the *source*. */ + SIGNEX8 = { GPR reg; } 4 reg. + SIGNEX16 = { GPR reg; } 4 reg. + /* The results of comparisons. */ TRISTATE_RC_S = { GPR reg; INT val; } 4. @@ -92,59 +98,61 @@ SETS TOKEN = LABEL + CONST. GPRI = GPR + GPRE. + OP = TOKEN + SIGNEX8 + SIGNEX16. + XREG = GPRI + SIGNEX8 + SIGNEX16. INSTRUCTIONS - add GPRI:wo, GPRI:ro, GPRI+CONST:ro. - add GPRI:rw, GPRI+CONST:ro. - adds2 GPRI:rw, GPRI+CONST:ro. - adds4 GPRI:rw, GPRI+CONST:ro. - adds8 GPRI:rw, GPRI+CONST:ro. - adds16 GPRI:rw, GPRI+CONST:ro. - adds256 GPRI:rw, GPRI:rw, GPRI:ro. - and GPRI:rw, GPRI+CONST:ro. - asr GPRI:rw, GPRI+CONST:ro. + add XREG:wo, XREG:ro, XREG+CONST:ro. + add XREG:rw, XREG+CONST:ro. + adds2 XREG:rw, XREG+CONST:ro. + adds4 XREG:rw, XREG+CONST:ro. + adds8 XREG:rw, XREG+CONST:ro. + adds16 XREG:rw, XREG+CONST:ro. + adds256 XREG:rw, XREG:rw, XREG:ro. + and XREG:rw, XREG+CONST:ro. + asr XREG:rw, XREG+CONST:ro. beq "b.eq" LABEL:ro. bne "b.ne" LABEL:ro. bgt "b.gt" LABEL:ro. bgt "b.gt" LABEL:ro. bhi "b.hi" LABEL:ro. - b GPRI+LABEL:ro. - bl GPRI+LABEL:ro. - cmp GPRI:ro, GPRI+CONST:ro kills :cc. - divs GPRI:wo, GPRI:ro, GPRI+CONST:ro. - divu GPRI:wo, GPRI:ro, GPRI+CONST:ro. - eor GPRI:rw, GPRI+CONST:ro. - exts GPRI:wo, GPRI:ro, GPRI+CONST:ro. - exts GPRI:rw, GPRI+CONST:ro. 
- fadd GPRI:wo, GPRI:ro, GPRI:ro. - fcmp GPRI:wo, GPRI:ro, GPRI:ro. - fdiv GPRI:wo, GPRI:ro, GPRI:ro. - fmul GPRI:wo, GPRI:ro, GPRI:ro. - fsub GPRI:wo, GPRI:ro, GPRI:ro. - ld GPRI:wo, GPROFFSET+GPRGPR+LABEL:ro. - ldb GPRI:wo, GPROFFSET+GPRGPR+LABEL:ro. - ldh GPRI:wo, GPROFFSET+GPRGPR+LABEL:ro. - ldhs GPRI:wo, GPROFFSET+GPRGPR+LABEL:ro. - lea GPRI:wo, LABEL:ro. - lsl GPRI:rw, GPRI+CONST:ro. - lsl GPRI:wo, GPRI:ro, GPRI+CONST:ro. - lsr GPRI:rw, GPRI+CONST:ro. - mov GPRI:wo, GPRI+CONST:ro. - mul GPRI:rw, GPRI+CONST:ro. - neg GPRI:rw, GPRI+CONST:ro. - or GPRI:rw, GPRI+CONST:ro. + b XREG+LABEL:ro. + bl XREG+LABEL:ro. + cmp XREG:ro, XREG+CONST:ro kills :cc. + divs XREG:wo, XREG:ro, XREG+CONST:ro. + divu XREG:wo, XREG:ro, XREG+CONST:ro. + eor XREG:rw, XREG+CONST:ro. + exts XREG:wo, XREG:ro, XREG+CONST:ro. + exts XREG:rw, XREG+CONST:ro. + fadd XREG:wo, XREG:ro, XREG:ro. + fcmp XREG:wo, XREG:ro, XREG:ro. + fdiv XREG:wo, XREG:ro, XREG:ro. + fmul XREG:wo, XREG:ro, XREG:ro. + fsub XREG:wo, XREG:ro, XREG:ro. + ld XREG:wo, GPROFFSET+GPRGPR+LABEL:ro. + ldb XREG:wo, GPROFFSET+GPRGPR+LABEL:ro. + ldh XREG:wo, GPROFFSET+GPRGPR+LABEL:ro. + ldhs XREG:wo, GPROFFSET+GPRGPR+LABEL:ro. + lea XREG:wo, LABEL:ro. + lsl XREG:rw, XREG+CONST:ro. + lsl XREG:wo, XREG:ro, XREG+CONST:ro. + lsr XREG:rw, XREG+CONST:ro. + mov XREG:wo, XREG+CONST:ro. + mul XREG:rw, XREG+CONST:ro. + neg XREG:rw, XREG+CONST:ro. + or XREG:rw, XREG+CONST:ro. pop STACKABLE:wo. pop STACKABLE:wo, GPRLR+GPRPC:wo. push STACKABLE:ro. - sub GPRI:wo, GPRI:ro, CONST+GPRI:ro. - sub GPRI:rw, GPRI+CONST:ro. - st GPRI:ro, GPROFFSET+GPRGPR+LABEL:ro. - stb GPRI:ro, GPROFFSET+GPRGPR+LABEL:ro. - sth GPRI:ro, GPROFFSET+GPRGPR+LABEL:ro. - sths GPRI:ro, GPROFFSET+GPRGPR+LABEL:ro. + sub XREG:wo, XREG:ro, CONST+XREG:ro. + sub XREG:rw, XREG+CONST:ro. + st XREG:ro, GPROFFSET+GPRGPR+LABEL:ro. + stb XREG:ro, GPROFFSET+GPRGPR+LABEL:ro. + sth XREG:ro, GPROFFSET+GPRGPR+LABEL:ro. + sths XREG:ro, GPROFFSET+GPRGPR+LABEL:ro. invalid "invalid". 
@@ -175,6 +183,16 @@ MOVES lea %2, {LABEL, %1.adr} sub %2, GP +/* Sign extension */ + + from SIGNEX8 to GPR + gen + exts %2, %1.reg, {CONST, 8} + + from SIGNEX16 to GPR + gen + exts %2, %1.reg, {CONST, 16} + /* Miscellaneous */ from CONST+LABEL+GPR+GPRE to GPRE @@ -218,13 +236,13 @@ STACKINGRULES sub SP, {CONST, 4} st %1, {GPROFFSET, SP, 0} - from TOKEN to STACK + from OP to STACK uses STACKABLE gen move %1, %a push %a - from TOKEN to STACK + from OP to STACK gen COMMENT("fallback stack") move %1, SCRATCH @@ -239,20 +257,12 @@ COERCIONS uses reusing %1, REG=%1 yields %a - from CONST + from OP uses REG gen - COMMENT("coerce CONST->REG") move %1, %a yields %a - from LABEL - uses REG - gen - COMMENT("coerce LABEL->REG") - move %1, %a - yields %a - from STACK uses STACKABLE gen @@ -335,18 +345,24 @@ PATTERNS /* nop */ pat loc loc cii $1==BYTE && $2>BYTE /* signed char -> anything */ - with REG - uses reusing %1, REG=%1 - gen - exts %a, {CONST, 8} - yields %a - + with GPR + yields {SIGNEX8, %1} + with GPRE + yields {SIGNEX8, %1.reg} + with SIGNEX8 + yields {SIGNEX8, %1.reg} + with SIGNEX16 + yields {SIGNEX8, %1.reg} + pat loc loc cii $1==WORD && $2>WORD /* signed short -> anything */ - with REG - uses reusing %1, REG=%1 - gen - exts %a, {CONST, 16} - yields %a + with GPR + yields {SIGNEX16, %1} + with GPRE + yields {SIGNEX16, %1.reg} + with SIGNEX8 + yields {SIGNEX16, %1.reg} + with SIGNEX16 + yields {SIGNEX16, %1.reg} @@ -592,21 +608,21 @@ PATTERNS bl {LABEL, ".los"} pat sti $1==BYTE /* Store byte indirect */ - with LABEL GPRI + with LABEL GPRI+SIGNEX8+SIGNEX16 gen stb %2, %1 - with GPR GPRI + with GPR GPRI+SIGNEX8+SIGNEX16 gen stb %2, {GPRGPR, %1, GP} - with GPRE GPRI + with GPRE GPRI+SIGNEX8+SIGNEX16 gen stb %2, {GPRGPR, %1.reg, GP} pat sti $1==WORD /* Store half-word indirect */ - with LABEL GPRI + with LABEL GPRI+SIGNEX16 gen sth %2, %1 - with GPRI GPRI + with GPRI GPRI+SIGNEX16 uses reusing %1, REG gen add %a, %1, GP From 2c7ee272065d29bb145c5e9c474920b96e6c7918 
Mon Sep 17 00:00:00 2001 From: David Given Date: Wed, 22 May 2013 23:55:23 +0100 Subject: [PATCH 33/76] Double-quads can be loaded and stored (more) correctly. --HG-- branch : dtrg-videocore --- mach/vc4/ncg/table | 53 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index 3c96d711c..e5cc0e190 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -386,8 +386,8 @@ PATTERNS pat ldl /* Load double-word from local */ leaving - lol $1 + QUAD*0 lol $1 + QUAD*1 + lol $1 + QUAD*0 pat stl inreg($1)>0 /* Store to local */ with CONST+GPRI @@ -402,8 +402,8 @@ PATTERNS pat sdl /* Store double-word to local */ leaving - stl $1 + QUAD*1 stl $1 + QUAD*0 + stl $1 + QUAD*1 pat lil inreg($1)>0 /* Load from indirected local */ uses REG @@ -496,14 +496,15 @@ PATTERNS dec sti QUAD - pat lde /* Load external */ - uses REG, REG - gen - lea %a, {LABEL, $1} - ld %b, {GPROFFSET, %a, 4} - ld %a, {GPROFFSET, %a, 0} - yields %b %a + pat lde /* Load double external */ + leaving + lae $1 + loi QUAD*2 + pat sde /* Store double external */ + leaving + lae $1 + sti QUAD*2 /* Structures */ @@ -570,7 +571,7 @@ PATTERNS ldhs %a, {GPROFFSET, %a, 0} yields %a - pat loi $1==WORD /* Load short indirect */ + pat loi $1==WORD /* Load short indirect */ with LABEL uses REG gen @@ -583,7 +584,7 @@ PATTERNS ldh %a, {GPROFFSET, %a, 0} yields %a - pat loi $1==QUAD /* Load quad indirect */ + pat loi $1==QUAD /* Load quad indirect */ with LABEL uses REG gen @@ -596,6 +597,22 @@ PATTERNS ld %a, {GPROFFSET, %a, 0} yields %a + pat loi $1==2*QUAD /* Load double-quad indirect */ + with LABEL + uses REG, REG + gen + lea %b, %1 + ld %a, {GPROFFSET, %b, 0} + ld %b, {GPROFFSET, %b, 4} + yields %b %a + with GPRI + uses reusing %1, REG, REG + gen + add %b, %1, GP + ld %a, {GPROFFSET, %b, 0} + ld %b, {GPROFFSET, %b, 4} + yields %b %a + pat loi !nicesize($1) /* Load arbitrary size */ leaving loc $1 @@ -638,6 +655,20 @@ 
PATTERNS add %a, %1, GP st %2, {GPROFFSET, %a, 0} + pat sti $1==2*QUAD /* Load double-quad indirect */ + with LABEL GPRI GPRI + uses REG + gen + lea %a, %1 + st %2, {GPROFFSET, %a, 0} + st %3, {GPROFFSET, %a, 4} + with GPRI GPRI GPRI + uses reusing %1, REG=%1 + gen + add %a, GP + st %2, {GPROFFSET, %a, 0} + st %3, {GPROFFSET, %a, 4} + pat sti /* Store arbitrary size */ leaving loc $1 From 98a51732abc07a9dcf2748c2b579d0d2e81e4566 Mon Sep 17 00:00:00 2001 From: David Given Date: Fri, 24 May 2013 17:04:29 +0100 Subject: [PATCH 34/76] Various codegen tweaks. --HG-- branch : dtrg-videocore --- mach/vc4/ncg/table | 223 +++++++++++++++++++++++---------------------- 1 file changed, 112 insertions(+), 111 deletions(-) diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index e5cc0e190..93c787147 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -27,7 +27,6 @@ PROPERTIES GPR /* any GPR */ REG /* any allocatable GPR */ - STACKABLE /* a push/popable register (r0, r6, r16, fp) */ GPR0 GPR1 GPR2 GPR3 GPR4 GPR5 GPR6 GPR7 GPR8 GPR9 GPR10 GPR11 GPR12 GPR13 GPR14 GPR15 @@ -37,13 +36,13 @@ PROPERTIES REGISTERS - R0("r0") : GPR, REG, STACKABLE, GPR0. + R0("r0") : GPR, REG, GPR0. R1("r1") : GPR, REG, GPR1. R2("r2") : GPR, REG, GPR2. R3("r3") : GPR, REG, GPR3. R4("r4") : GPR, REG, GPR4. R5("r5") : GPR, REG, GPR5. - R6("r6") : GPR, REG, STACKABLE, GPR6 regvar. + R6("r6") : GPR, REG, GPR6 regvar. R7("r7") : GPR, REG, GPR7 regvar. R8("r8") : GPR, REG, GPR8 regvar. R9("r9") : GPR, REG, GPR9 regvar. @@ -54,14 +53,16 @@ REGISTERS R14("r14") : GPR, REG, GPR14 regvar. GP("r15") : GPR, GPRGP. + R16("r16") : GPR, GPR16. + R23("r23") : GPR. - FP("fp") : GPR, GPRFP, STACKABLE. + FP("fp") : GPR, GPRFP. SP("sp") : GPR, GPRSP. LR("lr") : GPR, GPRLR. PC("pc") : GPR, GPRPC. /* r26 to r31 are special and the code generator doesn't touch them. */ -#define SCRATCH R23 +#define SCRATCH R16 TOKENS @@ -69,6 +70,7 @@ TOKENS GPROFFSET = { GPR reg; INT off; } 4 off "(" reg ")". 
GPRGPR = { GPR reg1; GPR reg2; } 4 "(" reg1 "," reg2 ")". + GPRINC = { GPR reg; } 4 "(" reg ")++". /* Primitives */ @@ -119,6 +121,7 @@ INSTRUCTIONS bgt "b.gt" LABEL:ro. bgt "b.gt" LABEL:ro. bhi "b.hi" LABEL:ro. + bset XREG:rw, XREG+CONST:ro. b XREG+LABEL:ro. bl XREG+LABEL:ro. cmp XREG:ro, XREG+CONST:ro kills :cc. @@ -132,6 +135,7 @@ INSTRUCTIONS fdiv XREG:wo, XREG:ro, XREG:ro. fmul XREG:wo, XREG:ro, XREG:ro. fsub XREG:wo, XREG:ro, XREG:ro. + ld XREG:wo, GPRINC:rw. ld XREG:wo, GPROFFSET+GPRGPR+LABEL:ro. ldb XREG:wo, GPROFFSET+GPRGPR+LABEL:ro. ldh XREG:wo, GPROFFSET+GPRGPR+LABEL:ro. @@ -144,9 +148,10 @@ INSTRUCTIONS mul XREG:rw, XREG+CONST:ro. neg XREG:rw, XREG+CONST:ro. or XREG:rw, XREG+CONST:ro. - pop STACKABLE:wo. - pop STACKABLE:wo, GPRLR+GPRPC:wo. - push STACKABLE:ro. + pop GPR0+GPR6+GPR16+GPRFP+GPRPC:wo. + pop GPR0+GPR6+GPR16+GPRFP:wo, GPRPC:wo. + push GPR0+GPR6+GPR16+GPRFP+GPRLR:ro. + push GPR0+GPR6+GPR16+GPRFP:ro, GPRLR:ro. sub XREG:wo, XREG:ro, CONST+XREG:ro. sub XREG:rw, XREG+CONST:ro. st XREG:ro, GPROFFSET+GPRGPR+LABEL:ro. @@ -155,6 +160,7 @@ INSTRUCTIONS sths XREG:ro, GPROFFSET+GPRGPR+LABEL:ro. invalid "invalid". + comment "!" LABEL:ro. 
@@ -210,53 +216,35 @@ TESTS STACKINGRULES - from STACKABLE to STACK + from GPR0+GPR6+GPR16 to STACK gen - push %1 + comment {LABEL, "push stackable"} + push %1 - from GPR to STACK - uses STACKABLE + from OP+GPRI to STACK gen - move %1, %a - push %a - - from GPR to STACK - gen - sub SP, SP, {CONST, 4} - st %1, {GPROFFSET, SP, 0} - - from GPRE to STACK - uses STACKABLE - gen - move %1, %a - push %a - - from GPRE to STACK - gen - sub SP, {CONST, 4} - st %1, {GPROFFSET, SP, 0} - - from OP to STACK - uses STACKABLE - gen - move %1, %a - push %a - - from OP to STACK - gen - COMMENT("fallback stack") + comment {LABEL, "push via scratch"} move %1, SCRATCH - sub SP, SP, {CONST, 4} - st SCRATCH, {GPROFFSET, SP, 0} + push SCRATCH + COERCIONS - from GPRE + from GPRI uses reusing %1, REG=%1 yields %a + from GPR + yields {GPRE, %1} + + from OP + uses GPR0 + gen + move %1, %a + yields %a + from OP uses REG gen @@ -264,10 +252,10 @@ COERCIONS yields %a from STACK - uses STACKABLE + uses REG gen - COMMENT("coerce STACK->REG") - pop %a + pop R0 + move R0, %a yields %a @@ -619,10 +607,8 @@ PATTERNS los QUAD pat los /* Load arbitrary size */ - with STACK - kills ALL - gen - bl {LABEL, ".los"} + leaving + cal ".los" pat sti $1==BYTE /* Store byte indirect */ with LABEL GPRI+SIGNEX8+SIGNEX16 @@ -735,6 +721,9 @@ PATTERNS /* Word arithmetic */ + pat loc adi $1==0 /* Add nothing */ + /* nop */ + pat adi $1==QUAD /* Add word (second + top) */ with GPRI+CONST GPRI uses reusing %2, REG=%2 @@ -747,6 +736,9 @@ PATTERNS add %a, %2 yields %a + pat loc sbi $1==0 /* Subtract nothing */ + /* nop */ + pat sbi $1==QUAD /* Subtract word (second - top) */ with GPRI+CONST GPRI uses reusing %2, REG=%2 @@ -886,16 +878,6 @@ PATTERNS yields %a - pat and !defined($1) /* AND set */ - with STACK - gen - bl {LABEL, ".and"} - - pat ior !defined($1) /* OR set */ - with STACK - gen - bl {LABEL, ".ior"} - pat xor $1==4 /* XOR word */ with GPR GPR yields {XOR_RR, %1, %2} @@ -1055,33 +1037,48 @@ PATTERNS /* Sets */ - 
pat set defined($1) /* Create word with set bit */ + pat set $1==QUAD /* Create quad with one bit set */ + with GPRI + uses reusing %1, REG + gen + bset %a, %1 + yields %a + + pat set defined($1) /* Any other set */ leaving - loc 1 - exg $1 - sli $1 + loc $1 + cal ".set" pat set !defined($1) /* Create structure with set bit (variable) */ - with STACK - gen - bl {LABEL, ".set"} + leaving + cal ".set" pat inn defined($1) /* Test for set bit */ leaving - set QUAD - and QUAD + set $1 + and $1 pat inn !defined($1) /* Test for set bit (variable) */ - with GPR0 STACK - gen - bl {LABEL, ".inn"} - yields R0 + leaving + cal ".inn" - pat ior !defined($1) /* Or two sets */ - with STACK - gen - bl {LABEL, ".ior"} + pat ior !nicesize($1) /* OR set */ + leaving + cal ".ior" + pat ior !defined($1) /* OR set */ + leaving + cal ".ior" + + pat and !nicesize($1) /* AND set */ + leaving + loc $1 + cal ".and" + + pat and !defined($1) /* AND set */ + leaving + cal ".and" + /* Boolean resolutions */ @@ -1171,15 +1168,23 @@ PATTERNS pat ble call anyb("b.le") /* Branch if signed second <= top */ proc cmu_z example cmu zeq - with GPR+CONST GPRI STACK + with GPRI+CONST GPRI STACK gen cmp %2, %1 beq[1] {LABEL, $2} + pat cmu zeq call cmu_z("b.eq") /* Branch if unsigned second == top */ + pat cmu zne call cmu_z("b.ne") /* Branch if unsigned second != top */ pat cmu zgt call cmu_z("b.hi") /* Branch if unsigned second > top */ pat cmu zlt call cmu_z("b.lo") /* Branch if unsigned second < top */ pat cmu zge call cmu_z("b.hs") /* Branch if unsigned second >= top */ pat cmu zle call cmu_z("b.ls") /* Branch if unsigned second <= top */ + pat cmi zeq call cmu_z("b.eq") /* Branch if signed second == top */ + pat cmi zne call cmu_z("b.ne") /* Branch if signed second != top */ + pat cmi zgt call cmu_z("b.gt") /* Branch if signed second > top */ + pat cmi zlt call cmu_z("b.lt") /* Branch if signed second < top */ + pat cmi zge call cmu_z("b.ge") /* Branch if signed second >= top */ + pat cmi zle call 
cmu_z("b.le") /* Branch if signed second <= top */ proc cmf_z example cmu zeq with GPRI GPRI STACK @@ -1268,6 +1273,9 @@ PATTERNS pat lfr $1==QUAD /* Load function result, word */ yields R0 + pat lfr $1==QUAD*2 /* Load function result, word */ + yields R1 R0 + pat ret $1==0 /* Return from procedure */ gen return @@ -1280,9 +1288,12 @@ PATTERNS return mov SP, FP pop FP, PC - with STACK + + pat ret $1==QUAD*2 /* Return from procedure, word */ + with GPRI GPRI gen - pop R0 + move %1, R0 + move %2, R1 return mov SP, FP pop FP, PC @@ -1335,9 +1346,8 @@ PATTERNS ste ".ignmask" pat trp /* Raise EM trap */ - with GPR0 - gen - bl {LABEL, ".trap"} + leaving + cal ".trap" pat sig /* Set trap handler */ leaving @@ -1467,47 +1477,38 @@ PATTERNS pat dvf call simple_f("fdiv") /* Float divide (second / top) */ pat loc loc cff $1==$2 && $1==QUAD /* Convert float to float */ - leaving + leaving nop pat loc loc cfi $1==$2 && $1==QUAD /* Convert float -> integer */ - with GPR0 - gen - bl {LABEL, ".cfi"} - yields R0 + leaving + cal ".cfi" + lfr QUAD pat loc loc cfu $1==$2 && $1==QUAD /* Convert float -> unsigned */ - with GPR0 - gen - bl {LABEL, ".cfu"} - yields R0 + leaving + cal ".cfu" + lfr QUAD pat loc loc cif $1==$2 && $1==QUAD /* Convert integer -> float */ - with GPR0 - gen - bl {LABEL, ".cif"} - yields R0 + leaving + cal ".cif" + lfr QUAD pat loc loc cuf $1==$2 && $1==QUAD /* Convert unsigned -> float */ - with GPR0 - gen - bl {LABEL, ".cuf"} - yields R0 + leaving + cal ".cuf" + lfr QUAD pat fef /* Split float */ - with GPR0 - kills GPR1 - gen - bl {LABEL, ".fef"} - yields R0 R1 + leaving + cal ".cuf" + lfr QUAD*2 pat fif /* Multiply float and split (?) 
*/ - with GPRI GPRI - kills GPR0, GPR1 - gen - fmul R0, %2, %1 - bl {LABEL, ".fef"} - yields R0 R1 + leaving + mlf QUAD + fef pat zrf /* Load a floating zero */ leaving From 98e745d04ce77f7cbdffc860817e6724a049f09f Mon Sep 17 00:00:00 2001 From: David Given Date: Sat, 25 May 2013 00:33:01 +0100 Subject: [PATCH 35/76] Fix awful bug where uint32_t was sometimes defined as a short. --HG-- branch : dtrg-videocore --- lang/cem/libcc.ansi/headers/stdint.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lang/cem/libcc.ansi/headers/stdint.h b/lang/cem/libcc.ansi/headers/stdint.h index 86fd04815..ce9cf3e38 100644 --- a/lang/cem/libcc.ansi/headers/stdint.h +++ b/lang/cem/libcc.ansi/headers/stdint.h @@ -26,7 +26,7 @@ typedef unsigned short uint16_t; #if _EM_WSIZE == 4 typedef signed int int32_t; -typedef unsigned short uint32_t; +typedef unsigned int uint32_t; #else typedef signed long int32_t; typedef unsigned long uint32_t; From 472f7783422bc372c6f018e5a867b67833f83c44 Mon Sep 17 00:00:00 2001 From: David Given Date: Sat, 25 May 2013 00:33:38 +0100 Subject: [PATCH 36/76] Don't write out constant data as big-endian! Some other cleanups. --HG-- branch : dtrg-videocore --- mach/vc4/as/mach0.c | 1 - mach/vc4/ncg/mach.c | 28 +++++++++------------------- mach/vc4/ncg/table | 6 ++++++ 3 files changed, 15 insertions(+), 20 deletions(-) diff --git a/mach/vc4/as/mach0.c b/mach/vc4/as/mach0.c index d12994de0..2df70061c 100644 --- a/mach/vc4/as/mach0.c +++ b/mach/vc4/as/mach0.c @@ -30,4 +30,3 @@ typedef unsigned long quad; #undef VALWIDTH #define VALWIDTH 8 -#define FIXUPFLAGS (RELBR | RELWR) diff --git a/mach/vc4/ncg/mach.c b/mach/vc4/ncg/mach.c index 375d8a6e7..90649b9af 100644 --- a/mach/vc4/ncg/mach.c +++ b/mach/vc4/ncg/mach.c @@ -10,9 +10,7 @@ int framesize; -/* - * machine dependent back end routines for the Zilog Z80. - */ +/* Write out a constant data section. 
*/ con_part(int sz, word w) { @@ -20,19 +18,12 @@ con_part(int sz, word w) part_size++; if (part_size == TEM_WSIZE) part_flush(); - if (sz == 1) { - w &= 0xFF; - w <<= 8*(3-part_size); - part_word |= w; - } else if (sz == 2) { - w &= 0xFFFF; - if (part_size == 0) { - /* Shift 8 for m68k2, 16 otherwise */ - w <<= 4 * TEM_WSIZE; - } + if (sz == 1 || sz == 2) { + w &= (sz == 1 ? 0xFF : 0xFFFF); + w <<= 8 * part_size; part_word |= w; } else { - assert(sz == TEM_WSIZE); + assert(sz == 4); part_word = w; } part_size += sz; @@ -40,7 +31,6 @@ con_part(int sz, word w) con_mult(word sz) { - if (argval != 4) fatal("bad icon/ucon size"); fprintf(codefile,".data4 %s\n", str); @@ -48,9 +38,9 @@ con_mult(word sz) #define CODE_GENERATOR #define IEEEFLOAT -#define FL_MSL_AT_LOW_ADDRESS 1 -#define FL_MSW_AT_LOW_ADDRESS 1 -#define FL_MSB_AT_LOW_ADDRESS 1 +#define FL_MSL_AT_LOW_ADDRESS 0 +#define FL_MSW_AT_LOW_ADDRESS 0 +#define FL_MSB_AT_LOW_ADDRESS 0 #include void prolog(full nlocals) @@ -149,7 +139,7 @@ static void saveloadregs(const char* op) if (minreg != 32) { fprintf(codefile, "! saving registers %d to %d\n", minreg, maxreg); - assert(minreg == 6); + assert((minreg == 6) || (minreg == 16)); fprintf(codefile, "%s r6-r%d\n", op, maxreg); } diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index 93c787147..8a23c413e 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -221,6 +221,12 @@ STACKINGRULES comment {LABEL, "push stackable"} push %1 + from OP+GPRI to STACK + uses GPR0 + gen + move %1, %a + push %a + from OP+GPRI to STACK gen comment {LABEL, "push via scratch"} From ec25fec145ab1a0c33e8641af967ed3f8341acfa Mon Sep 17 00:00:00 2001 From: David Given Date: Sat, 25 May 2013 00:35:29 +0100 Subject: [PATCH 37/76] Build binaries that will run bare metal. Add enough syscall library to init the uart and write text. 
--HG-- branch : dtrg-videocore rename : plat/rpi/libsys/libsys.h => plat/rpi/libsys/libsysasm.h rename : plat/rpi/libsys/_sys_rawread.s => plat/rpi/libsys/phys_to_user.s rename : plat/rpi/libsys/_sys_rawread.s => plat/rpi/libsys/uart.s rename : plat/rpi/libsys/_sys_rawread.s => plat/rpi/libsys/user_to_phys.s --- plat/rpi/boot.s | 12 ++- plat/rpi/build.mk | 6 +- plat/rpi/include/pi.h | 21 +++++ plat/rpi/libsys/libsysasm.h | 20 +++++ plat/rpi/libsys/phys_to_user.s | 20 +++++ plat/rpi/libsys/uart.s | 135 +++++++++++++++++++++++++++++++++ plat/rpi/libsys/user_to_phys.s | 20 +++++ 7 files changed, 229 insertions(+), 5 deletions(-) create mode 100644 plat/rpi/include/pi.h create mode 100644 plat/rpi/libsys/libsysasm.h create mode 100644 plat/rpi/libsys/phys_to_user.s create mode 100644 plat/rpi/libsys/uart.s create mode 100644 plat/rpi/libsys/user_to_phys.s diff --git a/plat/rpi/boot.s b/plat/rpi/boot.s index 43811a6ad..523cf40dc 100644 --- a/plat/rpi/boot.s +++ b/plat/rpi/boot.s @@ -19,14 +19,17 @@ #define STACKSIZE 16*1024 ! MAIN ENTRY POINT -! -! When running as a kernel, our parameters are passed in in r0-r5, so -! the startup sequence mustn't disturb these. begtext: + ! This empty space is required by the boot loader. + + b start + .space 508 ! 512 minus space needed for branch instruction + ! Wipe the bss. This must happen absolutely first, because we need ! to store the old system registers into it. 
- + +start: lea r6, begbss lea r7, endbss mov r8, #0 @@ -79,6 +82,7 @@ _1: .define __exit __exit: + mov r0, sr ld fp, .returnfp ld sp, .returnsp ld lr, .returnlr diff --git a/plat/rpi/build.mk b/plat/rpi/build.mk index 9716eb058..dffe59d9b 100644 --- a/plat/rpi/build.mk +++ b/plat/rpi/build.mk @@ -13,10 +13,15 @@ D := plat/rpi/ platform-headers := \ unistd.h \ + pi.h \ ack/config.h platform-libsys := \ _hol0.s \ + phys_to_user.s \ + user_to_phys.s \ + uart.s \ + write.c \ ifeq (x,y) errno.s \ @@ -26,7 +31,6 @@ ifeq (x,y) creat.c \ close.c \ read.c \ - write.c \ brk.c \ getpid.c \ kill.c \ diff --git a/plat/rpi/include/pi.h b/plat/rpi/include/pi.h new file mode 100644 index 000000000..a69cdd8d9 --- /dev/null +++ b/plat/rpi/include/pi.h @@ -0,0 +1,21 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#ifndef PI_H +#define PI_H + +/* Initialise the mini UART (only do this if running on bare metal! */ +extern void init_uart(void); + +/* Converts a pointer from a physical address to a user address. */ +extern void* phys_to_user(void* ptr); + +/* Converts a pointer from a user address to a physical address. */ +extern void* user_to_phys(void* ptr); + +#endif + diff --git a/plat/rpi/libsys/libsysasm.h b/plat/rpi/libsys/libsysasm.h new file mode 100644 index 000000000..16dbbcfba --- /dev/null +++ b/plat/rpi/libsys/libsysasm.h @@ -0,0 +1,20 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#ifndef LIBSYSASM_H +#define LIBSYSASM_H + +! Declare segments (the order is important). 
+ +.sect .text +.sect .rom +.sect .data +.sect .bss + +#define gp r15 + +#endif diff --git a/plat/rpi/libsys/phys_to_user.s b/plat/rpi/libsys/phys_to_user.s new file mode 100644 index 000000000..649b19b5a --- /dev/null +++ b/plat/rpi/libsys/phys_to_user.s @@ -0,0 +1,20 @@ +# +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include "libsysasm.h" + +.sect .text + +! Transforms a physical address into a user address. + +.define _phys_to_user +_phys_to_user: + ld r0, 0 (sp) + sub r0, gp + b lr + diff --git a/plat/rpi/libsys/uart.s b/plat/rpi/libsys/uart.s new file mode 100644 index 000000000..b12f2efd2 --- /dev/null +++ b/plat/rpi/libsys/uart.s @@ -0,0 +1,135 @@ +# +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include "libsysasm.h" + +.sect .text + +! Because of the low system clock rate, this baud rate might be inaccurate +! So be careful with your serial/terminal, some adjustment may be necessary. +TARGET_BAUD_RATE = 115200 + +! System clock is running directly off the 19.2MHz crystal at initial reset +SYSTEM_CLOCK = 19200000 + +GPFSEL1 = 0x7e200004 +GPSET0 = 0x7e20001C +GPCLR0 = 0x7e200028 +GPPUD = 0x7e200094 +GPPUDCLK0 = 0x7e200098 + +AUX_ENABLES = 0x7e215004 +AUX_MU_IO_REG = 0x7e215040 +AUX_MU_IER_REG = 0x7e215044 +AUX_MU_IIR_REG = 0x7e215048 +AUX_MU_LCR_REG = 0x7e21504C +AUX_MU_MCR_REG = 0x7e215050 +AUX_MU_LSR_REG = 0x7e215054 +AUX_MU_MSR_REG = 0x7e215058 +AUX_MU_SCRATCH = 0x7e21505C +AUX_MU_CNTL_REG = 0x7e215060 +AUX_MU_STAT_REG = 0x7e215064 +AUX_MU_BAUD_REG = 0x7e215068 + +! Sets up the mini UART for use as a console. + +.define _init_uart +_init_uart: + ! 
Configure TX and RX GPIO pins for Mini Uart function. + mov r1, #GPFSEL1 + ld r0, (r1) + and r0, #~[7<<12] + or r0, #2<<12 + and r0, #~[7<<15] + or r0, #2<<15 + st r0, (r1) + + mov r1, #GPPUD + mov r0, #0 + st r0, (r1) + +delay1: + add r0, #1 + cmp r0, #150 + b.ne delay1 + + mov r1, #GPPUDCLK0 + mov r0, #[1<<14]|[1<<15] + st r0, (r1) + + mov r0, #0 +delay2: + add r0, #1 + cmp r0, #150 + b.ne delay2 + + mov r1, #GPPUDCLK0 + mov r0, #0 + st r0, (r1) + + ! Set up serial port + mov r1, #AUX_ENABLES + mov r0, #1 + st r0, (r1) + + mov r1, #AUX_MU_IER_REG + mov r0, #0 + st r0, (r1) + + mov r1, #AUX_MU_CNTL_REG + mov r0, #0 + st r0, (r1) + + mov r1, #AUX_MU_LCR_REG + mov r0, #3 + st r0, (r1) + + mov r1, #AUX_MU_MCR_REG + mov r0, #0 + st r0, (r1) + + mov r1, #AUX_MU_IER_REG + mov r0, #0 + st r0, (r1) + + mov r1, #AUX_MU_IIR_REG + mov r0, #0xC6 + st r0, (r1) + + mov r1, #AUX_MU_BAUD_REG + mov r0, #[[SYSTEM_CLOCK/[TARGET_BAUD_RATE*8]]-1] + st r0, (r1) + + mov r1, #AUX_MU_LCR_REG + mov r0, #3 + st r0, (r1) + + mov r1, #AUX_MU_CNTL_REG + mov r0, #3 + st r0, (r1) + + b lr + +! Send a single byte. + +.define __sys_rawwrite +__sys_rawwrite: + ld r0, (sp) + mov r1, #AUX_MU_LSR_REG + ! loop until space available in Tx buffer +sendwait: + ld r2, (r1) + and r2, #0x20 + cmp r2, #0x20 + b.ne sendwait + + mov r1, #AUX_MU_IO_REG + stb r0, (r1) + + b lr + diff --git a/plat/rpi/libsys/user_to_phys.s b/plat/rpi/libsys/user_to_phys.s new file mode 100644 index 000000000..7b988fdd6 --- /dev/null +++ b/plat/rpi/libsys/user_to_phys.s @@ -0,0 +1,20 @@ +# +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include "libsysasm.h" + +.sect .text + +! Transforms a user address into a physical address. 
+ +.define _user_to_phys +_user_to_phys: + ld r0, 0 (sp) + add r0, gp + b lr + From 2ee79ab0b251e65f35b9203cc560c37c41bc07fc Mon Sep 17 00:00:00 2001 From: David Given Date: Sat, 25 May 2013 13:31:01 +0100 Subject: [PATCH 38/76] Encode comparing branch correctly. --HG-- branch : dtrg-videocore --- mach/vc4/as/mach4.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mach/vc4/as/mach4.c b/mach/vc4/as/mach4.c index 4c87209ee..ef0b50082 100644 --- a/mach/vc4/as/mach4.c +++ b/mach/vc4/as/mach4.c @@ -16,10 +16,10 @@ operation | OP_BRANCH CC expr { branch_instr(0, $2, &$3); } | OP_BRANCHLINK CC expr { branch_instr(1, $2, &$3); } - | OP_BRANCH GPR ',' GPR ',' expr { branch_addcmp_lit_reg_instr(ALWAYS, 0, $2, $4, &$6); } - | OP_BRANCH CC GPR ',' GPR ',' expr { branch_addcmp_lit_reg_instr($2, 0, $3, $5, &$7); } - | OP_BRANCH GPR ',' '#' absexp ',' expr { branch_addcmp_lit_lit_instr(ALWAYS, 0, $2, $5, &$7); } - | OP_BRANCH CC GPR ',' '#' absexp ',' expr { branch_addcmp_lit_lit_instr($2, 0, $3, $6, &$8); } + | OP_BRANCH GPR ',' GPR ',' expr { branch_addcmp_lit_reg_instr(ALWAYS, $2, 0, $4, &$6); } + | OP_BRANCH CC GPR ',' GPR ',' expr { branch_addcmp_lit_reg_instr($2, $3, 0, $5, &$7); } + | OP_BRANCH GPR ',' '#' absexp ',' expr { branch_addcmp_lit_lit_instr(ALWAYS, $2, 0, $5, &$7); } + | OP_BRANCH CC GPR ',' '#' absexp ',' expr { branch_addcmp_lit_lit_instr($2, $3, 0, $6, &$8); } | OP_ADDCMPB GPR ',' GPR ',' GPR ',' expr { branch_addcmp_reg_reg_instr(ALWAYS, $2, $4, $6, &$8); } | OP_ADDCMPB CC GPR ',' GPR ',' GPR ',' expr { branch_addcmp_reg_reg_instr($2, $3, $5, $7, &$9); } | OP_ADDCMPB GPR ',' '#' absexp ',' GPR ',' expr { branch_addcmp_lit_reg_instr(ALWAYS, $2, $5, $7, &$9); } From d7efb0a32c17775071bb8f13ac0962353fe22ab8 Mon Sep 17 00:00:00 2001 From: David Given Date: Sat, 25 May 2013 13:31:27 +0100 Subject: [PATCH 39/76] Implement .csa. 
--HG-- branch : dtrg-videocore rename : mach/vc4/libem/dummy.s => mach/vc4/libem/csa.s --- mach/vc4/libem/csa.s | 33 +++++++++++++++++++++++++++++++++ mach/vc4/libem/dummy.s | 14 -------------- mach/vc4/libem/videocore.h | 2 ++ 3 files changed, 35 insertions(+), 14 deletions(-) create mode 100644 mach/vc4/libem/csa.s delete mode 100644 mach/vc4/libem/dummy.s diff --git a/mach/vc4/libem/csa.s b/mach/vc4/libem/csa.s new file mode 100644 index 000000000..1628199a0 --- /dev/null +++ b/mach/vc4/libem/csa.s @@ -0,0 +1,33 @@ +# +/* + * VideoCore IV support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include "videocore.h" + +.define .csa +.sect .data +.csa: + ! on entry: + ! r0 = un-fixed-up descriptor + ! r1 = value + add r0, gp + + ld r2, 4 (r0) ! check lower bound + cmp r1, r2 + mov.lo r1, r2 ! r1 = max(r1, r2) + + sub r1, r2 ! adjust value to be 0-based + + ld r2, 8 (r0) ! check upper bound + cmp r1, r2 + mov.hi r1, r2 ! r1 = min(r1, r2) + + add r1, #3 + ld r1, (r0, r1) ! load destination address + add r1, gp + b r1 ! ...and go + diff --git a/mach/vc4/libem/dummy.s b/mach/vc4/libem/dummy.s deleted file mode 100644 index fdbcc4c38..000000000 --- a/mach/vc4/libem/dummy.s +++ /dev/null @@ -1,14 +0,0 @@ -# -/* - * VideoCore IV support library for the ACK - * © 2013 David Given - * This file is redistributable under the terms of the 3-clause BSD license. - * See the file 'Copying' in the root of the distribution for the full text.
- */ - -#include "videocore.h" - -.define __dummy -.sect .data -__dummy: - diff --git a/mach/vc4/libem/videocore.h b/mach/vc4/libem/videocore.h index 3e27a7e7b..8ccb981ee 100644 --- a/mach/vc4/libem/videocore.h +++ b/mach/vc4/libem/videocore.h @@ -13,3 +13,5 @@ .sect .data .sect .bss +#define gp r15 + From b6680a48cc6005801cfc0c246a6f1a2f30e43d1a Mon Sep 17 00:00:00 2001 From: David Given Date: Sat, 25 May 2013 13:31:58 +0100 Subject: [PATCH 40/76] Disable register variables. The code is a bit worse, but having two stackable registers makes things much easier to understand. --HG-- branch : dtrg-videocore --- mach/vc4/build.mk | 2 +- mach/vc4/ncg/table | 69 ++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 59 insertions(+), 12 deletions(-) diff --git a/mach/vc4/build.mk b/mach/vc4/build.mk index 06ea3edd4..36b801aff 100644 --- a/mach/vc4/build.mk +++ b/mach/vc4/build.mk @@ -1,5 +1,5 @@ arch-libem-vc4 := \ - dummy.s + csa.s arch-libend-vc4 = \ edata.s \ diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index 8a23c413e..bd144a193 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -21,12 +21,19 @@ PC_OFFSET = 4 /* Offset of saved PC relative to our FP */ #define nicesize(x) ((x)==BYTE || (x)==WORD || (x)==QUAD) +/* #define REGVARS */ + +#ifndef REGVARS +#define regvar +#define return +#endif PROPERTIES GPR /* any GPR */ REG /* any allocatable GPR */ + STACKABLE /* a register than can be used with push/pop */ GPR0 GPR1 GPR2 GPR3 GPR4 GPR5 GPR6 GPR7 GPR8 GPR9 GPR10 GPR11 GPR12 GPR13 GPR14 GPR15 @@ -36,13 +43,17 @@ PROPERTIES REGISTERS - R0("r0") : GPR, REG, GPR0. + R0("r0") : GPR, REG, GPR0, STACKABLE. R1("r1") : GPR, REG, GPR1. R2("r2") : GPR, REG, GPR2. R3("r3") : GPR, REG, GPR3. R4("r4") : GPR, REG, GPR4. R5("r5") : GPR, REG, GPR5. - R6("r6") : GPR, REG, GPR6 regvar. +#if defined REGVARS + R6("r6") : GPR, REG, GPR6, STACKABLE. +#else + R6("r6") : GPR, GPR6. +#endif R7("r7") : GPR, REG, GPR7 regvar. R8("r8") : GPR, REG, GPR8 regvar. 
R9("r9") : GPR, REG, GPR9 regvar. @@ -62,7 +73,11 @@ REGISTERS PC("pc") : GPR, GPRPC. /* r26 to r31 are special and the code generator doesn't touch them. */ -#define SCRATCH R16 +#if defined REGVARS + #define SCRATCH R16 +#else + #define SCRATCH R6 +#endif TOKENS @@ -227,6 +242,12 @@ STACKINGRULES move %1, %a push %a + from OP to STACK + uses STACKABLE + gen + move %1, %a + push %a + from OP+GPRI to STACK gen comment {LABEL, "push via scratch"} @@ -369,8 +390,10 @@ PATTERNS add %a, {CONST, $1} yields %a +#if defined REGVARS pat lol inreg($1)>0 /* Load from local */ yields {GPRE, regvar($1)} +#endif pat lol /* Load quad from local */ uses REG @@ -383,11 +406,13 @@ PATTERNS lol $1 + QUAD*1 lol $1 + QUAD*0 +#if defined REGVARS pat stl inreg($1)>0 /* Store to local */ with CONST+GPRI kills regvar($1) gen move %1, {GPRE, regvar($1)} +#endif pat stl /* Store to local */ with GPRI @@ -398,12 +423,14 @@ PATTERNS leaving stl $1 + QUAD*0 stl $1 + QUAD*1 - + +#if defined REGVARS pat lil inreg($1)>0 /* Load from indirected local */ uses REG gen ld %a, {GPROFFSET, regvar($1), 0} yields %a +#endif pat lil /* Load from indirected local */ leaving @@ -424,19 +451,20 @@ PATTERNS leaving loc 0 stl $1 - + +#if defined REGVARS pat inl inreg($1)>0 /* Increment local in register */ kills regvar($1) gen add {GPRE, regvar($1)}, {CONST, 1} - + pat inl inreg($1)<=0 /* Increment local */ leaving lol $1 loc 1 adi QUAD stl $1 - + pat del inreg($1)>0 /* Decrement local in register */ kills regvar($1) gen @@ -448,6 +476,21 @@ PATTERNS loc 1 sbi QUAD stl $1 +#else + pat inl /* Increment local in register */ + leaving + lol $1 + loc 1 + adi QUAD + stl $1 + + pat del /* Decrement local in register */ + leaving + lol $1 + loc 1 + sbi QUAD + stl $1 +#endif /* Global variables */ @@ -1288,7 +1331,7 @@ PATTERNS mov SP, FP pop FP, PC - pat ret $1<=QUAD /* Return from procedure, word */ + pat ret $1==QUAD /* Return from procedure, word */ with GPR0 gen return @@ -1316,10 +1359,10 @@ PATTERNS bl {LABEL, 
"_memmove"} pat csa /* Array-lookup switch */ - with STACK + with GPR0 GPR1 STACK gen - bl {LABEL, ".csa"} - + b {LABEL, ".csa"} + pat csb /* Table-lookup switch */ with STACK gen @@ -1455,6 +1498,10 @@ PATTERNS gen add SP, %1 + pat asp $1==QUAD /* Adjust stack by constant amount */ + gen + pop SCRATCH + pat asp /* Adjust stack by constant amount */ leaving loc $1 From 3b07fee160754546e68c42b0ba69ed41ca91c0e4 Mon Sep 17 00:00:00 2001 From: David Given Date: Sat, 25 May 2013 23:26:10 +0100 Subject: [PATCH 41/76] Major bugfix where instructions weren't being shrunk correctly. (Turns out there's built-in support for doing this, which I hadn't found.) --HG-- branch : dtrg-videocore --- mach/vc4/as/mach1.c | 2 +- mach/vc4/as/mach5.c | 219 ++++++++++++++++++-------------------------- 2 files changed, 88 insertions(+), 133 deletions(-) diff --git a/mach/vc4/as/mach1.c b/mach/vc4/as/mach1.c index de164610e..315468f8e 100644 --- a/mach/vc4/as/mach1.c +++ b/mach/vc4/as/mach1.c @@ -10,7 +10,7 @@ #define ALWAYS 14 extern void alu_instr_reg(quad opcode, int cc, int rd, int ra, int rb); -extern void alu_instr_lit(quad opcode, int cc, int rd, int ra, quad value); +extern void alu_instr_lit(quad opcode, int cc, int rd, int ra, long value); extern void misc_instr_reg(quad opcode, int cc, int rd, int ra, int rb); extern void misc_instr_lit(quad opcode, int cc, int rd, int ra, quad value); extern void branch_instr(int bl, int cc, struct expr_t* expr); diff --git a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c index 53ec3464c..a2227fd5e 100644 --- a/mach/vc4/as/mach5.c +++ b/mach/vc4/as/mach5.c @@ -34,12 +34,12 @@ void alu_instr_reg(quad op, int cc, int rd, int ra, int rb) /* Assemble an ALU instruction where rb is a literal. */ -void alu_instr_lit(quad op, int cc, int rd, int ra, quad value) +void alu_instr_lit(quad op, int cc, int rd, int ra, long value) { /* 16 bit short form? 
*/ - if ((cc == ALWAYS) && !(op & 1) && (value <= 0x1f) && (ra == rd) && - (ra < 0x10)) + if ((cc == ALWAYS) && !(op & 1) && (value >= 0) && (value <= 0x1f) && + (ra == rd) && (ra < 0x10)) { emit2(B16(01100000,00000000) | (op<<8) | (value<<4) | (rd<<0)); return; @@ -47,7 +47,7 @@ void alu_instr_lit(quad op, int cc, int rd, int ra, quad value) /* 32 bit medium form? */ - if (value <= 0x1f) + if ((value >= 0) && (value <= 0x1f)) { emit2(B16(11000000,00000000) | (op<<5) | (rd<<0)); emit2(B16(00000000,01000000) | (ra<<11) | (cc<<7) | (value<<0)); @@ -99,6 +99,7 @@ void branch_instr(int bl, int cc, struct expr_t* expr) { quad pc = DOTVAL; quad type = expr->typ & S_TYP; + int d; /* Sanity checking. */ @@ -107,71 +108,52 @@ void branch_instr(int bl, int cc, struct expr_t* expr) if (type == S_ABS) serror("can't use absolute addresses here"); - switch (pass) + /* The VC4 branch instructions express distance in 2-byte + * words. */ + + d = (int32_t)expr->val - (int32_t)pc; + if ((pass == 2) && (d > 0) && !(expr->typ & S_DOT)) + d -= DOTGAIN; + d /= 2; + + /* If this is a reference to code within this section, and it's + * close enough to the program counter, we can use a short- + * form instruction. */ + + if (small(!bl && (type == DOTTYP) && fitx(d, 7), 2)) + { + emit2(B16(00011000,00000000) | (cc<<7) | (d&0x7f)); + return; + } + + /* Absolute addresses and references to other sections + * need the full 32 bits. */ + + newrelo(expr->typ, RELOVC4|RELPC); + + if (bl) { - case 0: - /* Calculate size of instructions only. For now we just assume - * that they're going to be the maximum size, 32 bits. */ + quad v, hiv, lov; - emit4(0); - break; + if (!fitx(d, 27)) + toobig(); - case 1: - case 2: - { - /* The VC4 branch instructions express distance in 2-byte - * words. 
*/ + v = maskx(d, 27); + hiv = v >> 23; + lov = v & 0x007fffff; + emit2(B16(10010000,10000000) | (lov>>16) | (hiv<<8)); + emit2(B16(00000000,00000000) | (lov&0xffff)); + } + else + { + quad v; - int d = ((int32_t)expr->val - (int32_t)pc) / 2; + if (!fitx(d, 23)) + toobig(); - /* We now know the worst case for the instruction layout. At - * this point we can emit the instructions, which may shrink - * the code. */ - - if (!bl && (type == DOTTYP)) - { - /* This is a reference to code within this section. If it's - * close enough to the program counter, we can use a short- - * form instruction. */ - - if (fitx(d, 7)) - { - emit2(B16(00011000,00000000) | (cc<<7) | (d&0x7f)); - break; - } - } - - /* Absolute addresses and references to other sections - * need the full 32 bits. */ - - newrelo(expr->typ, RELOVC4|RELPC); - - if (bl) - { - quad v, hiv, lov; - - if (!fitx(d, 27)) - toobig(); - - v = maskx(d, 27); - hiv = v >> 23; - lov = v & 0x007fffff; - emit2(B16(10010000,10000000) | (lov>>16) | (hiv<<8)); - emit2(B16(00000000,00000000) | (lov&0xffff)); - } - else - { - quad v; - - if (!fitx(d, 23)) - toobig(); - - v = maskx(d, 23); - emit2(B16(10010000,00000000) | (cc<<8) | (v>>16)); - emit2(B16(00000000,00000000) | (v&0xffff)); - } - break; - } + v = maskx(d, 23); + emit2(B16(10010000,00000000) | (cc<<8) | (v>>16)); + emit2(B16(00000000,00000000) | (v&0xffff)); } } @@ -352,66 +334,45 @@ void mem_postincr_instr(quad opcode, int cc, int rd, int rs) void mem_address_instr(quad opcode, int rd, struct expr_t* expr) { - static const char sizes[] = {4, 2, 1, 2}; + static const char sizes[] = {4, 4, 2, 2, 1, 1, 2, 2}; int size = sizes[opcode]; quad type = expr->typ & S_TYP; + int d, scaledd; /* Sanity checking. 
*/ if (type == S_ABS) serror("can't use absolute addresses here"); - switch (pass) + d = expr->val - DOTVAL; + if ((pass == 2) && (d > 0) && !(expr->typ & S_DOT)) + d -= DOTGAIN; + scaledd = d/size; + + /* If this is a reference to an address within this section, and + * it's close enough to the program counter, we can use a + * shorter instruction. */ + + if (small((type==DOTTYP) && fitx(scaledd, 16), 2)) { - case 0: - /* Calculate size of instructions only. For now we just assume - * that they're going to be the maximum size, 48 bits. */ + emit2(B16(10101010,00000000) | (opcode<<5) | (rd<<0)); + emit2(scaledd); + return; + } - emit2(0); - emit4(0); - break; + /* Otherwise we need the full 48 bits. */ - case 1: - case 2: - { - int d = expr->val - DOTVAL; + newrelo(expr->typ, RELOVC4|RELPC); - /* We now know the worst case for the instruction layout. At - * this point we can emit the instructions, which may shrink - * the code. */ + /* VC4 relocations store the PC-relative delta into the + * destination section in the instruction data. The linker will + * massage this, and scale it appropriately. */ - if (type == DOTTYP) - { - int scaledd = d/size; + if (!fitx(d, 27)) + toobig(); - /* This is a reference to an address within this section. If - * it's close enough to the program counter, we can use a - * shorter instruction. */ - - if (fitx(scaledd, 16)) - { - emit2(B16(10101010,00000000) | (opcode<<5) | (rd<<0)); - emit2(scaledd); - return; - } - } - - /* Otherwise we need the full 48 bits. */ - - newrelo(expr->typ, RELOVC4|RELPC); - - /* VC4 relocations store the PC-relative delta into the - * destination section in the instruction data. The linker will - * massage this, and scale it appropriately. 
*/ - - if (!fitx(d, 27)) - toobig(); - - emit2(B16(11100111,00000000) | (opcode<<5) | (rd<<0)); - emit4((31<<27) | maskx(d, 27)); - break; - } - } + emit2(B16(11100111,00000000) | (opcode<<5) | (rd<<0)); + emit4((31<<27) | maskx(d, 27)); } /* Common code for handling addcmp: merge in as much of expr as will fit to @@ -420,33 +381,23 @@ void mem_address_instr(quad opcode, int rd, struct expr_t* expr) static void branch_addcmp_common(quad opcode, int bits, struct expr_t* expr) { quad type = expr->typ & S_TYP; + int d; - switch (pass) - { - case 0: - /* Calculate size of instructions only. */ + if (type != DOTTYP) + serror("can't use this type of branch to jump outside the section"); - emit2(0); - break; + /* The VC4 branch instructions express distance in 2-byte + * words. */ - case 1: - case 2: - { - if (type != DOTTYP) - serror("can't use this type of branch to jump outside the section"); + d = (expr->val - DOTVAL-2 + 4); + if ((pass == 2) && (d > 0) && !(expr->typ & S_DOT)) + d -= DOTGAIN; + d /= 2; - /* The VC4 branch instructions express distance in 2-byte - * words. */ + if (!fitx(d, bits)) + serror("target of branch is too far away"); - int d = (expr->val - DOTVAL-2 + 4) / 2; - - if (!fitx(d, bits)) - serror("target of branch is too far away"); - - emit2(opcode | maskx(d, bits)); - break; - } - } + emit2(opcode | maskx(d, bits)); } void branch_addcmp_reg_reg_instr(int cc, int rd, int ra, int rs, struct expr_t* expr) @@ -518,6 +469,10 @@ void lea_address_instr(int rd, struct expr_t* expr) { quad pc = DOTVAL; quad type = expr->typ & S_TYP; + int d = expr->val - pc; + + if ((pass == 2) && (d > 0) && !(expr->typ & S_DOT)) + d -= DOTGAIN; if (type == S_ABS) serror("can't use absolute addresses here"); From 6a672d5e96038863066b5e3fbe7d7889976077b2 Mon Sep 17 00:00:00 2001 From: David Given Date: Sat, 25 May 2013 23:28:47 +0100 Subject: [PATCH 42/76] Heap allocations now works. 
--HG-- branch : dtrg-videocore --- plat/rpi/build.mk | 16 ++++++++-------- plat/rpi/libsys/brk.c | 20 ++++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/plat/rpi/build.mk b/plat/rpi/build.mk index dffe59d9b..a522046f3 100644 --- a/plat/rpi/build.mk +++ b/plat/rpi/build.mk @@ -18,23 +18,23 @@ platform-headers := \ platform-libsys := \ _hol0.s \ + errno.s \ phys_to_user.s \ user_to_phys.s \ uart.s \ - write.c \ - -ifeq (x,y) - errno.s \ - _sys_rawread.s \ - _sys_rawwrite.s \ - open.c \ creat.c \ close.c \ + open.c \ read.c \ + write.c \ + isatty.c \ brk.c \ + +ifeq (x,y) + _sys_rawread.s \ + _sys_rawwrite.s \ getpid.c \ kill.c \ - isatty.c \ lseek.c \ time.c \ signal.c diff --git a/plat/rpi/libsys/brk.c b/plat/rpi/libsys/brk.c index cff32b9a9..2c44347d4 100644 --- a/plat/rpi/libsys/brk.c +++ b/plat/rpi/libsys/brk.c @@ -8,25 +8,25 @@ #include #include #include +#include #define OUT_OF_MEMORY (void*)(-1) /* sbrk returns this on failure */ -#define STACK_BUFFER 128 /* number of bytes to leave for stack */ +#define STACK_BUFFER 1024 /* number of bytes to leave for stack */ extern char _end[1]; static char* current = _end; +/* Top of heap: we assume that the block of memory the binary is loaded in + * is 256kB long. Because user pointers are always relative to the beginning + * of the block, this makes the end address easy to calculate. */ +static char* max = (char*) (256*1024); + int brk(void* newend) { - /* This variable is used to figure out the current stack pointer, - * by taking its address. 
*/ - char dummy; - char* p = newend; - - if ((p > (&dummy - STACK_BUFFER)) || - (p < _end)) + if ((newend >= (void*)max) || (newend < (void*)_end)) return -1; - - current = p; + + current = newend; return 0; } From 8c21a2ef9b09a040e633bb5e55c97969e1e3dcd7 Mon Sep 17 00:00:00 2001 From: David Given Date: Sat, 25 May 2013 23:58:35 +0100 Subject: [PATCH 43/76] Stop fighting the terrible code and remove the regvar support --- it didn't help much and was a pain. --HG-- branch : dtrg-videocore --- mach/vc4/ncg/mach.c | 105 -------------- mach/vc4/ncg/table | 346 +++++++++++++++++--------------------------- 2 files changed, 130 insertions(+), 321 deletions(-) diff --git a/mach/vc4/ncg/mach.c b/mach/vc4/ncg/mach.c index 90649b9af..124e8a965 100644 --- a/mach/vc4/ncg/mach.c +++ b/mach/vc4/ncg/mach.c @@ -84,108 +84,3 @@ char *segname[] = { ".sect .bss" }; -#ifdef REGVARS - -static int savedregsi[32]; -static int numsaved; - -/* Initialise regvar system for one function. */ - -void i_regsave(void) -{ - int i; - - fprintf(codefile, "! i_regsave()\n"); - for (i=0; i<32; i++) - savedregsi[i] = INT_MAX; - numsaved = 0; -} - -/* Mark a register as being saved. */ - -void regsave(const char* regname, full offset, int size) -{ - int regnum = atoi(regname+1); - savedregsi[regnum] = offset; - numsaved++; - - fprintf(codefile, "! %d is saved in %s\n", offset, regname); -#if 0 - fprintf(codefile, "stwu %s, -4(sp)\n", regname); - if (offset >= 0) - fprintf(codefile, "lwz %s, %d(fp)\n", regname, offset); -#endif -} - -/* Finish saving ragisters. */ - -static void saveloadregs(const char* op) -{ - int minreg = 32; - int maxreg = -1; - int i; - - for (i=0; i<32; i++) - { - if (savedregsi[i] != INT_MAX) - { - if (i < minreg) - minreg = i; - if (i > maxreg) - maxreg = i; - } - } - - if (minreg != 32) - { - fprintf(codefile, "! 
saving registers %d to %d\n", minreg, maxreg); - assert((minreg == 6) || (minreg == 16)); - - fprintf(codefile, "%s r6-r%d\n", op, maxreg); - } -} - -f_regsave() -{ - int i; - fprintf(codefile, "! f_regsave()\n"); - saveloadregs("push"); - - for (i=0; i<32; i++) - { - int o = savedregsi[i]; - if ((o != INT_MAX) && (o > 0)) - fprintf(codefile, "ld r%d, %d (fp)\n", i, savedregsi[i]); - } -} - -/* Restore all saved registers. */ - -regreturn() -{ - fprintf(codefile, "! regreturn()\n"); - saveloadregs("pop"); -} - -/* Calculate the score of a given register. */ - -int regscore(full offset, int size, int type, int frequency, int totype) -{ - int score; - - fprintf(codefile, "! regscore(%ld, %d, %d, %d, %d)\n", offset, size, type, frequency, totype); - - if (size != 4) - return -1; - - /* Per use: 6 bytes (on average) - * Overhead in prologue: 4 bytes, plus 4 if a parameter - * Overhead in epilogue: 0 bytes - */ - - score = frequency*6 - 4 - ((offset>=0) ? 4 : 0); - fprintf(codefile, "! local at offset %d has regvar score %d\n", offset, score); - return score; -} - -#endif diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index bd144a193..523879cfe 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -21,12 +21,6 @@ PC_OFFSET = 4 /* Offset of saved PC relative to our FP */ #define nicesize(x) ((x)==BYTE || (x)==WORD || (x)==QUAD) -/* #define REGVARS */ - -#ifndef REGVARS -#define regvar -#define return -#endif PROPERTIES @@ -49,19 +43,15 @@ REGISTERS R3("r3") : GPR, REG, GPR3. R4("r4") : GPR, REG, GPR4. R5("r5") : GPR, REG, GPR5. -#if defined REGVARS - R6("r6") : GPR, REG, GPR6, STACKABLE. -#else R6("r6") : GPR, GPR6. -#endif - R7("r7") : GPR, REG, GPR7 regvar. - R8("r8") : GPR, REG, GPR8 regvar. - R9("r9") : GPR, REG, GPR9 regvar. - R10("r10") : GPR, REG, GPR10 regvar. - R11("r11") : GPR, REG, GPR11 regvar. - R12("r12") : GPR, REG, GPR12 regvar. - R13("r13") : GPR, REG, GPR13 regvar. - R14("r14") : GPR, REG, GPR14 regvar. + R7("r7") : GPR, REG, GPR7. 
+ R8("r8") : GPR, REG, GPR8. + R9("r9") : GPR, REG, GPR9. + R10("r10") : GPR, REG, GPR10. + R11("r11") : GPR, REG, GPR11. + R12("r12") : GPR, REG, GPR12. + R13("r13") : GPR, REG, GPR13. + R14("r14") : GPR, REG, GPR14. GP("r15") : GPR, GPRGP. R16("r16") : GPR, GPR16. @@ -73,11 +63,7 @@ REGISTERS PC("pc") : GPR, GPRPC. /* r26 to r31 are special and the code generator doesn't touch them. */ -#if defined REGVARS - #define SCRATCH R16 -#else #define SCRATCH R6 -#endif TOKENS @@ -92,10 +78,6 @@ TOKENS LABEL = { ADDR adr; } 4 adr. CONST = { INT val; } 4 "#" val. -/* Allows us to use regvar() to refer to registers */ - - GPRE = { GPR reg; } 4 reg. - /* Sign extended values. */ /* The size refers to the *source*. */ @@ -114,65 +96,64 @@ TOKENS SETS TOKEN = LABEL + CONST. - GPRI = GPR + GPRE. OP = TOKEN + SIGNEX8 + SIGNEX16. - XREG = GPRI + SIGNEX8 + SIGNEX16. + ANY = GPR + OP. INSTRUCTIONS - add XREG:wo, XREG:ro, XREG+CONST:ro. - add XREG:rw, XREG+CONST:ro. - adds2 XREG:rw, XREG+CONST:ro. - adds4 XREG:rw, XREG+CONST:ro. - adds8 XREG:rw, XREG+CONST:ro. - adds16 XREG:rw, XREG+CONST:ro. - adds256 XREG:rw, XREG:rw, XREG:ro. - and XREG:rw, XREG+CONST:ro. - asr XREG:rw, XREG+CONST:ro. + add GPR:wo, GPR:ro, GPR+CONST:ro. + add GPR:rw, GPR+CONST:ro. + adds2 GPR:rw, GPR+CONST:ro. + adds4 GPR:rw, GPR+CONST:ro. + adds8 GPR:rw, GPR+CONST:ro. + adds16 GPR:rw, GPR+CONST:ro. + adds256 GPR:rw, GPR:rw, GPR:ro. + and GPR:rw, GPR+CONST:ro. + asr GPR:rw, GPR+CONST:ro. beq "b.eq" LABEL:ro. bne "b.ne" LABEL:ro. bgt "b.gt" LABEL:ro. bgt "b.gt" LABEL:ro. bhi "b.hi" LABEL:ro. - bset XREG:rw, XREG+CONST:ro. - b XREG+LABEL:ro. - bl XREG+LABEL:ro. - cmp XREG:ro, XREG+CONST:ro kills :cc. - divs XREG:wo, XREG:ro, XREG+CONST:ro. - divu XREG:wo, XREG:ro, XREG+CONST:ro. - eor XREG:rw, XREG+CONST:ro. - exts XREG:wo, XREG:ro, XREG+CONST:ro. - exts XREG:rw, XREG+CONST:ro. - fadd XREG:wo, XREG:ro, XREG:ro. - fcmp XREG:wo, XREG:ro, XREG:ro. - fdiv XREG:wo, XREG:ro, XREG:ro. - fmul XREG:wo, XREG:ro, XREG:ro. 
- fsub XREG:wo, XREG:ro, XREG:ro. - ld XREG:wo, GPRINC:rw. - ld XREG:wo, GPROFFSET+GPRGPR+LABEL:ro. - ldb XREG:wo, GPROFFSET+GPRGPR+LABEL:ro. - ldh XREG:wo, GPROFFSET+GPRGPR+LABEL:ro. - ldhs XREG:wo, GPROFFSET+GPRGPR+LABEL:ro. - lea XREG:wo, LABEL:ro. - lsl XREG:rw, XREG+CONST:ro. - lsl XREG:wo, XREG:ro, XREG+CONST:ro. - lsr XREG:rw, XREG+CONST:ro. - mov XREG:wo, XREG+CONST:ro. - mul XREG:rw, XREG+CONST:ro. - neg XREG:rw, XREG+CONST:ro. - or XREG:rw, XREG+CONST:ro. + bset GPR:rw, GPR+CONST:ro. + b GPR+LABEL:ro. + bl GPR+LABEL:ro. + cmp GPR:ro, GPR+CONST:ro kills :cc. + divs GPR:wo, GPR:ro, GPR+CONST:ro. + divu GPR:wo, GPR:ro, GPR+CONST:ro. + eor GPR:rw, GPR+CONST:ro. + exts GPR:wo, GPR:ro, GPR+CONST:ro. + exts GPR:rw, GPR+CONST:ro. + fadd GPR:wo, GPR:ro, GPR:ro. + fcmp GPR:wo, GPR:ro, GPR:ro. + fdiv GPR:wo, GPR:ro, GPR:ro. + fmul GPR:wo, GPR:ro, GPR:ro. + fsub GPR:wo, GPR:ro, GPR:ro. + ld GPR:wo, GPRINC:rw. + ld GPR:wo, GPROFFSET+GPRGPR+LABEL:ro. + ldb GPR:wo, GPROFFSET+GPRGPR+LABEL:ro. + ldh GPR:wo, GPROFFSET+GPRGPR+LABEL:ro. + ldhs GPR:wo, GPROFFSET+GPRGPR+LABEL:ro. + lea GPR:wo, LABEL:ro. + lsl GPR:rw, GPR+CONST:ro. + lsl GPR:wo, GPR:ro, GPR+CONST:ro. + lsr GPR:rw, GPR+CONST:ro. + mov GPR:wo, GPR+CONST:ro. + mul GPR:rw, GPR+CONST:ro. + neg GPR:rw, GPR+CONST:ro. + or GPR:rw, GPR+CONST:ro. pop GPR0+GPR6+GPR16+GPRFP+GPRPC:wo. pop GPR0+GPR6+GPR16+GPRFP:wo, GPRPC:wo. push GPR0+GPR6+GPR16+GPRFP+GPRLR:ro. push GPR0+GPR6+GPR16+GPRFP:ro, GPRLR:ro. - sub XREG:wo, XREG:ro, CONST+XREG:ro. - sub XREG:rw, XREG+CONST:ro. - st XREG:ro, GPROFFSET+GPRGPR+LABEL:ro. - stb XREG:ro, GPROFFSET+GPRGPR+LABEL:ro. - sth XREG:ro, GPROFFSET+GPRGPR+LABEL:ro. - sths XREG:ro, GPROFFSET+GPRGPR+LABEL:ro. + sub GPR:wo, GPR:ro, CONST+GPR:ro. + sub GPR:rw, GPR+CONST:ro. + st GPR:ro, GPROFFSET+GPRGPR+LABEL:ro. + stb GPR:ro, GPROFFSET+GPRGPR+LABEL:ro. + sth GPR:ro, GPROFFSET+GPRGPR+LABEL:ro. + sths GPR:ro, GPROFFSET+GPRGPR+LABEL:ro. invalid "invalid". comment "!" LABEL:ro. 
@@ -186,13 +167,6 @@ MOVES COMMENT("mov GPR->GPR") mov %2, %1 -/* GPRE exists solely to allow us to use regvar() (which can only be used in - an expression) as a register constant. */ - - from GPRE to GPR - gen - mov %2, %1 - /* Constants */ from CONST to GPR @@ -216,9 +190,9 @@ MOVES /* Miscellaneous */ - from CONST+LABEL+GPR+GPRE to GPRE + from CONST+LABEL+GPR to GPR gen - move %1, %2.reg + move %1, %2 TESTS @@ -236,7 +210,7 @@ STACKINGRULES comment {LABEL, "push stackable"} push %1 - from OP+GPRI to STACK + from OP+GPR to STACK uses GPR0 gen move %1, %a @@ -248,7 +222,7 @@ STACKINGRULES move %1, %a push %a - from OP+GPRI to STACK + from OP+GPR to STACK gen comment {LABEL, "push via scratch"} move %1, SCRATCH @@ -259,19 +233,6 @@ STACKINGRULES COERCIONS - from GPRI - uses reusing %1, REG=%1 - yields %a - - from GPR - yields {GPRE, %1} - - from OP - uses GPR0 - gen - move %1, %a - yields %a - from OP uses REG gen @@ -281,8 +242,8 @@ COERCIONS from STACK uses REG gen - pop R0 - move R0, %a + pop SCRATCH + move SCRATCH, %a yields %a @@ -295,15 +256,15 @@ PATTERNS yields {CONST, $1} pat dup $1<=QUAD /* Duplicate word on top of stack */ - with GPRI + with ANY yields %1 %1 pat dup $1<=(2*QUAD) /* Duplicate word pair on top of stack */ - with GPRI GPRI + with ANY ANY yields %1 %2 %1 %2 pat exg $1==QUAD /* Exchange top two words on stack */ - with GPRI GPRI + with ANY ANY yields %1 %2 pat stl lol $1==$2 /* Store then load local */ @@ -362,8 +323,6 @@ PATTERNS pat loc loc cii $1==BYTE && $2>BYTE /* signed char -> anything */ with GPR yields {SIGNEX8, %1} - with GPRE - yields {SIGNEX8, %1.reg} with SIGNEX8 yields {SIGNEX8, %1.reg} with SIGNEX16 @@ -372,8 +331,6 @@ PATTERNS pat loc loc cii $1==WORD && $2>WORD /* signed short -> anything */ with GPR yields {SIGNEX16, %1} - with GPRE - yields {SIGNEX16, %1.reg} with SIGNEX8 yields {SIGNEX16, %1.reg} with SIGNEX16 @@ -390,11 +347,6 @@ PATTERNS add %a, {CONST, $1} yields %a -#if defined REGVARS - pat lol inreg($1)>0 /* Load 
from local */ - yields {GPRE, regvar($1)} -#endif - pat lol /* Load quad from local */ uses REG gen @@ -406,16 +358,8 @@ PATTERNS lol $1 + QUAD*1 lol $1 + QUAD*0 -#if defined REGVARS - pat stl inreg($1)>0 /* Store to local */ - with CONST+GPRI - kills regvar($1) - gen - move %1, {GPRE, regvar($1)} -#endif - pat stl /* Store to local */ - with GPRI + with GPR gen st %1, {GPROFFSET, FP, $1} @@ -424,14 +368,6 @@ PATTERNS stl $1 + QUAD*0 stl $1 + QUAD*1 -#if defined REGVARS - pat lil inreg($1)>0 /* Load from indirected local */ - uses REG - gen - ld %a, {GPROFFSET, regvar($1), 0} - yields %a -#endif - pat lil /* Load from indirected local */ leaving lol $1 @@ -452,31 +388,6 @@ PATTERNS loc 0 stl $1 -#if defined REGVARS - pat inl inreg($1)>0 /* Increment local in register */ - kills regvar($1) - gen - add {GPRE, regvar($1)}, {CONST, 1} - - pat inl inreg($1)<=0 /* Increment local */ - leaving - lol $1 - loc 1 - adi QUAD - stl $1 - - pat del inreg($1)>0 /* Decrement local in register */ - kills regvar($1) - gen - sub {GPRE, regvar($1)}, {CONST, 1} - - pat del inreg($1)<=0 /* Decrement local */ - leaving - lol $1 - loc 1 - sbi QUAD - stl $1 -#else pat inl /* Increment local in register */ leaving lol $1 @@ -490,7 +401,7 @@ PATTERNS loc 1 sbi QUAD stl $1 -#endif + /* Global variables */ @@ -552,7 +463,7 @@ PATTERNS loi QUAD pat ldf /* Load double offsetted */ - with GPRI + with GPR uses reusing %1, REG=%1, REG gen add %a, GP @@ -566,7 +477,7 @@ PATTERNS sti QUAD pat sdf /* Store double offsetted */ - with GPRI GPRI GPRI + with GPR GPR GPR uses reusing %3, REG=%3 gen add %a, GP @@ -589,11 +500,6 @@ PATTERNS gen ldb %a, {GPRGPR, %1, GP} yields %a - with GPRE - uses reusing %1.reg, REG - gen - ldb %a, {GPRGPR, %1.reg, GP} - yields %a pat loi loc loc cii $1==WORD && $2==WORD && $3==QUAD /* Load short indirect and sign extend */ with LABEL @@ -601,7 +507,7 @@ PATTERNS gen ldhs %a, %1 yields %a - with GPRI + with GPR uses reusing %1, REG gen add %a, %1, GP @@ -614,7 +520,7 @@ 
PATTERNS gen ldh %a, %1 yields %a - with GPRI + with GPR uses reusing %1, REG gen add %a, %1, GP @@ -627,7 +533,7 @@ PATTERNS gen ld %a, %1 yields %a - with GPRI + with GPR uses reusing %1, REG gen add %a, %1, GP @@ -642,7 +548,7 @@ PATTERNS ld %a, {GPROFFSET, %b, 0} ld %b, {GPROFFSET, %b, 4} yields %b %a - with GPRI + with GPR uses reusing %1, REG, REG gen add %b, %1, GP @@ -660,44 +566,55 @@ PATTERNS cal ".los" pat sti $1==BYTE /* Store byte indirect */ - with LABEL GPRI+SIGNEX8+SIGNEX16 + with LABEL GPR gen stb %2, %1 - with GPR GPRI+SIGNEX8+SIGNEX16 + with LABEL SIGNEX8+SIGNEX16 + gen + stb %2.reg, %1 + with GPR GPR gen stb %2, {GPRGPR, %1, GP} - with GPRE GPRI+SIGNEX8+SIGNEX16 + with GPR SIGNEX8+SIGNEX16 gen - stb %2, {GPRGPR, %1.reg, GP} + stb %2.reg, {GPRGPR, %1, GP} pat sti $1==WORD /* Store half-word indirect */ - with LABEL GPRI+SIGNEX16 + with LABEL GPR gen sth %2, %1 - with GPRI GPRI+SIGNEX16 + with LABEL SIGNEX16 + gen + sth %2.reg, %1 + with GPR GPR uses reusing %1, REG gen add %a, %1, GP sth %2, {GPROFFSET, %a, 0} + with GPR SIGNEX16 + uses reusing %1, REG + gen + add %a, %1, GP + sth %2.reg, {GPROFFSET, %a, 0} pat sti $1==QUAD /* Store quad indirect */ - with LABEL GPRI + with LABEL GPR gen st %2, %1 - with GPRI GPRI + with GPR GPR uses reusing %1, REG gen add %a, %1, GP st %2, {GPROFFSET, %a, 0} pat sti $1==2*QUAD /* Load double-quad indirect */ - with LABEL GPRI GPRI + with LABEL GPR GPR uses REG gen lea %a, %1 st %2, {GPROFFSET, %a, 0} st %3, {GPROFFSET, %a, 4} - with GPRI GPRI GPRI + with GPR GPR GPR uses reusing %1, REG=%1 gen add %a, GP @@ -774,12 +691,12 @@ PATTERNS /* nop */ pat adi $1==QUAD /* Add word (second + top) */ - with GPRI+CONST GPRI + with GPR+CONST GPR uses reusing %2, REG=%2 gen add %a, %1 yields %a - with GPRI GPRI+CONST + with GPR GPR+CONST uses reusing %1, REG=%1 gen add %a, %2 @@ -789,19 +706,19 @@ PATTERNS /* nop */ pat sbi $1==QUAD /* Subtract word (second - top) */ - with GPRI+CONST GPRI + with GPR+CONST GPR uses reusing 
%2, REG=%2 gen sub %a, %1 yields %a pat mli $1==QUAD /* Multiply word (second * top) */ - with GPRI+CONST GPRI + with GPR+CONST GPR uses reusing %2, REG=%2 gen mul %a, %1 yields %a - with GPRI GPRI+CONST + with GPR GPR+CONST uses reusing %1, REG=%1 gen mul %a, %2 @@ -812,21 +729,21 @@ PATTERNS mli $1 pat dvi $1==QUAD /* Divide word (second / top) */ - with GPRI GPRI + with GPR GPR uses reusing %2, REG gen divs %a, %2, %1 yields %a pat dvu $1==QUAD /* Divide unsigned word (second / top) */ - with GPRI GPRI + with GPR GPR uses reusing %2, REG gen divu %a, %2, %1 yields %a pat rmu $1==QUAD /* Remainder unsigned word (second % top) */ - with GPRI GPRI + with GPR GPR uses REG gen divu %a, %2, %1 @@ -835,7 +752,7 @@ PATTERNS yields %a pat rmi $1==QUAD /* Remainder signed word (second % top) */ - with GPRI GPRI + with GPR GPR uses REG gen divs %a, %2, %1 @@ -844,64 +761,64 @@ PATTERNS yields %a pat ngi $1==QUAD /* Negate word */ - with GPRI + with GPR uses reusing %1, REG=%1 gen neg %a, %a yields %a pat and $1==QUAD /* AND word */ - with GPRI+CONST GPRI + with GPR+CONST GPR uses reusing %2, REG=%2 gen and %a, %1 yields %a - with GPRI GPRI+CONST + with GPR GPR+CONST uses reusing %1, REG=%1 gen and %a, %2 yields %a pat ior $1==QUAD /* OR word */ - with GPRI+CONST GPRI + with GPR+CONST GPR uses reusing %2, REG=%2 gen or %a, %1 yields %a - with GPRI GPRI+CONST + with GPR GPR+CONST uses reusing %1, REG=%1 gen or %a, %2 yields %a pat xor $1==QUAD /* XOR word */ - with GPRI+CONST GPRI + with GPR+CONST GPR uses reusing %2, REG=%2 gen eor %a, %1 yields %a - with GPRI GPRI+CONST + with GPR GPR+CONST uses reusing %1, REG=%1 gen eor %a, %2 yields %a pat dvi $1==QUAD /* Divide word (second / top) */ - with GPRI GPRI + with GPR GPR uses reusing %2, REG gen divs %a, %2, %1 yields %a pat dvu $1==QUAD /* Divide unsigned word (second / top) */ - with GPRI GPRI + with GPR GPR uses reusing %2, REG gen divu %a, %2, %1 yields %a pat rmu $1==QUAD /* Remainder unsigned word (second % top) */ - 
with GPRI GPRI + with GPR GPR uses REG gen divu %a, %2, %1 @@ -910,7 +827,7 @@ PATTERNS yields %a pat rmi $1==QUAD /* Remainder signed word (second % top) */ - with GPRI GPRI + with GPR GPR uses REG gen divs %a, %2, %1 @@ -966,21 +883,21 @@ PATTERNS #endif pat sli $1==4 /* Shift left (second << top) */ - with CONST+GPRI GPRI + with CONST+GPR GPR uses reusing %2, REG=%2 gen lsl %a, %1 yields %a pat sri $1==4 /* Shift right signed (second >> top) */ - with CONST+GPRI GPRI + with CONST+GPR GPR uses reusing %2, REG=%2 gen asr %2, %1 yields %a pat sru $1==4 /* Shift right unsigned (second >> top) */ - with CONST+GPRI GPRI + with CONST+GPR GPR uses reusing %2, REG=%2 gen lsr %2, %1 @@ -991,35 +908,35 @@ PATTERNS /* Special arithmetic */ pat loc sli adi $1==1 && $2==QUAD && $3==QUAD /* Shift and add (second + top<<1) */ - with GPRI+CONST GPRI + with GPR+CONST GPR uses reusing %2, REG=%2 gen adds2 %a, %1 yields %a pat loc sli adi $1==2 && $2==QUAD && $3==QUAD /* Shift and add (second + top<<2) */ - with GPRI+CONST GPRI + with GPR+CONST GPR uses reusing %2, REG=%2 gen adds4 %a, %1 yields %a pat loc sli adi $1==3 && $2==QUAD && $3==QUAD /* Shift and add (second + top<<3) */ - with GPRI+CONST GPRI + with GPR+CONST GPR uses reusing %2, REG=%2 gen adds8 %a, %1 yields %a pat loc sli adi $1==4 && $2==QUAD && $3==QUAD /* Shift and add (second + top<<4) */ - with GPRI+CONST GPRI + with GPR+CONST GPR uses reusing %2, REG=%2 gen adds16 %a, %1 yields %a pat loc sli adi $1==8 && $2==QUAD && $3==QUAD /* Shift and add (second + top<<8) */ - with GPRI GPRI + with GPR GPR uses reusing %2, REG gen adds256 %a, %2, %1 @@ -1087,7 +1004,7 @@ PATTERNS /* Sets */ pat set $1==QUAD /* Create quad with one bit set */ - with GPRI + with GPR uses reusing %1, REG gen bset %a, %1 @@ -1133,7 +1050,7 @@ PATTERNS /* Boolean resolutions */ proc cm_t example teq - with GPRI GPRI + with GPR GPR uses reusing %1, REG gen cmp %1, %2 @@ -1155,7 +1072,7 @@ PATTERNS pat cmi tge call cm_t("add.ge") /* top = signed 
(second >= top) */ proc cmf_t example teq - with GPRI GPRI + with GPR GPR uses reusing %1, REG gen fcmp %a, %1, %2 @@ -1171,7 +1088,7 @@ PATTERNS pat cmf tge call cmf_t("add.hs") /* top = float (second >= top) */ proc fallback_t example teq - with GPRI + with GPR uses reusing %1, REG gen cmp %1, {CONST, 0} @@ -1191,7 +1108,7 @@ PATTERNS /* Simple branches */ proc anyz example zeq - with GPRI STACK + with GPR STACK gen cmp %1, {CONST, 0} beq[1] {LABEL, $1} @@ -1204,7 +1121,7 @@ PATTERNS pat zle call anyz("b.le") /* Branch if signed top <= 0 */ proc anyb example beq - with GPRI+CONST GPRI STACK + with GPR+CONST GPR STACK gen cmp %2, %1 beq[1] {LABEL, $1} @@ -1217,7 +1134,7 @@ PATTERNS pat ble call anyb("b.le") /* Branch if signed second <= top */ proc cmu_z example cmu zeq - with GPRI+CONST GPRI STACK + with GPR+CONST GPR STACK gen cmp %2, %1 beq[1] {LABEL, $2} @@ -1236,7 +1153,7 @@ PATTERNS pat cmi zle call cmu_z("b.le") /* Branch if signed second <= top */ proc cmf_z example cmu zeq - with GPRI GPRI STACK + with GPR GPR STACK gen fcmp %2, %2, %1 beq[1] {LABEL, $2} @@ -1327,23 +1244,20 @@ PATTERNS pat ret $1==0 /* Return from procedure */ gen - return mov SP, FP pop FP, PC pat ret $1==QUAD /* Return from procedure, word */ with GPR0 gen - return mov SP, FP pop FP, PC pat ret $1==QUAD*2 /* Return from procedure, word */ - with GPRI GPRI + with GPR GPR gen move %1, R0 move %2, R1 - return mov SP, FP pop FP, PC @@ -1480,12 +1394,12 @@ PATTERNS loe ".reghp" pat str $1==0 /* Store FP */ - with GPRI + with GPR gen sub FP, %1, GP pat str $1==1 /* Store SP */ - with GPRI + with GPR gen sub SP, %1, GP @@ -1494,7 +1408,7 @@ PATTERNS ste ".reghp" pat ass /* Adjust stack by variable amount */ - with CONST+GPRI + with CONST+GPR gen add SP, %1 @@ -1518,7 +1432,7 @@ PATTERNS sbf QUAD proc simple_f example adf - with GPRI GPRI + with GPR GPR uses reusing %1, REG gen fadd[1] %a, %2, %1 From e299cc3bcf7710f190b1695c82cde71453323ecc Mon Sep 17 00:00:00 2001 From: David Given Date: 
Sat, 25 May 2013 23:59:31 +0100 Subject: [PATCH 44/76] stdio (output only) now works. --HG-- branch : dtrg-videocore --- plat/rpi/build.mk | 5 ----- plat/rpi/libsys/_sys_rawread.s | 26 -------------------------- plat/rpi/libsys/_sys_rawwrite.s | 32 -------------------------------- 3 files changed, 63 deletions(-) delete mode 100644 plat/rpi/libsys/_sys_rawread.s delete mode 100644 plat/rpi/libsys/_sys_rawwrite.s diff --git a/plat/rpi/build.mk b/plat/rpi/build.mk index a522046f3..eb1cd036f 100644 --- a/plat/rpi/build.mk +++ b/plat/rpi/build.mk @@ -29,16 +29,11 @@ platform-libsys := \ write.c \ isatty.c \ brk.c \ - -ifeq (x,y) - _sys_rawread.s \ - _sys_rawwrite.s \ getpid.c \ kill.c \ lseek.c \ time.c \ signal.c -endif $(eval $(call build-platform)) diff --git a/plat/rpi/libsys/_sys_rawread.s b/plat/rpi/libsys/_sys_rawread.s deleted file mode 100644 index 02edba21a..000000000 --- a/plat/rpi/libsys/_sys_rawread.s +++ /dev/null @@ -1,26 +0,0 @@ -# -/* - * Raspberry Pi support library for the ACK - * © 2013 David Given - * This file is redistributable under the terms of the 3-clause BSD license. - * See the file 'Copying' in the root of the distribution for the full text. - */ - -! Declare segments (the order is important). - -.sect .text -.sect .rom -.sect .data -.sect .bss - -.sect .text - -! Reads a single byte. - -.define __sys_rawread -__sys_rawread: - xorb ah, ah - int 0x16 - xorb ah, ah - ret - \ No newline at end of file diff --git a/plat/rpi/libsys/_sys_rawwrite.s b/plat/rpi/libsys/_sys_rawwrite.s deleted file mode 100644 index a424574d7..000000000 --- a/plat/rpi/libsys/_sys_rawwrite.s +++ /dev/null @@ -1,32 +0,0 @@ -# -/* - * Raspberry Pi support library for the ACK - * © 2013 David Given - * This file is redistributable under the terms of the 3-clause BSD license. - * See the file 'Copying' in the root of the distribution for the full text. - */ - -! Declare segments (the order is important). 
- -.sect .text -.sect .rom -.sect .data -.sect .bss - -.sect .text - -! Writes a single byte to the console. - -.define __sys_rawwrite -.extern __sys_rawwrite - -__sys_rawwrite: - push bp - mov bp, sp - - movb al, 4(bp) - movb ah, 0x0E - mov bx, 0x0007 - int 0x10 - jmp .cret - \ No newline at end of file From 308d41e0834d4caafa57bc2ad8e82499876b84c8 Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 26 May 2013 00:22:08 +0100 Subject: [PATCH 45/76] Added triple-quad load and store (used by the signal stuff). --HG-- branch : dtrg-videocore --- mach/vc4/ncg/table | 47 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index 523879cfe..41a2b2ae2 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -72,6 +72,7 @@ TOKENS GPROFFSET = { GPR reg; INT off; } 4 off "(" reg ")". GPRGPR = { GPR reg1; GPR reg2; } 4 "(" reg1 "," reg2 ")". GPRINC = { GPR reg; } 4 "(" reg ")++". + ADDCMPB_LL = { GPR rd; INT val; INT vs; ADDR dest; } 4 rd ",#" val ",#" vs "," dest. /* Primitives */ @@ -105,6 +106,7 @@ INSTRUCTIONS add GPR:wo, GPR:ro, GPR+CONST:ro. add GPR:rw, GPR+CONST:ro. + addcmpbge "addcmpb.ge" ADDCMPB_LL:rw. adds2 GPR:rw, GPR+CONST:ro. adds4 GPR:rw, GPR+CONST:ro. adds8 GPR:rw, GPR+CONST:ro. @@ -150,6 +152,7 @@ INSTRUCTIONS push GPR0+GPR6+GPR16+GPRFP:ro, GPRLR:ro. sub GPR:wo, GPR:ro, CONST+GPR:ro. sub GPR:rw, GPR+CONST:ro. + st GPR:ro, GPRINC:rw. st GPR:ro, GPROFFSET+GPRGPR+LABEL:ro. stb GPR:ro, GPROFFSET+GPRGPR+LABEL:ro. sth GPR:ro, GPROFFSET+GPRGPR+LABEL:ro. 
@@ -556,7 +559,25 @@ PATTERNS ld %b, {GPROFFSET, %b, 4} yields %b %a - pat loi !nicesize($1) /* Load arbitrary size */ + pat loi $1==3*QUAD /* Load triple-quad indirect */ + with LABEL + uses REG, REG, REG + gen + lea %c, %1 + ld %a, {GPROFFSET, %c, 0} + ld %b, {GPROFFSET, %c, 4} + ld %c, {GPROFFSET, %c, 8} + yields %c %b %a + with GPR + uses reusing %1, REG, REG, REG + gen + add %c, %1, GP + ld %a, {GPROFFSET, %c, 0} + ld %b, {GPROFFSET, %c, 4} + ld %c, {GPROFFSET, %c, 8} + yields %c %b %a + + pat loi /* Load arbitrary size */ leaving loc $1 los QUAD @@ -621,16 +642,30 @@ PATTERNS st %2, {GPROFFSET, %a, 0} st %3, {GPROFFSET, %a, 4} + pat sti $1==3*QUAD /* Store triple-quad indirect */ + with LABEL GPR GPR GPR + uses REG + gen + lea %a, %1 + st %2, {GPROFFSET, %a, 0} + st %3, {GPROFFSET, %a, 4} + st %4, {GPROFFSET, %a, 8} + with GPR GPR GPR GPR + uses reusing %1, REG=%1 + gen + add %a, GP + st %2, {GPROFFSET, %a, 0} + st %3, {GPROFFSET, %a, 4} + st %4, {GPROFFSET, %a, 8} + pat sti /* Store arbitrary size */ leaving loc $1 sts QUAD - pat sts /* Load arbitrary size */ - with STACK - kills ALL - gen - bl {LABEL, ".sts"} + pat sts /* Store arbitrary size */ + leaving + cal ".sts" From bd9497be77424c6d00054a3f4a74cfb936817252 Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 26 May 2013 00:22:39 +0100 Subject: [PATCH 46/76] Renamed the pi-specific functions to be a bit cleaner. 
--HG-- branch : dtrg-videocore rename : plat/rpi/libsys/phys_to_user.s => plat/rpi/libsys/pi_phys_to_user.s rename : plat/rpi/libsys/uart.s => plat/rpi/libsys/pi_uart.s rename : plat/rpi/libsys/user_to_phys.s => plat/rpi/libsys/pi_user_to_phys.s --- plat/rpi/build.mk | 6 +++--- plat/rpi/include/pi.h | 6 +++--- plat/rpi/libsys/{phys_to_user.s => pi_phys_to_user.s} | 4 ++-- plat/rpi/libsys/{uart.s => pi_uart.s} | 4 ++-- plat/rpi/libsys/{user_to_phys.s => pi_user_to_phys.s} | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) rename plat/rpi/libsys/{phys_to_user.s => pi_phys_to_user.s} (89%) rename plat/rpi/libsys/{uart.s => pi_uart.s} (98%) rename plat/rpi/libsys/{user_to_phys.s => pi_user_to_phys.s} (89%) diff --git a/plat/rpi/build.mk b/plat/rpi/build.mk index eb1cd036f..7bbf007e4 100644 --- a/plat/rpi/build.mk +++ b/plat/rpi/build.mk @@ -19,9 +19,9 @@ platform-headers := \ platform-libsys := \ _hol0.s \ errno.s \ - phys_to_user.s \ - user_to_phys.s \ - uart.s \ + pi_phys_to_user.s \ + pi_user_to_phys.s \ + pi_uart.s \ creat.c \ close.c \ open.c \ diff --git a/plat/rpi/include/pi.h b/plat/rpi/include/pi.h index a69cdd8d9..b5c54289e 100644 --- a/plat/rpi/include/pi.h +++ b/plat/rpi/include/pi.h @@ -9,13 +9,13 @@ #define PI_H /* Initialise the mini UART (only do this if running on bare metal! */ -extern void init_uart(void); +extern void pi_init_uart(void); /* Converts a pointer from a physical address to a user address. */ -extern void* phys_to_user(void* ptr); +extern void* pi_phys_to_user(void* ptr); /* Converts a pointer from a user address to a physical address. 
*/ -extern void* user_to_phys(void* ptr); +extern void* pi_user_to_phys(void* ptr); #endif diff --git a/plat/rpi/libsys/phys_to_user.s b/plat/rpi/libsys/pi_phys_to_user.s similarity index 89% rename from plat/rpi/libsys/phys_to_user.s rename to plat/rpi/libsys/pi_phys_to_user.s index 649b19b5a..d67cac895 100644 --- a/plat/rpi/libsys/phys_to_user.s +++ b/plat/rpi/libsys/pi_phys_to_user.s @@ -12,8 +12,8 @@ ! Transforms a physical address into a user address. -.define _phys_to_user -_phys_to_user: +.define _pi_phys_to_user +_pi_phys_to_user: ld r0, 0 (sp) sub r0, gp b lr diff --git a/plat/rpi/libsys/uart.s b/plat/rpi/libsys/pi_uart.s similarity index 98% rename from plat/rpi/libsys/uart.s rename to plat/rpi/libsys/pi_uart.s index b12f2efd2..caa9f6f89 100644 --- a/plat/rpi/libsys/uart.s +++ b/plat/rpi/libsys/pi_uart.s @@ -38,8 +38,8 @@ AUX_MU_BAUD_REG = 0x7e215068 ! Sets up the mini UART for use as a console. -.define _init_uart -_init_uart: +.define _pi_init_uart +_pi_init_uart: ! Configure TX and RX GPIO pins for Mini Uart function. mov r1, #GPFSEL1 ld r0, (r1) diff --git a/plat/rpi/libsys/user_to_phys.s b/plat/rpi/libsys/pi_user_to_phys.s similarity index 89% rename from plat/rpi/libsys/user_to_phys.s rename to plat/rpi/libsys/pi_user_to_phys.s index 7b988fdd6..dd62c069a 100644 --- a/plat/rpi/libsys/user_to_phys.s +++ b/plat/rpi/libsys/pi_user_to_phys.s @@ -12,8 +12,8 @@ ! Transforms a user address into a physical address. -.define _user_to_phys -_user_to_phys: +.define _pi_user_to_phys +_pi_user_to_phys: ld r0, 0 (sp) add r0, gp b lr From 6284512b374873bb014dda26006d8aaf3770aa9d Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 26 May 2013 00:35:15 +0100 Subject: [PATCH 47/76] Fix erroneous section check (symbols may not have a defined section in pass 1). 
--HG-- branch : dtrg-videocore --- mach/vc4/as/mach5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c index a2227fd5e..de2bda2ad 100644 --- a/mach/vc4/as/mach5.c +++ b/mach/vc4/as/mach5.c @@ -383,7 +383,7 @@ static void branch_addcmp_common(quad opcode, int bits, struct expr_t* expr) quad type = expr->typ & S_TYP; int d; - if (type != DOTTYP) + if ((pass>0) && (type != DOTTYP)) serror("can't use this type of branch to jump outside the section"); /* The VC4 branch instructions express distance in 2-byte From 510888e6d50e1939017eb374bed16898c2caeb5d Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 26 May 2013 13:06:25 +0100 Subject: [PATCH 48/76] .csb now works. --HG-- branch : dtrg-videocore rename : mach/vc4/libem/csa.s => mach/vc4/libem/csb.s --- mach/vc4/build.mk | 3 ++- mach/vc4/libem/csb.s | 33 +++++++++++++++++++++++++++++++++ mach/vc4/ncg/table | 20 +++++++++++++++++++- 3 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 mach/vc4/libem/csb.s diff --git a/mach/vc4/build.mk b/mach/vc4/build.mk index 36b801aff..b7ca860d8 100644 --- a/mach/vc4/build.mk +++ b/mach/vc4/build.mk @@ -1,5 +1,6 @@ arch-libem-vc4 := \ - csa.s + csa.s \ + csb.s arch-libend-vc4 = \ edata.s \ diff --git a/mach/vc4/libem/csb.s b/mach/vc4/libem/csb.s new file mode 100644 index 000000000..7ed74c39f --- /dev/null +++ b/mach/vc4/libem/csb.s @@ -0,0 +1,33 @@ +# +/* + * VideoCore IV support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include "videocore.h" + +.define .csb +.sect .data +.csb: + ! on entry: + ! r0 = un-fixed-up descriptor + ! r1 = value + add r0, gp ! r0 = fixed up descriptor + + adds8 r2, r0, #1 ! r2 = moving pointer + ld r3, 4 (r0) ! r3 = count + adds8 r3, r0, r3 ! r3 = end ptr + +loop: + ld r4, (r2)++ + b.eq r4, r1, matched ! 
r2 points at matching addr + addcmpb.le r2, #4, r3, loop +notmatched: + mov r2, r0 ! r2 points at default jump +matched: + ld r2, (r2) ! load destination address + add r2, gp ! fix up r2 + b r2 ! ...and go + diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index 41a2b2ae2..56a3d871a 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -1309,11 +1309,13 @@ PATTERNS pat csa /* Array-lookup switch */ with GPR0 GPR1 STACK + kills ALL gen b {LABEL, ".csa"} pat csb /* Table-lookup switch */ - with STACK + with GPR0 GPR1 STACK + kills ALL gen bl {LABEL, ".csb"} @@ -1484,28 +1486,44 @@ PATTERNS pat loc loc cfi $1==$2 && $1==QUAD /* Convert float -> integer */ leaving + loc 0 +#if 0 cal ".cfi" lfr QUAD +#endif pat loc loc cfu $1==$2 && $1==QUAD /* Convert float -> unsigned */ leaving + loc 0 +#if 0 cal ".cfu" lfr QUAD +#endif pat loc loc cif $1==$2 && $1==QUAD /* Convert integer -> float */ leaving + loc 0 +#if 0 cal ".cif" lfr QUAD +#endif pat loc loc cuf $1==$2 && $1==QUAD /* Convert unsigned -> float */ leaving + loc 0 +#if 0 cal ".cuf" lfr QUAD +#endif pat fef /* Split float */ leaving + loc 0 + loc 0 +#if 0 cal ".cuf" lfr QUAD*2 +#endif pat fif /* Multiply float and split (?) */ leaving From 366cd10194f6fd8f4d887723d124de33ecd61818 Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 26 May 2013 13:13:58 +0100 Subject: [PATCH 49/76] Remainders are calculated correctly. printf now works. --HG-- branch : dtrg-videocore --- mach/vc4/ncg/table | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index 56a3d871a..18e7bc3f7 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -150,6 +150,7 @@ INSTRUCTIONS pop GPR0+GPR6+GPR16+GPRFP:wo, GPRPC:wo. push GPR0+GPR6+GPR16+GPRFP+GPRLR:ro. push GPR0+GPR6+GPR16+GPRFP:ro, GPRLR:ro. + rsb GPR:rw, GPR+CONST:ro. sub GPR:wo, GPR:ro, CONST+GPR:ro. sub GPR:rw, GPR+CONST:ro. st GPR:ro, GPRINC:rw. 
@@ -783,7 +784,7 @@ PATTERNS gen divu %a, %2, %1 mul %a, %1 - sub %a, %2 + rsb %a, %2 yields %a pat rmi $1==QUAD /* Remainder signed word (second % top) */ @@ -792,7 +793,7 @@ PATTERNS gen divs %a, %2, %1 mul %a, %1 - sub %a, %2 + rsb %a, %2 yields %a pat ngi $1==QUAD /* Negate word */ From e01f00e3205360b767a56aea03bf51b74c0a00d4 Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 26 May 2013 18:58:54 +0100 Subject: [PATCH 50/76] Allow reading from the mini UART. Add a check to not touch the UART unless it's been initialised (in case of accidents when running in kernel mode). --HG-- branch : dtrg-videocore --- plat/rpi/libsys/pi_uart.s | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/plat/rpi/libsys/pi_uart.s b/plat/rpi/libsys/pi_uart.s index caa9f6f89..0cfed4a67 100644 --- a/plat/rpi/libsys/pi_uart.s +++ b/plat/rpi/libsys/pi_uart.s @@ -113,12 +113,19 @@ delay2: mov r0, #3 st r0, (r1) + ! Mark the uart as being initialised. + mov r0, #1 + stb r0, __uart_status + b lr ! Send a single byte. .define __sys_rawwrite __sys_rawwrite: + ldb r0, __uart_status + b.eq r0, #0, 1f + ld r0, (sp) mov r1, #AUX_MU_LSR_REG ! loop until space available in Tx buffer @@ -131,5 +138,27 @@ sendwait: mov r1, #AUX_MU_IO_REG stb r0, (r1) +1: b lr +! Receive a single byte. + +.define __sys_rawread +__sys_rawread: + ldb r0, __uart_status + b.eq r0, #0, 1b + + ! receive 1 byte (returned in r0) + mov r1, #AUX_MU_LSR_REG + mov r2, #AUX_MU_IO_REG + ! loop until char available +recvwait: + ld r3, (r1) + and r3, #0x1 + b.ne r3, #0x1, recvwait + + ldb r0, (r2) +1: + b lr + +.comm __uart_status, 1 From ef25c53c9c03528b26b89c588849b1cc22729f6e Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 26 May 2013 18:59:19 +0100 Subject: [PATCH 51/76] Fix bug in ine/dee. 
--HG-- branch : dtrg-videocore --- mach/vc4/ncg/table | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index 18e7bc3f7..ae44c026b 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -434,19 +434,15 @@ PATTERNS pat ine /* Increment external */ leaving - lae $1 - dup QUAD - loi QUAD + loe $1 inc - sti QUAD + ste $1 pat dee /* Decrement external */ leaving - lae $1 - dup QUAD - loi QUAD + loe $1 dec - sti QUAD + ste $1 pat lde /* Load double external */ leaving From 38e4726f5c63b6b7c41262b0c94ea826d4cdd3ff Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 26 May 2013 19:41:37 +0100 Subject: [PATCH 52/76] Boot code now works properly in both kernel and bare-metal mode. --HG-- branch : dtrg-videocore --- plat/rpi/boot.s | 56 +++++++++++++++++++------------------------ plat/rpi/include/pi.h | 15 ++++++++++++ 2 files changed, 40 insertions(+), 31 deletions(-) diff --git a/plat/rpi/boot.s b/plat/rpi/boot.s index 523cf40dc..71081d2cb 100644 --- a/plat/rpi/boot.s +++ b/plat/rpi/boot.s @@ -23,13 +23,15 @@ begtext: ! This empty space is required by the boot loader. - b start - .space 508 ! 512 minus space needed for branch instruction +kernel_start: + ! When running as a kernel, we need to preserve all registers. We save + ! them onto the default stack. + push r0-r24 + b baremetal_start + .space 506 ! first 512 bytes are ignored by the boot loader +baremetal_start: + ! Wipe the bss (including the new stack). - ! Wipe the bss. This must happen absolutely first, because we need - ! to store the old system registers into it. - -start: lea r6, begbss lea r7, endbss mov r8, #0 @@ -37,35 +39,23 @@ _1: stb r8, (r6) addcmpb.lt r6, #1, r7, _1 - ! Set up system registers. + ! Save system registers. - lea gp, begtext st fp, .returnfp st sp, .returnsp st lr, .returnlr - ! Set up the new stack and save the kernel parameters to it. + lea gp, begtext - lea sp, .stack + STACKSIZE - 6*4 - - sub r0, gp ! 
pointer - st r0, 0 (sp) - - sub r1, gp ! pointer - st r1, 4 (sp) - - sub r2, gp ! pointer - st r2, 8 (sp) - - sub r3, gp ! pointer - st r3, 12 (sp) - - ! r4-r5 are not pointers and don't need adjusting - st r4, 16 (sp) - st r5, 20 (sp) + ! Save the kernel parameters. + sub r0, gp ! fix up pointer + sub r1, gp ! fix up pointer + sub r2, gp ! fix up pointer + sub r3, gp ! fix up pointer + push r0-r5 sub r0, sp, gp - st r0, _gpu_parameters + st r0, _pi_kernel_parameters ! Push standard parameters onto the stack and go. @@ -82,10 +72,16 @@ _1: .define __exit __exit: + ! It only makes sense to get here if we're in kernel mode. If we're in + ! bare-metal mode, we'll just crash, but that's fine. + + st r0, _pi_kernel_parameters ! save return value mov r0, sr ld fp, .returnfp ld sp, .returnsp ld lr, .returnlr + pop r0-r24 + ld r0, _pi_kernel_parameters ! restore return value b lr ! Define symbols at the beginning of our various segments, so that we can find @@ -110,10 +106,8 @@ __exit: .comm .returnsp, 4 .comm .returnlr, 4 -! User pointer to the GPU kernel parameter block. - -.define _gpu_parameters -.comm _gpu_parameters, 4 +.define _pi_kernel_parameters +.comm _pi_kernel_parameters, 4 ! User stack. diff --git a/plat/rpi/include/pi.h b/plat/rpi/include/pi.h index b5c54289e..656543bde 100644 --- a/plat/rpi/include/pi.h +++ b/plat/rpi/include/pi.h @@ -8,6 +8,21 @@ #ifndef PI_H #define PI_H +/* When running in kernel mode, this structure gets the incoming parameters. + * In bare metal mode, it's gibberish. */ + +struct pi_kernel_parameters +{ + int r5; + int r4; + void* r3; + void* r2; + void* r1; + void* r0; +}; + +extern struct pi_kernel_parameters* pi_kernel_parameters; + /* Initialise the mini UART (only do this if running on bare metal! */ extern void pi_init_uart(void); From ae993b1eb24ca62e102db7c787fb4536bddb47e7 Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 26 May 2013 19:54:22 +0100 Subject: [PATCH 53/76] Add a crude README. 
--HG-- branch : dtrg-videocore --- plat/rpi/README | 66 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 plat/rpi/README diff --git a/plat/rpi/README b/plat/rpi/README new file mode 100644 index 000000000..166d2148b --- /dev/null +++ b/plat/rpi/README @@ -0,0 +1,66 @@ +VideoCore IV support in the ACK +=============================== + +This is a fairly crude port of the ACK to produce VideoCore IV machine +code, suitable for use on the Raspberry Pi. It produces terrible but +working code. The resulting binaries can be used either bare metal or +loaded as a GPU kernel and executed using a modified mailbox.c (see below). + +As much of the standard C library as is relevant works; if +you're running in bare-metal mode, you can hook stdin/stdout up to the +mini UART. (Obviously, in kernel mode you can't.) + +Important note! The malloc heap expects your program to be loaded into a +chunk of memory that's 256kB large. You must make sure that this is the case, +or Bad Stuff will happen. + +Output binaries are fully PIC and can be loaded anywhere (this is one of the +things that makes the code so terrible). You must use pi_user_to_phys() +and pi_phys_to_user() to translate pointers from physical to user and vice +versa. If you don't, Bad Stuff will happen. + + + +Bare metal mode +--------------- + +To run a binary bare metal, compile it: + + ack -mrpi -O program.c -o bootcode.bin + +...and copy the bootcode.bin file to the root of an SD card. Boot the Pi. +Your program will run. + +To use the UART, #include <pi.h> and call pi_init_uart() at the top of your +program. This will set it up and connect it to stdin/stdout. It's 115200 8n1. + + + +Kernel mode +----------- + +This will require some hacking at your end. + +Go here, and follow the instructions. 
+ +https://github.com/hermanhermitage/videocoreiv/wiki/VideoCore-IV-Kernels-under-Linux + +Now compile your program: + + ack -mrpi -O program.c -o alpha.bin + +MAKE SURE YOU AREN'T USING ANY MEMORY ALLOCATION. Copy the alpha.bin onto +the Pi, and run it with mailbox.c. + +To get data in and out, #include <pi.h> and look at the pi_kernel_parameters +variable. It points to a structure that is initialised with the data that's passed in +from mailbox.c (currently four pointers and two integers). + +If you want to use malloc() and friends, you'll need to hack mailbox.c so +that the buffer containing the code is at least 256kB, or you're likely to +corrupt the VideoCore's workspace and crash it. + + +David Given +2013-05-26 + From 1f36370d87ff7aaac15b1a040ad202f2a3a86a4c Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 26 May 2013 22:54:53 +0100 Subject: [PATCH 54/76] Implement nop (the C compiler sometimes generates this!). --HG-- branch : dtrg-videocore --- mach/vc4/ncg/table | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index ae44c026b..3c28651df 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -256,6 +256,8 @@ PATTERNS /* Intrinsics */ + pat nop /* Does nothing */ + pat loc /* Load constant */ yields {CONST, $1} From 69953d016c2898c166d02db2a6e54cc896633bb0 Mon Sep 17 00:00:00 2001 From: David Given Date: Wed, 29 May 2013 17:10:58 +0100 Subject: [PATCH 55/76] Add support for snprintf and vsnprintf. Try and make the return value a bit more standards-compliant. 
--HG-- branch : dtrg-videocore --- lang/cem/libcc.ansi/build.mk | 2 ++ lang/cem/libcc.ansi/headers/stdio.h | 2 ++ lang/cem/libcc.ansi/stdio/doprnt.c | 42 +++++++++++++-------------- lang/cem/libcc.ansi/stdio/snprintf.c | 31 ++++++++++++++++++++ lang/cem/libcc.ansi/stdio/vsnprintf.c | 26 +++++++++++++++++ 5 files changed, 82 insertions(+), 21 deletions(-) create mode 100644 lang/cem/libcc.ansi/stdio/snprintf.c create mode 100644 lang/cem/libcc.ansi/stdio/vsnprintf.c diff --git a/lang/cem/libcc.ansi/build.mk b/lang/cem/libcc.ansi/build.mk index a9434ceb4..6140b4b50 100644 --- a/lang/cem/libcc.ansi/build.mk +++ b/lang/cem/libcc.ansi/build.mk @@ -133,9 +133,11 @@ $(call ackfile, lang/cem/libcc.ansi/stdio/perror.c) $(call ackfile, lang/cem/libcc.ansi/stdio/fprintf.c) $(call ackfile, lang/cem/libcc.ansi/stdio/printf.c) $(call ackfile, lang/cem/libcc.ansi/stdio/sprintf.c) +$(call ackfile, lang/cem/libcc.ansi/stdio/snprintf.c) $(call ackfile, lang/cem/libcc.ansi/stdio/vfprintf.c) $(call ackfile, lang/cem/libcc.ansi/stdio/vprintf.c) $(call ackfile, lang/cem/libcc.ansi/stdio/vsprintf.c) +$(call ackfile, lang/cem/libcc.ansi/stdio/vsnprintf.c) $(call ackfile, lang/cem/libcc.ansi/stdio/doprnt.c) $(call ackfile, lang/cem/libcc.ansi/stdio/icompute.c) $(call ackfile, lang/cem/libcc.ansi/stdio/fscanf.c) diff --git a/lang/cem/libcc.ansi/headers/stdio.h b/lang/cem/libcc.ansi/headers/stdio.h index 4c5a42a08..52f286f20 100644 --- a/lang/cem/libcc.ansi/headers/stdio.h +++ b/lang/cem/libcc.ansi/headers/stdio.h @@ -72,10 +72,12 @@ extern int fscanf(FILE *_stream, const char *_format, ...); extern int printf(const char *_format, ...); extern int scanf(const char *_format, ...); extern int sprintf(char *_s, const char *_format, ...); +extern int snprintf(char *_s, size_t _len, const char *_format, ...); extern int sscanf(const char *_s, const char *_format, ...); extern int vfprintf(FILE *_stream, const char *_format, char *_arg); extern int vprintf(const char *_format, char *_arg); extern 
int vsprintf(char *_s, const char *_format, char *_arg); +extern int vsnprintf(char *_s, size_t _len, const char *_format, char *_arg); extern int fgetc(FILE *_stream); extern char *fgets(char *_s, int _n, FILE *_stream); extern int fputc(int _c, FILE *_stream); diff --git a/lang/cem/libcc.ansi/stdio/doprnt.c b/lang/cem/libcc.ansi/stdio/doprnt.c index 81714de47..395c45704 100644 --- a/lang/cem/libcc.ansi/stdio/doprnt.c +++ b/lang/cem/libcc.ansi/stdio/doprnt.c @@ -38,6 +38,16 @@ gnum(register const char *f, int *ip, va_list *app) #define set_pointer(flags) /* compilation might continue */ #endif +#define PUTC(c) \ + do { \ + int i = putc(c, stream); \ + if (i == EOF) \ + { \ + if (ferror(stream)) \ + return -1; \ + } \ + } while (0) + /* print an ordinal number */ static char * o_print(va_list *ap, int flags, char *s, char c, int precision, int is_signed) @@ -125,13 +135,10 @@ _doprnt(register const char *fmt, va_list ap, FILE *stream) if (c != '%') { #ifdef CPM if (c == '\n') { - if (putc('\r', stream) == EOF) - return nrchars ? -nrchars : -1; - nrchars++; + PUTC('\r'); } #endif - if (putc(c, stream) == EOF) - return nrchars ? -nrchars : -1; + PUTC(c); nrchars++; continue; } @@ -181,13 +188,11 @@ _doprnt(register const char *fmt, va_list ap, FILE *stream) default: #ifdef CPM if (c == '\n') { - if (putc('\r', stream) == EOF) - return nrchars ? -nrchars : -1; + PUTC('\r'); nrchars++; } #endif - if (putc(c, stream) == EOF) - return nrchars ? -nrchars : -1; + PUTC(c); nrchars++; continue; case 'n': @@ -280,31 +285,26 @@ _doprnt(register const char *fmt, va_list ap, FILE *stream) if (between_fill) { if (flags & FL_SIGNEDCONV) { j--; nrchars++; - if (putc(*s1++, stream) == EOF) - return nrchars ? -nrchars : -1; + PUTC(*s1++); } else { j -= 2; nrchars += 2; - if ((putc(*s1++, stream) == EOF) - || (putc(*s1++, stream) == EOF)) - return nrchars ? -nrchars : -1; - } + PUTC(*s1++); + PUTC(*s1++); + } } do { - if (putc(zfill, stream) == EOF) - return nrchars ? 
-nrchars : -1; + PUTC(zfill); } while (--i); } nrchars += j; while (--j >= 0) { - if (putc(*s1++, stream) == EOF) - return nrchars ? -nrchars : -1; + PUTC(*s1++); } if (i > 0) nrchars += i; while (--i >= 0) - if (putc(zfill, stream) == EOF) - return nrchars ? -nrchars : -1; + PUTC(zfill); } return nrchars; } diff --git a/lang/cem/libcc.ansi/stdio/snprintf.c b/lang/cem/libcc.ansi/stdio/snprintf.c new file mode 100644 index 000000000..7d428118c --- /dev/null +++ b/lang/cem/libcc.ansi/stdio/snprintf.c @@ -0,0 +1,31 @@ +/* + * snprintf - print formatted output on an array + */ +/* $Id$ */ + +#include <stdio.h> +#include <stdarg.h> +#include "loc_incl.h" + +int +snprintf(char * s, size_t len, const char *format, ...) +{ + va_list ap; + int retval; + FILE tmp_stream; + + va_start(ap, format); + + tmp_stream._fd = -1; + tmp_stream._flags = _IOWRITE + _IONBF + _IOWRITING; + tmp_stream._buf = (unsigned char *) s; + tmp_stream._ptr = (unsigned char *) s; + tmp_stream._count = len; + + retval = _doprnt(format, ap, &tmp_stream); + putc('\0',&tmp_stream); + + va_end(ap); + + return retval; +} diff --git a/lang/cem/libcc.ansi/stdio/vsnprintf.c b/lang/cem/libcc.ansi/stdio/vsnprintf.c new file mode 100644 index 000000000..870e23df2 --- /dev/null +++ b/lang/cem/libcc.ansi/stdio/vsnprintf.c @@ -0,0 +1,26 @@ +/* + * vsnprintf - print formatted output without ellipsis on an array + */ +/* $Id$ */ + +#include <stdio.h> +#include <stdarg.h> +#include "loc_incl.h" + +int +vsnprintf(char *s, size_t len, const char *format, va_list arg) +{ + int retval; + FILE tmp_stream; + + tmp_stream._fd = -1; + tmp_stream._flags = _IOWRITE + _IONBF + _IOWRITING; + tmp_stream._buf = (unsigned char *) s; + tmp_stream._ptr = (unsigned char *) s; + tmp_stream._count = len; + + retval = _doprnt(format, arg, &tmp_stream); + putc('\0',&tmp_stream); + + return retval; +} From 074b42aa9791f5dd15b32d0b124cb216aac3a555 Mon Sep 17 00:00:00 2001 From: David Given Date: Wed, 29 May 2013 21:41:58 +0100 Subject: [PATCH 56/76] Add some missing libc functions: 
setenv, unsetenv, strdup. --HG-- branch : dtrg-videocore rename : lang/cem/libcc.ansi/stdlib/getenv.c => lang/cem/libcc.ansi/stdlib/setenv.c rename : lang/cem/libcc.ansi/string/strlen.c => lang/cem/libcc.ansi/string/strdup.c --- lang/cem/libcc.ansi/build.mk | 2 + lang/cem/libcc.ansi/headers/stdlib.h | 3 + lang/cem/libcc.ansi/headers/string.h | 3 + lang/cem/libcc.ansi/misc/putenv.c | 22 ++++--- lang/cem/libcc.ansi/stdlib/getenv.c | 56 +++++++++++++----- lang/cem/libcc.ansi/stdlib/setenv.c | 87 ++++++++++++++++++++++++++++ lang/cem/libcc.ansi/string/strdup.c | 17 ++++++ 7 files changed, 162 insertions(+), 28 deletions(-) create mode 100644 lang/cem/libcc.ansi/stdlib/setenv.c create mode 100644 lang/cem/libcc.ansi/string/strdup.c diff --git a/lang/cem/libcc.ansi/build.mk b/lang/cem/libcc.ansi/build.mk index 6140b4b50..5987de9e3 100644 --- a/lang/cem/libcc.ansi/build.mk +++ b/lang/cem/libcc.ansi/build.mk @@ -186,6 +186,7 @@ $(call ackfile, lang/cem/libcc.ansi/stdlib/div.c) $(call ackfile, lang/cem/libcc.ansi/stdlib/atexit.c) $(call ackfile, lang/cem/libcc.ansi/stdlib/exit.c) $(call ackfile, lang/cem/libcc.ansi/stdlib/getenv.c) +$(call ackfile, lang/cem/libcc.ansi/stdlib/setenv.c) $(call ackfile, lang/cem/libcc.ansi/stdlib/labs.c) $(call ackfile, lang/cem/libcc.ansi/stdlib/ldiv.c) $(call ackfile, lang/cem/libcc.ansi/stdlib/mblen.c) @@ -238,6 +239,7 @@ $(call ackfile, lang/cem/libcc.ansi/string/strpbrk.c) $(call ackfile, lang/cem/libcc.ansi/string/strspn.c) $(call ackfile, lang/cem/libcc.ansi/string/strncmp.c) $(call ackfile, lang/cem/libcc.ansi/string/strxfrm.c) +$(call ackfile, lang/cem/libcc.ansi/string/strdup.c) # Time diff --git a/lang/cem/libcc.ansi/headers/stdlib.h b/lang/cem/libcc.ansi/headers/stdlib.h index c14db91a8..64a6de16c 100644 --- a/lang/cem/libcc.ansi/headers/stdlib.h +++ b/lang/cem/libcc.ansi/headers/stdlib.h @@ -36,6 +36,9 @@ extern int atexit(void (*_func)(void)); extern void exit(int _status); extern void _Exit(int _status); extern char* 
getenv(const char *_name); +extern int setenv(const char *_name, const char *_value, int _overwrite); +extern int unsetenv(const char *_name); +extern int putenv(char *_string); extern int system(const char *_string); extern void* bsearch(const void *_key, const void *_base, size_t _nmemb, size_t _size, diff --git a/lang/cem/libcc.ansi/headers/string.h b/lang/cem/libcc.ansi/headers/string.h index b9d50617b..eef924f74 100644 --- a/lang/cem/libcc.ansi/headers/string.h +++ b/lang/cem/libcc.ansi/headers/string.h @@ -33,5 +33,8 @@ extern char *strtok(char *_s1, const char *_s2); extern void *memset(void *_s, int _c, size_t _n); extern char *strerror(int _errnum); extern size_t strlen(const char *_s); +extern char *strdup(const char *_s); + +#define bcopy(s, d, z) memmove(d, s, z) #endif diff --git a/lang/cem/libcc.ansi/misc/putenv.c b/lang/cem/libcc.ansi/misc/putenv.c index dc448fef2..a1f94aba4 100644 --- a/lang/cem/libcc.ansi/misc/putenv.c +++ b/lang/cem/libcc.ansi/misc/putenv.c @@ -10,18 +10,17 @@ #define ENTRY_INC 10 #define rounded(x) (((x / ENTRY_INC) + 1) * ENTRY_INC) -extern const char **_penvp; -extern const char **environ; /* environ is a shadow name for _penvp */ +extern char **environ; int putenv(char *name) { - register const char **v = _penvp; + register char **v = environ; register char *r; static int size = 0; /* When size != 0, it contains the number of entries in the * table (including the final NULL pointer). This means that the - * last non-null entry is _penvp[size - 2]. + * last non-null entry is environ[size - 2]. 
*/ if (!name) return 0; @@ -48,11 +47,11 @@ putenv(char *name) } } *r = '='; - v = _penvp; + v = environ; } if (!size) { - register const char **p; + register char **p; register int i = 0; if (v) @@ -62,18 +61,17 @@ putenv(char *name) if (!(v = malloc(rounded(i) * sizeof(char **)))) return 1; size = i; - p = _penvp; - _penvp = v; + p = environ; + environ = v; while (*v++ = *p++); /* copy the environment */ - v = _penvp; + v = environ; } else if (!(size % ENTRY_INC)) { - if (!(v = realloc(_penvp, rounded(size) * sizeof(char **)))) + if (!(v = realloc(environ, rounded(size) * sizeof(char **)))) return 1; - _penvp = v; + environ = v; } v[size - 1] = name; v[size] = NULL; size++; - environ = _penvp; return 0; } diff --git a/lang/cem/libcc.ansi/stdlib/getenv.c b/lang/cem/libcc.ansi/stdlib/getenv.c index 01b887a1d..592e0c05d 100644 --- a/lang/cem/libcc.ansi/stdlib/getenv.c +++ b/lang/cem/libcc.ansi/stdlib/getenv.c @@ -5,23 +5,47 @@ /* $Id$ */ #include -#include +#include -char * -getenv(const char *name) +extern char* _findenv(const char* name, int* offset); + +/* + * getenv(name) -- + * Returns ptr to value associated with name, if any, else NULL. + */ +char* getenv(const char* name) { - register char **v = environ; - register const char *p, *q; + int offset; - if (v == NULL || name == NULL) - return (char *)NULL; - while ((p = *v++) != NULL) { - q = name; - while (*q && (*q == *p++)) - q++; - if (*q || (*p != '=')) - continue; - return (char *)p + 1; - } - return (char *)NULL; + return(_findenv(name,&offset)); } + +/* + * _findenv(name,offset) -- + * Returns pointer to value associated with name, if any, else NULL. + * Sets offset to be the offset of the name/value combination in the + * environmental array, for use by setenv(3) and unsetenv(3). + * Explicitly removes '=' in argument name. + * + * This routine *should* be a static; don't use it. 
+ */ +char* _findenv(register const char* name, int* offset) +{ + extern char **environ; + register int len; + register char **P; + register const char *C; + + if (!environ) + return NULL; + + for (C = name,len = 0;*C && *C != '=';++C,++len); + for (P = environ;*P;++P) + if (!strncmp(*P,name,len)) + if (*(C = *P + len) == '=') { + *offset = P - environ; + return (char*)(++C); + } + return(NULL); +} + diff --git a/lang/cem/libcc.ansi/stdlib/setenv.c b/lang/cem/libcc.ansi/stdlib/setenv.c new file mode 100644 index 000000000..086b2118d --- /dev/null +++ b/lang/cem/libcc.ansi/stdlib/setenv.c @@ -0,0 +1,87 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* $Id$ */ + +#include +#include + +extern char* _findenv(const char* name, int* offset); +extern char **environ; + +/* + * setenv(name,value,rewrite) + * Set the value of the environmental variable "name" to be + * "value". If rewrite is set, replace any current value. 
+ */ +int setenv(register const char* name, register const char* value, int rewrite) +{ + static int alloced = 0; /* if allocated space before */ + register char *C; + int l_value, + offset; + + if (*value == '=') /* no `=' in value */ + ++value; + l_value = strlen(value); + if ((C = _findenv(name,&offset))) { /* find if already exists */ + if (!rewrite) + return(0); + if (strlen(C) >= l_value) { /* old larger; copy over */ + while (*C++ = *value++); + return(0); + } + } + else { /* create new slot */ + register int cnt = 0; + register char **P; + + if (environ) + for (P = environ;*P;++P,++cnt); + if (alloced) { /* just increase size */ + environ = (char **)realloc((char *)environ, + (unsigned)(sizeof(char *) * (cnt + 2))); + if (!environ) + return(-1); + } + else { /* get new space */ + alloced = 1; /* copy old entries into it */ + P = (char **)malloc((unsigned)(sizeof(char *) * + (cnt + 2))); + if (!P) + return(-1); + if (environ) + bcopy(environ,P,cnt * sizeof(char *)); + environ = P; + } + environ[cnt + 1] = NULL; + offset = cnt; + } + for (C = name;*C && *C != '=';++C); /* no `=' in name */ + if (!(environ[offset] = /* name + `=' + value */ + malloc((unsigned)((int)(C - name) + l_value + 2)))) + return(-1); + for (C = environ[offset];(*C = *name++) && *C != '=';++C); + for (*C++ = '=';*C++ = *value++;); + return(0); +} + +/* + * unsetenv(name) -- + * Delete environmental variable "name". + */ +int +unsetenv(const char* name) +{ + register char **P; + int offset; + + while (_findenv(name,&offset)) /* if set multiple times */ + for (P = &environ[offset];;++P) + if (!(*P = *(P + 1))) + break; + + return 0; +} + diff --git a/lang/cem/libcc.ansi/string/strdup.c b/lang/cem/libcc.ansi/string/strdup.c new file mode 100644 index 000000000..730796b5f --- /dev/null +++ b/lang/cem/libcc.ansi/string/strdup.c @@ -0,0 +1,17 @@ +/* + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. 
+ * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* $Id$ */ + +#include + +char* +strdup(const char *s) +{ + int len = strlen(s); + char *p = malloc(len+1); + if (p) + memcpy(p, s, len+1); + return p; +} From 2054618e7526eccc11f132d68a00dbc4aaf4657c Mon Sep 17 00:00:00 2001 From: David Given Date: Thu, 30 May 2013 23:19:55 +0100 Subject: [PATCH 57/76] Add basic termios to the rpi platform to allow echoing/newline translation to be controlled. --HG-- branch : dtrg-videocore rename : plat/rpi/include/unistd.h => plat/rpi/include/termios.h rename : plat/rpi/libsys/write.c => plat/rpi/libsys/tcgetattr.c rename : plat/rpi/libsys/write.c => plat/rpi/libsys/tcsetattr.c --- plat/rpi/build.mk | 5 ++++- plat/rpi/include/termios.h | 31 +++++++++++++++++++++++++++++++ plat/rpi/libsys/libsys.h | 2 +- plat/rpi/libsys/read.c | 10 +++------- plat/rpi/libsys/tcgetattr.c | 21 +++++++++++++++++++++ plat/rpi/libsys/tcsetattr.c | 19 +++++++++++++++++++ plat/rpi/libsys/write.c | 9 ++++----- 7 files changed, 83 insertions(+), 14 deletions(-) create mode 100644 plat/rpi/include/termios.h create mode 100644 plat/rpi/libsys/tcgetattr.c create mode 100644 plat/rpi/libsys/tcsetattr.c diff --git a/plat/rpi/build.mk b/plat/rpi/build.mk index 7bbf007e4..4ddaeb489 100644 --- a/plat/rpi/build.mk +++ b/plat/rpi/build.mk @@ -13,6 +13,7 @@ D := plat/rpi/ platform-headers := \ unistd.h \ + termios.h \ pi.h \ ack/config.h @@ -33,7 +34,9 @@ platform-libsys := \ kill.c \ lseek.c \ time.c \ - signal.c + signal.c \ + tcgetattr.c \ + tcsetattr.c $(eval $(call build-platform)) diff --git a/plat/rpi/include/termios.h b/plat/rpi/include/termios.h new file mode 100644 index 000000000..67bf98a48 --- /dev/null +++ b/plat/rpi/include/termios.h @@ -0,0 +1,31 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. 
+ * See the file 'Copying' in the root of the distribution for the full text. + */ + +#ifndef _TERMIOS_H +#define _TERMIOS_H + +typedef unsigned char tcflag_t; + +struct termios +{ + tcflag_t c_iflag; + tcflag_t c_oflag; + tcflag_t c_lflag; +}; + +#define ONLCR 1 +#define ECHO 2 +#define INLCR 4 + +#define TCSANOW 0 +#define TCSADRAIN 1 +#define TCSAFLUSH 2 + +extern int tcgetattr(int fd, struct termios* t); +extern int tcsetattr(int fd, int actions, struct termios* t); + +#endif diff --git a/plat/rpi/libsys/libsys.h b/plat/rpi/libsys/libsys.h index e9bff7e1a..bd9d91880 100644 --- a/plat/rpi/libsys/libsys.h +++ b/plat/rpi/libsys/libsys.h @@ -13,6 +13,6 @@ extern unsigned char _sys_rawread(void); extern void _sys_write_tty(char c); -/* extern int _sys_ttyflags; */ +extern int _sys_ttyflags; #endif diff --git a/plat/rpi/libsys/read.c b/plat/rpi/libsys/read.c index 476689320..227c89997 100644 --- a/plat/rpi/libsys/read.c +++ b/plat/rpi/libsys/read.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "libsys.h" int read(int fd, void* buffer, size_t count) @@ -30,16 +31,11 @@ int read(int fd, void* buffer, size_t count) /* Read one byte. */ i = _sys_rawread(); -#if 0 - if ((i == '\r') && !(_sys_ttyflags & RAW)) + if ((i == '\r') && !(_sys_ttyflags & INLCR)) i = '\n'; if (_sys_ttyflags & ECHO) _sys_write_tty(i); -#endif - if (i == '\r') - i = '\n'; - _sys_write_tty(i); - + *(char*)buffer = i; return 1; } diff --git a/plat/rpi/libsys/tcgetattr.c b/plat/rpi/libsys/tcgetattr.c new file mode 100644 index 000000000..08c73f36b --- /dev/null +++ b/plat/rpi/libsys/tcgetattr.c @@ -0,0 +1,21 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. 
+ */ + +#include +#include +#include +#include +#include "libsys.h" + +int tcgetattr(int fd, struct termios* t) +{ + t->c_iflag = _sys_ttyflags & INLCR; + t->c_oflag = _sys_ttyflags & ONLCR; + t->c_lflag = _sys_ttyflags & ECHO; + return 0; +} + diff --git a/plat/rpi/libsys/tcsetattr.c b/plat/rpi/libsys/tcsetattr.c new file mode 100644 index 000000000..1943d33e0 --- /dev/null +++ b/plat/rpi/libsys/tcsetattr.c @@ -0,0 +1,19 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include +#include +#include +#include +#include "libsys.h" + +int tcsetattr(int fd, int actions, struct termios* t) +{ + _sys_ttyflags = t->c_iflag | t->c_oflag | t->c_lflag; + return 0; +} + diff --git a/plat/rpi/libsys/write.c b/plat/rpi/libsys/write.c index 9a765b04c..0fba49884 100644 --- a/plat/rpi/libsys/write.c +++ b/plat/rpi/libsys/write.c @@ -8,16 +8,15 @@ #include #include #include +#include #include "libsys.h" +int _sys_ttyflags = ONLCR | INLCR | ECHO; + void _sys_write_tty(char c) { _sys_rawwrite(c); -#if 0 - if ((c == '\n') && !(_sys_ttyflags & RAW)) - _sys_rawwrite('\r'); -#endif - if (c == '\n') + if ((c == '\n') && (_sys_ttyflags & ONLCR)) _sys_rawwrite('\r'); } From d5a112dbfda23a1dfc0e8d5c22e474762da75f44 Mon Sep 17 00:00:00 2001 From: David Given Date: Thu, 30 May 2013 23:25:22 +0100 Subject: [PATCH 58/76] Some more termios compatibility options. 
--HG-- branch : dtrg-videocore --- plat/rpi/include/termios.h | 16 ++++++++++++++++ plat/rpi/libsys/tcgetattr.c | 1 + 2 files changed, 17 insertions(+) diff --git a/plat/rpi/include/termios.h b/plat/rpi/include/termios.h index 67bf98a48..8e0dbb8e8 100644 --- a/plat/rpi/include/termios.h +++ b/plat/rpi/include/termios.h @@ -15,12 +15,28 @@ struct termios tcflag_t c_iflag; tcflag_t c_oflag; tcflag_t c_lflag; + tcflag_t c_cflag; }; #define ONLCR 1 #define ECHO 2 #define INLCR 4 +/* Dummied parameters for compatibility --- only the ones above are + * honoured. */ + +#define BRKINT 0 +#define ICRNL 0 +#define INPCK 0 +#define ISTRIP 0 +#define IXON 0 +#define CS8 0 +#define ICANON 0 +#define IEXTEN 0 +#define ISIG 0 + +#define OPOST ONLCR + #define TCSANOW 0 #define TCSADRAIN 1 #define TCSAFLUSH 2 diff --git a/plat/rpi/libsys/tcgetattr.c b/plat/rpi/libsys/tcgetattr.c index 08c73f36b..3b099afb5 100644 --- a/plat/rpi/libsys/tcgetattr.c +++ b/plat/rpi/libsys/tcgetattr.c @@ -16,6 +16,7 @@ int tcgetattr(int fd, struct termios* t) t->c_iflag = _sys_ttyflags & INLCR; t->c_oflag = _sys_ttyflags & ONLCR; t->c_lflag = _sys_ttyflags & ECHO; + t->c_cflag = 0; return 0; } From 86c6fa2f1e339c67c66902e5d3308e3fbd1e1679 Mon Sep 17 00:00:00 2001 From: David Given Date: Thu, 30 May 2013 23:50:19 +0100 Subject: [PATCH 59/76] Implement NOT... --HG-- branch : dtrg-videocore --- mach/vc4/ncg/table | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index 3c28651df..9d1e45874 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -144,6 +144,7 @@ INSTRUCTIONS lsr GPR:rw, GPR+CONST:ro. mov GPR:wo, GPR+CONST:ro. mul GPR:rw, GPR+CONST:ro. + mvn GPR:wo, GPR+CONST:ro. neg GPR:rw, GPR+CONST:ro. or GPR:rw, GPR+CONST:ro. pop GPR0+GPR6+GPR16+GPRFP+GPRPC:wo. 
@@ -837,6 +838,13 @@ PATTERNS eor %a, %2 yields %a + pat com $1==QUAD /* Complement */ + with GPR + uses reusing %1, REG=%1 + gen + mvn %a, %1 + yields %a + pat dvi $1==QUAD /* Divide word (second / top) */ with GPR GPR uses reusing %2, REG From 3e0123ca032169ffaa67652da8da633ba92307be Mon Sep 17 00:00:00 2001 From: David Given Date: Wed, 5 Jun 2013 23:57:23 +0100 Subject: [PATCH 60/76] Fix treatment of out-of-range values in switch csa. --HG-- branch : dtrg-videocore --- mach/vc4/libem/csa.s | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/mach/vc4/libem/csa.s b/mach/vc4/libem/csa.s index 1628199a0..25332f2a7 100644 --- a/mach/vc4/libem/csa.s +++ b/mach/vc4/libem/csa.s @@ -17,17 +17,20 @@ add r0, gp ld r2, 4 (r0) ! check lower bound - cmp r1, r2 - mov.lo r1, r2 ! r1 = min(r1, r2) + + b.lo r1, r2, default ! jump to default if r1 < r2 sub r1, r2 ! adjust value to be 0-based ld r2, 8 (r0) ! check upper bound - cmp r1, r2 - mov.hi r1, r2 ! r1 = max(r1, r2) + b.hi r1, r2, default ! jump to default if r1 > r2 add r1, #3 +go: ld r1, (r0, r1) ! load destination address add r1, gp b r1 ! ...and go +default: + mov r1, #0 ! index of default value + b go From ed6c4a85d16223805969192ef360b360698f2a2a Mon Sep 17 00:00:00 2001 From: David Given Date: Thu, 6 Jun 2013 00:05:48 +0100 Subject: [PATCH 61/76] Change the size of the SRAM area to 128kB (to reflect reality). --HG-- branch : dtrg-videocore --- plat/rpi/README | 6 +++--- plat/rpi/libsys/brk.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/plat/rpi/README b/plat/rpi/README index 166d2148b..f8836724c 100644 --- a/plat/rpi/README +++ b/plat/rpi/README @@ -11,7 +11,7 @@ you're running in bare-metal mode, you can hook stdin/stdout up to the mini UART. (Obviously, in kernel mode you can't.) Important note! The malloc heap expects your program to be loaded into a -chunk of memory that's 256kB large. You must make sure that this is the case, +chunk of memory that's 128kB large. 
You must make sure that this is the case, or Bad Stuff will happen. Output binaries are fully PIC and can be loaded anywhere (this is one of the @@ -57,10 +57,10 @@ variable. It's a structure that is initialised with the data that's passed in from mailbox.c (currently four pointers and two integers). If you want to use malloc() and friends, you'll need to hack mailbox.c so -that the buffer containing the code is at least 256kB, or you're likely to +that the buffer containing the code is at least 128kB, or you're likely to corrupt the VideoCore's workspace and crash it. David Given -2013-05-26 +2013-06-06 diff --git a/plat/rpi/libsys/brk.c b/plat/rpi/libsys/brk.c index 2c44347d4..36c7d4a6f 100644 --- a/plat/rpi/libsys/brk.c +++ b/plat/rpi/libsys/brk.c @@ -19,7 +19,7 @@ static char* current = _end; /* Top of heap: we assume that the block of memory the binary is loaded in * is 256kB long. Because user pointers are always relative to the beginning * of the block, this makes the end address easy to calculate. */ -static char* max = (char*) (256*1024); +static char* max = (char*) (128*1024); int brk(void* newend) { From bbd4b4685084e4073c4535567cffc162e9a6980a Mon Sep 17 00:00:00 2001 From: David Given Date: Fri, 7 Jun 2013 21:25:38 +0100 Subject: [PATCH 62/76] Fix stack corruption when adjusting SP. Be a bit more rigorous about clearing the pseudostack on branch/labels. --HG-- branch : dtrg-videocore --- mach/vc4/ncg/table | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index 9d1e45874..963fb74e1 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -117,7 +117,7 @@ INSTRUCTIONS beq "b.eq" LABEL:ro. bne "b.ne" LABEL:ro. bgt "b.gt" LABEL:ro. - bgt "b.gt" LABEL:ro. + blt "b.lt" LABEL:ro. bhi "b.hi" LABEL:ro. bset GPR:rw, GPR+CONST:ro. b GPR+LABEL:ro. 
@@ -266,28 +266,32 @@ PATTERNS with ANY yields %1 %1 - pat dup $1<=(2*QUAD) /* Duplicate word pair on top of stack */ + pat dup $1==(2*QUAD) /* Duplicate word pair on top of stack */ with ANY ANY yields %1 %2 %1 %2 - pat exg $1==QUAD /* Exchange top two words on stack */ + pat exg $1<=QUAD /* Exchange top two words on stack */ with ANY ANY yields %1 %2 + pat exg $1==(2*QUAD) /* Exchange top two word pairs on stack */ + with ANY ANY ANY ANY + yields %2 %1 %4 %3 + pat stl lol $1==$2 /* Store then load local */ leaving dup QUAD stl $1 - + pat lal sti lal loi $1==$3 && $2==$4 /* Store then load local, of a different size */ leaving dup $2 lal $1 sti $2 - + pat ste loe $1==$2 /* Store then load external */ leaving - dup 4 + dup QUAD ste $1 @@ -1126,7 +1130,7 @@ PATTERNS pat cmf tne call cmf_t("add.ne") /* top = float (second != top) */ pat cmf tlt call cmf_t("add.lo") /* top = float (second < top) */ pat cmf tle call cmf_t("add.ls") /* top = float (second <= top) */ - pat cmf tgt call cmf_t("add.hi") /* top = float (second < top) */ + pat cmf tgt call cmf_t("add.hi") /* top = float (second > top) */ pat cmf tge call cmf_t("add.hs") /* top = float (second >= top) */ proc fallback_t example teq @@ -1142,7 +1146,7 @@ PATTERNS pat tne call fallback_t("add.ne") /* top = float (top != 0) */ pat tlt call fallback_t("add.lo") /* top = float (top < 0) */ pat tle call fallback_t("add.ls") /* top = float (top <= 0) */ - pat tgt call fallback_t("add.hi") /* top = float (top < 0) */ + pat tgt call fallback_t("add.hi") /* top = float (top > 0) */ pat tge call fallback_t("add.hs") /* top = float (top >= 0) */ @@ -1151,6 +1155,7 @@ PATTERNS proc anyz example zeq with GPR STACK + kills ALL gen cmp %1, {CONST, 0} beq[1] {LABEL, $1} @@ -1164,6 +1169,7 @@ PATTERNS proc anyb example beq with GPR+CONST GPR STACK + kills ALL gen cmp %2, %1 beq[1] {LABEL, $1} @@ -1177,6 +1183,7 @@ PATTERNS proc cmu_z example cmu zeq with GPR+CONST GPR STACK + kills ALL gen cmp %2, %1 beq[1] {LABEL, $2} @@ 
-1196,6 +1203,7 @@ PATTERNS proc cmf_z example cmu zeq with GPR GPR STACK + kills ALL gen fcmp %2, %2, %1 beq[1] {LABEL, $2} @@ -1258,6 +1266,7 @@ PATTERNS pat bra with STACK + kills ALL gen b {LABEL, $1} @@ -1452,13 +1461,23 @@ PATTERNS ste ".reghp" pat ass /* Adjust stack by variable amount */ - with CONST+GPR + with CONST+GPR STACK gen add SP, %1 pat asp $1==QUAD /* Adjust stack by constant amount */ - gen - pop SCRATCH + with GPR + /* silently ignore GPR */ + with STACK + gen + pop SCRATCH + + pat asp $1==(2*QUAD) /* Adjust stack by constant amount */ + with GPR GPR + /* silently ignore GPR */ + with STACK + gen + add SP, {CONST, 2*QUAD} pat asp /* Adjust stack by constant amount */ leaving From eaf4339cd638777d787e34c63397b23ce3b0e6c3 Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 9 Jun 2013 22:16:30 +0100 Subject: [PATCH 63/76] Implement a very crude busy-wait based select() mechanism for consol input. --HG-- branch : dtrg-videocore rename : plat/rpi/include/ack/config.h => plat/rpi/include/sys/select.h rename : plat/rpi/libsys/time.c => plat/rpi/libsys/select.c --- plat/rpi/build.mk | 3 +- plat/rpi/include/sys/select.h | 13 +++++++ plat/rpi/include/unistd.h | 12 +++++++ plat/rpi/libsys/libsys.h | 1 + plat/rpi/libsys/pi_uart.s | 14 ++++++++ plat/rpi/libsys/select.c | 65 +++++++++++++++++++++++++++++++++++ 6 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 plat/rpi/include/sys/select.h create mode 100644 plat/rpi/libsys/select.c diff --git a/plat/rpi/build.mk b/plat/rpi/build.mk index 4ddaeb489..4db4a4849 100644 --- a/plat/rpi/build.mk +++ b/plat/rpi/build.mk @@ -36,7 +36,8 @@ platform-libsys := \ time.c \ signal.c \ tcgetattr.c \ - tcsetattr.c + tcsetattr.c \ + select.c $(eval $(call build-platform)) diff --git a/plat/rpi/include/sys/select.h b/plat/rpi/include/sys/select.h new file mode 100644 index 000000000..df7488da4 --- /dev/null +++ b/plat/rpi/include/sys/select.h @@ -0,0 +1,13 @@ +/* + * Raspberry Pi support library for the ACK 
+ * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#ifndef _SYS_SELECT_H +#define _SYS_SELECT_H + +#include + +#endif diff --git a/plat/rpi/include/unistd.h b/plat/rpi/include/unistd.h index 4cbf43c58..a4d0c4507 100644 --- a/plat/rpi/include/unistd.h +++ b/plat/rpi/include/unistd.h @@ -90,4 +90,16 @@ typedef void (*sighandler_t)(int); extern sighandler_t signal(int signum, sighandler_t handler); extern int raise(int signum); +/* Select */ + +typedef uint32_t fd_set; + +extern int select(int nfds, fd_set *readfds, fd_set *writefds, + fd_set *exceptfds, struct timeval *timeout); + +#define FD_ZERO(set) do { *set = 0; } while (0) +#define FD_SET(fd, set) do { *set |= (1< +#include +#include +#include +#include +#include "libsys.h" + +#define TICKS_PER_SEC 1000000 + +typedef int condition_t(void); + +static int nop_condition(void) +{ + return 0; +} + +int select(int nfds, fd_set *readfds, fd_set *writefds, + fd_set *exceptfds, struct timeval *timeout) +{ + int result = 0; + condition_t* condition = nop_condition; + + if (FD_ISSET(0, readfds)) + condition = _sys_rawpoll; + + FD_ZERO(readfds); + FD_ZERO(writefds); + FD_ZERO(exceptfds); + + if (timeout) + { + /* Wait for a specified amount of time. */ + + uint32_t ticks = (timeout->tv_sec * TICKS_PER_SEC) + + (timeout->tv_usec * (TICKS_PER_SEC/1000000)); + uint32_t* timer_clo = pi_phys_to_user((void*) 0x7e003004); + uint32_t ra = *timer_clo; + + while (!condition() && ((*timer_clo - ra) < ticks)) + ; + } + else + { + /* Wait forever. */ + + while (!condition()) + ; + + } + + if ((condition == _sys_rawpoll) && condition()) + { + FD_SET(0, readfds); + result = 1; + } + + return result; +} From 052dd9bfc0aeb6665f07f0fd3bf5a2c9bdc71315 Mon Sep 17 00:00:00 2001 From: David Given Date: Thu, 20 Jun 2013 00:14:55 +0100 Subject: [PATCH 64/76] Actually remember to set the stack pointer on startup. 
Reduce to stack from 16kB to a much more reasonable 1kB. --HG-- branch : dtrg-videocore --- plat/rpi/boot.s | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/plat/rpi/boot.s b/plat/rpi/boot.s index 71081d2cb..8bee47aba 100644 --- a/plat/rpi/boot.s +++ b/plat/rpi/boot.s @@ -16,7 +16,7 @@ .sect .text #define gp r15 -#define STACKSIZE 16*1024 +#define STACKSIZE 1*1024 ! MAIN ENTRY POINT @@ -46,6 +46,7 @@ _1: st lr, .returnlr lea gp, begtext + lea sp, .stack + STACKSIZE ! Save the kernel parameters. From 55be35a68a9754a4c5c34ae2ff18ff1005b75d84 Mon Sep 17 00:00:00 2001 From: David Given Date: Thu, 20 Jun 2013 00:15:14 +0100 Subject: [PATCH 65/76] Add a stub malloc.h. --HG-- branch : dtrg-videocore rename : lang/cem/libcc.ansi/headers/stdlib.h => lang/cem/libcc.ansi/headers/malloc.h --- lang/cem/libcc.ansi/build.mk | 1 + lang/cem/libcc.ansi/headers/malloc.h | 14 ++++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 lang/cem/libcc.ansi/headers/malloc.h diff --git a/lang/cem/libcc.ansi/build.mk b/lang/cem/libcc.ansi/build.mk index 5987de9e3..d4300311d 100644 --- a/lang/cem/libcc.ansi/build.mk +++ b/lang/cem/libcc.ansi/build.mk @@ -16,6 +16,7 @@ define build-libcc-ansi-headers-impl float.h \ limits.h \ math.h \ + malloc.h \ setjmp.h \ signal.h \ stdarg.h \ diff --git a/lang/cem/libcc.ansi/headers/malloc.h b/lang/cem/libcc.ansi/headers/malloc.h new file mode 100644 index 000000000..73cef551e --- /dev/null +++ b/lang/cem/libcc.ansi/headers/malloc.h @@ -0,0 +1,14 @@ +/* + * stdlib.h - standard library + * + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". + */ +/* $Id$ */ + +#ifndef _MALLOC_H +#define _MALLOC_H + +#include + +#endif From fd2360be0f9d6fdd91240ed32873f910edf13825 Mon Sep 17 00:00:00 2001 From: David Given Date: Fri, 21 Jun 2013 23:20:50 +0100 Subject: [PATCH 66/76] Ship assembler man pages. 
--HG-- branch : dtrg-videocore rename : man/8080_as.6 => man/i80_as.6 rename : man/m68k2_as.6 => man/m68020_as.6 --- mach/proto/as/build.mk | 4 ++++ man/.distr | 5 +++-- man/{8080_as.6 => i80_as.6} | 0 man/{m68k2_as.6 => m68020_as.6} | 0 4 files changed, 7 insertions(+), 2 deletions(-) rename man/{8080_as.6 => i80_as.6} (100%) rename man/{m68k2_as.6 => m68020_as.6} (100%) diff --git a/mach/proto/as/build.mk b/mach/proto/as/build.mk index 791611f3d..a67277ac2 100644 --- a/mach/proto/as/build.mk +++ b/mach/proto/as/build.mk @@ -41,6 +41,10 @@ $(OBJDIR)/$D/preprocessed-comm2.y: mach/proto/as/comm2.y $(CPPANSI) \ $(call file, $(LIBOBJECT)) $(call cprogram, $(BINDIR)/$(PLATFORM)/as) $(call installto, $(PLATDEP)/$(PLATFORM)/as) + + $(call reset) + $(call file, man/$(ARCH)_as.6) + $(call installto, $(INSDIR)/share/man/man6/$(ARCH)_as.6) endef build-as = $(eval $(build-as-impl)) diff --git a/man/.distr b/man/.distr index d21bfd287..f3262226b 100644 --- a/man/.distr +++ b/man/.distr @@ -2,12 +2,13 @@ 6800_as.6 6805_as.6 6809_as.6 -8080_as.6 +i80_as.6 z8000_as.6 i86_as.6 i386_as.6 -m68k2_as.6 +m68020_as.6 ns_as.6 +vc4_as.6 pdp_as.6 z80_as.6 em_cg.6 diff --git a/man/8080_as.6 b/man/i80_as.6 similarity index 100% rename from man/8080_as.6 rename to man/i80_as.6 diff --git a/man/m68k2_as.6 b/man/m68020_as.6 similarity index 100% rename from man/m68k2_as.6 rename to man/m68020_as.6 From 2be811bac24d6bfb477fbd29df3df6280e145e23 Mon Sep 17 00:00:00 2001 From: David Given Date: Fri, 21 Jun 2013 23:21:08 +0100 Subject: [PATCH 67/76] Updated VC4 docs. 
--HG-- branch : dtrg-videocore --- man/vc4_as.6 | 45 +++++++++++++++++++++++++++++++++++++++++++++ plat/rpi/README | 3 +++ 2 files changed, 48 insertions(+) create mode 100644 man/vc4_as.6 diff --git a/man/vc4_as.6 b/man/vc4_as.6 new file mode 100644 index 000000000..81b10813d --- /dev/null +++ b/man/vc4_as.6 @@ -0,0 +1,45 @@ +.\" $Header$ +.TH VC4_AS 1 +.ad +.SH NAME +vc4_as \- assembler for Broadcom VideoCore IV + +.SH SYNOPSIS +/usr/em/lib/vc4_as [options] argument ... + +.SH DESCRIPTION +This assembler is made with the general framework +described in \fIuni_ass\fP(6). + +.SH SYNTAX +The assembler uses a modified version of the syntax described in +https://github.com/hermanhermitage/videocoreiv/wiki/VideoCore-IV-Programmers-Manual: +condition codes must be prefixed with a full stop. Vector instructions are not +yet supported. + +.SH "SEE ALSO" +uni_ass(6), +ack(1), +.br +https://github.com/hermanhermitage/videocoreiv +.SH EXAMPLE +.nf +.ta 8n 16n 24n 32n 40n 48n +An example of VideoCore IV assembly language: + + ldb r0, __uart_status + b.eq r0, #0, 1b + + ! receive 1 byte (returned in r0) + mov r1, #AUX_MU_LSR_REG + mov r2, #AUX_MU_IO_REG + ! loop until char available +recvwait: + ld r3, (r1) + and r3, #0x1 + b.ne r3, #0x1, recvwait + + ldb r0, (r2) +1: + b lr +.fi diff --git a/plat/rpi/README b/plat/rpi/README index f8836724c..7d78433dd 100644 --- a/plat/rpi/README +++ b/plat/rpi/README @@ -5,6 +5,9 @@ This is a fairly crude port of the ACK to produce VideoCore IV machine code, suitable for use on the Raspberry Pi. It produces terrible but working code. The resulting binaries can be used either bare metal or loaded as a GPU kernel and executed using a modified mailbox.c (see below). +Currently floating point support is present but incomplete; and as the +VideoCore IV does not have double-precision float support, the C compiler +treats doubles as single precision. 
As much of the standard C library as is relevant works; if you're running in bare-metal mode, you can hook stdin/stdout up to the From d94c1c81502771486a92f4543e05abf32d405000 Mon Sep 17 00:00:00 2001 From: David Given Date: Fri, 21 Jun 2013 23:38:21 +0100 Subject: [PATCH 68/76] Updated distr files. --HG-- branch : dtrg-videocore rename : mach/i80/.distr => mach/vc4/.distr rename : plat/cpm/.distr => plat/rpi/.distr --- .distr | 3 +++ lang/cem/libcc.ansi/headers/.distr | 1 + lang/cem/libcc.ansi/stdio/.distr | 2 ++ lang/cem/libcc.ansi/stdlib/.distr | 1 + lang/cem/libcc.ansi/string/.distr | 1 + mach/.distr | 1 + mach/vc4/.distr | 5 +++++ mach/vc4/as/.distr | 1 + mach/vc4/libem/.distr | 3 +++ man/.distr | 8 -------- plat/rpi/.distr | 31 ++++++++++++++++++++++++++++++ plat/rpi/boot.s | 4 ++++ 12 files changed, 53 insertions(+), 8 deletions(-) create mode 100644 mach/vc4/.distr create mode 100644 mach/vc4/libem/.distr create mode 100644 plat/rpi/.distr diff --git a/.distr b/.distr index dbe964f93..1009a685e 100644 --- a/.distr +++ b/.distr @@ -48,6 +48,7 @@ mach/i80 mach/i86 mach/i386 mach/m68020 +mach/vc4 plat plat/cpm @@ -55,5 +56,7 @@ plat/pc86 plat/linux plat/linux386 plat/linux68k +plat/rpi examples +man diff --git a/lang/cem/libcc.ansi/headers/.distr b/lang/cem/libcc.ansi/headers/.distr index cf35cec17..22ee8d0c4 100644 --- a/lang/cem/libcc.ansi/headers/.distr +++ b/lang/cem/libcc.ansi/headers/.distr @@ -20,3 +20,4 @@ stdbool.h fcntl.h tgmath.h locale.h +malloc.h diff --git a/lang/cem/libcc.ansi/stdio/.distr b/lang/cem/libcc.ansi/stdio/.distr index 5ad14e146..4206946e7 100644 --- a/lang/cem/libcc.ansi/stdio/.distr +++ b/lang/cem/libcc.ansi/stdio/.distr @@ -39,6 +39,7 @@ rewind.c scanf.c setbuf.c setvbuf.c +snprintf.c sprintf.c sscanf.c tmpfile.c @@ -47,3 +48,4 @@ ungetc.c vfprintf.c vprintf.c vsprintf.c +vsnprintf.c diff --git a/lang/cem/libcc.ansi/stdlib/.distr b/lang/cem/libcc.ansi/stdlib/.distr index a556b3058..dd4a54186 100644 --- 
a/lang/cem/libcc.ansi/stdlib/.distr +++ b/lang/cem/libcc.ansi/stdlib/.distr @@ -17,6 +17,7 @@ mbstowcs.c mbtowc.c qsort.c rand.c +setenv.c strtod.c strtol.c system.c diff --git a/lang/cem/libcc.ansi/string/.distr b/lang/cem/libcc.ansi/string/.distr index f7ff5cc28..ef6a1164e 100644 --- a/lang/cem/libcc.ansi/string/.distr +++ b/lang/cem/libcc.ansi/string/.distr @@ -9,6 +9,7 @@ strcmp.c strcoll.c strcpy.c strcspn.c +strdup.c strerror.c strlen.c strncat.c diff --git a/mach/.distr b/mach/.distr index fb9d533eb..d2137b302 100644 --- a/mach/.distr +++ b/mach/.distr @@ -25,6 +25,7 @@ s2650 sun3 sun2 vax4 +vc4 xenix3 z80 z8000 diff --git a/mach/vc4/.distr b/mach/vc4/.distr new file mode 100644 index 000000000..38864464f --- /dev/null +++ b/mach/vc4/.distr @@ -0,0 +1,5 @@ +build.mk +as +ncg +libem +libend diff --git a/mach/vc4/as/.distr b/mach/vc4/as/.distr index 8ebe3797c..6ef7a1516 100644 --- a/mach/vc4/as/.distr +++ b/mach/vc4/as/.distr @@ -4,3 +4,4 @@ mach2.c mach3.c mach4.c mach5.c +binary.h diff --git a/mach/vc4/libem/.distr b/mach/vc4/libem/.distr new file mode 100644 index 000000000..5dff9f5ac --- /dev/null +++ b/mach/vc4/libem/.distr @@ -0,0 +1,3 @@ +csa.s +csb.s +videocore.h diff --git a/man/.distr b/man/.distr index f3262226b..4e52c6c32 100644 --- a/man/.distr +++ b/man/.distr @@ -1,15 +1,8 @@ -6500_as.6 -6800_as.6 -6805_as.6 -6809_as.6 i80_as.6 -z8000_as.6 i86_as.6 i386_as.6 m68020_as.6 -ns_as.6 vc4_as.6 -pdp_as.6 z80_as.6 em_cg.6 em_ncg.6 @@ -18,4 +11,3 @@ libpc.7 head pc_prlib.7 uni_ass.6 -proto.make diff --git a/plat/rpi/.distr b/plat/rpi/.distr new file mode 100644 index 000000000..529eec7e9 --- /dev/null +++ b/plat/rpi/.distr @@ -0,0 +1,31 @@ +descr +boot.s +build.mk +README +include/ack/config.h +include/sys/select.h +include/unistd.h +include/pi.h +include/termios.h +libsys/brk.c +libsys/close.c +libsys/creat.c +libsys/errno.s +libsys/getpid.c +libsys/_hol0.s +libsys/isatty.c +libsys/kill.c +libsys/libsysasm.h +libsys/libsys.h +libsys/lseek.c 
+libsys/open.c +libsys/pi_phys_to_user.s +libsys/pi_uart.s +libsys/pi_user_to_phys.s +libsys/read.c +libsys/select.c +libsys/signal.c +libsys/tcgetattr.c +libsys/tcsetattr.c +libsys/time.c +libsys/write.c diff --git a/plat/rpi/boot.s b/plat/rpi/boot.s index 8bee47aba..a76d1e7a7 100644 --- a/plat/rpi/boot.s +++ b/plat/rpi/boot.s @@ -110,6 +110,10 @@ __exit: .define _pi_kernel_parameters .comm _pi_kernel_parameters, 4 +.define .linenumber, .filename +.comm .linenumber, 4 ! current linenumber (used for debugging) +.comm .filename, 4 ! ptr to current filename (used for debugging) + ! User stack. .comm .stack, STACKSIZE From c72eaef8ee959d9a250b1c3946b91ad25b2b7c75 Mon Sep 17 00:00:00 2001 From: David Given Date: Mon, 24 Jun 2013 23:57:27 +0100 Subject: [PATCH 69/76] Add routine to set the Raspberry Pi clock to 250MHz (loads faster than the default 19.2MHz). --HG-- branch : dtrg-videocore rename : plat/rpi/libsys/pi_user_to_phys.s => plat/rpi/libsys/pi_fast_mode.s --- plat/rpi/build.mk | 1 + plat/rpi/include/pi.h | 12 ++++++ plat/rpi/libsys/pi_fast_mode.s | 70 ++++++++++++++++++++++++++++++++++ plat/rpi/libsys/pi_uart.s | 15 ++++++-- 4 files changed, 94 insertions(+), 4 deletions(-) create mode 100644 plat/rpi/libsys/pi_fast_mode.s diff --git a/plat/rpi/build.mk b/plat/rpi/build.mk index 4db4a4849..c89847607 100644 --- a/plat/rpi/build.mk +++ b/plat/rpi/build.mk @@ -23,6 +23,7 @@ platform-libsys := \ pi_phys_to_user.s \ pi_user_to_phys.s \ pi_uart.s \ + pi_fast_mode.s \ creat.c \ close.c \ open.c \ diff --git a/plat/rpi/include/pi.h b/plat/rpi/include/pi.h index 656543bde..24e89a9a1 100644 --- a/plat/rpi/include/pi.h +++ b/plat/rpi/include/pi.h @@ -32,5 +32,17 @@ extern void* pi_phys_to_user(void* ptr); /* Converts a pointer from a user address to a physical address. */ extern void* pi_user_to_phys(void* ptr); +/* Change the clock speed from 19.2MHz to 250MHz. Must be called *before* + * pi_init_uart(). */ +extern void pi_fast_mode(void); + +/* Initialise the RAM. 
*/ +extern void pi_init_ram(void); + +/* The current clock speed (used by pi_init_uart to calculate the correct + * UART settings). */ + +extern int pi_clock_speed; + #endif diff --git a/plat/rpi/libsys/pi_fast_mode.s b/plat/rpi/libsys/pi_fast_mode.s new file mode 100644 index 000000000..8b50990f0 --- /dev/null +++ b/plat/rpi/libsys/pi_fast_mode.s @@ -0,0 +1,70 @@ +# +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include "libsysasm.h" + +.sect .text + +#define PASSWD 0x5a000000 +#define PLLC 5 +#define OSC 1 + +#define A2W 0x7e102000 +#define A2W_PLLC_MULT 0x7e102020 +#define A2W_PLLC_MULT2 0x7e102120 +#define A2W_PLLC_MULT_FRACT 0x7e102220 +#define A2W_PLLx_DIV 0x7e102620 + +#define CM 0x7e101000 +#define CM_VPU_CTL 0x7e101008 +#define CM_VPU_DIV 0x7e10100c +#define CM_TIME_DIV 0x7e1010ec +#define CM_TIME_CTL 0x7e1010e8 + +#define hash # +#define copy(A) A +#define poke(A, V) \ + mov r0, copy(hash) V; mov r1, copy(hash) A; st r0, (r1) + +! Changes the clock speed to 250MHz. 
+ +.define _pi_fast_mode +_pi_fast_mode: + poke(A2W + 0x190, 0x5a000001) + poke(A2W_PLLC_MULT_FRACT, PASSWD | 87380) + poke(A2W_PLLC_MULT2, PASSWD | 52 | 0x1000) + poke(A2W + 0x3c, 0x5a000100) + poke(A2W + 0x38, 0x5a000000) + poke(A2W + 0x34, 0x5a144000) + poke(A2W + 0x30, 0x5a000000) + poke(CM + 0x108, 0x5a000200) + poke(CM + 0x108, 0x5a0002aa) + poke(A2W + 0x2c, 0x5a000000) + poke(A2W + 0x28, 0x5a400000) + poke(A2W + 0x24, 0x5a000005) + poke(A2W_PLLC_MULT, PASSWD | 52 | 0x555000) + poke(A2W_PLLC_MULT2, PASSWD | 52 | 0x21000) + poke(A2W + 0x2c, 0x5a000042) + poke(A2W + 0x28, 0x5a500401) + poke(A2W + 0x24, 0x5a004005) + poke(A2W_PLLC_MULT, PASSWD | 52 | 0x555000) + poke(A2W_PLLx_DIV, PASSWD | 2) + poke(CM + 0x108, 0x5a0002ab) + poke(CM + 0x108, 0x5a0002aa) + poke(CM + 0x108, 0x5a0002a8) + poke(CM_VPU_CTL, PASSWD | 0x200 | OSC | 0x40) + poke(CM_VPU_DIV, PASSWD | [4 << 12]) + poke(CM_VPU_CTL, PASSWD | PLLC | 0x40) + poke(CM_VPU_CTL, PASSWD | PLLC | 0x50) + poke(CM_TIME_DIV, PASSWD | [19 << 12] | 819) + poke(CM_TIME_CTL, PASSWD | OSC | 0x10) + + mov r0, #250000000 + st r0, _pi_clock_speed + b lr + diff --git a/plat/rpi/libsys/pi_uart.s b/plat/rpi/libsys/pi_uart.s index d52069d71..b7ce9898e 100644 --- a/plat/rpi/libsys/pi_uart.s +++ b/plat/rpi/libsys/pi_uart.s @@ -14,9 +14,6 @@ ! So be careful with your serial/terminal, some adjustment may be necessary. TARGET_BAUD_RATE = 115200 -! System clock is running directly off the 19.2MHz crystal at initial reset -SYSTEM_CLOCK = 19200000 - GPFSEL1 = 0x7e200004 GPSET0 = 0x7e20001C GPCLR0 = 0x7e200028 @@ -102,7 +99,10 @@ delay2: st r0, (r1) mov r1, #AUX_MU_BAUD_REG - mov r0, #[[SYSTEM_CLOCK/[TARGET_BAUD_RATE*8]]-1] + ld r0, _pi_clock_speed + mov r2, #TARGET_BAUD_RATE*8 + divu r0, r0, r2 + sub r0, #1 st r0, (r1) mov r1, #AUX_MU_LCR_REG @@ -176,3 +176,10 @@ recvwait: b lr .comm __uart_status, 1 + +.sect .data +.define _pi_clock_speed + +! 
System clock is running directly off the 19.2MHz crystal at initial reset +_pi_clock_speed: + .data4 19200000 From 2b3f95de0bcab8b5d037095d8dfff0ba3b61c0dd Mon Sep 17 00:00:00 2001 From: David Given Date: Wed, 26 Jun 2013 23:32:54 +0100 Subject: [PATCH 70/76] Fix jump range checking in the addcmpb family of instructions. --HG-- branch : dtrg-videocore --- mach/vc4/as/mach5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c index de2bda2ad..350806e11 100644 --- a/mach/vc4/as/mach5.c +++ b/mach/vc4/as/mach5.c @@ -394,7 +394,7 @@ static void branch_addcmp_common(quad opcode, int bits, struct expr_t* expr) d -= DOTGAIN; d /= 2; - if (!fitx(d, bits)) + if ((pass == 2) && !fitx(d, bits)) serror("target of branch is too far away"); emit2(opcode | maskx(d, bits)); From 29af6f1adbdd7deb17851ce9e166d0d27dadf4c9 Mon Sep 17 00:00:00 2001 From: David Given Date: Thu, 27 Jun 2013 11:25:50 +0100 Subject: [PATCH 71/76] ISA change: clz has been renamed to log2. --HG-- branch : dtrg-videocore --- mach/vc4/as/mach3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mach/vc4/as/mach3.c b/mach/vc4/as/mach3.c index 0ae247226..502894a9f 100644 --- a/mach/vc4/as/mach3.c +++ b/mach/vc4/as/mach3.c @@ -106,7 +106,7 @@ 0, OP_ALU, B8(00011000), "exts", 0, OP_ALU, B8(00011001), "neg", 0, OP_ALU, B8(00011010), "lsr", -0, OP_ALU, B8(00011011), "clz", +0, OP_ALU, B8(00011011), "log2", 0, OP_ALU, B8(00011100), "lsl", 0, OP_ALU, B8(00011101), "brev", 0, OP_ALU, B8(00011110), "asr", @@ -145,4 +145,4 @@ 0, OP_MEM, B8(00000110), "ldhs", 0, OP_MEM, B8(00000111), "sths", -0, OP_LEA, 0, "lea", \ No newline at end of file +0, OP_LEA, 0, "lea", From edb174da8d16d7c7429a790c4ee985996f848105 Mon Sep 17 00:00:00 2001 From: David Given Date: Sat, 29 Jun 2013 00:32:39 +0100 Subject: [PATCH 72/76] Fix incorrect encoding of 'push lr' and 'pop pc'. 
--HG-- branch : dtrg-videocore --- mach/vc4/as/mach5.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c index 350806e11..e98f67594 100644 --- a/mach/vc4/as/mach5.c +++ b/mach/vc4/as/mach5.c @@ -174,13 +174,11 @@ void stack_instr(quad opcode, int loreg, int hireg, int extrareg) case 26: /* lr */ extrareg = 26; hireg = loreg = -1; - b = 0; break; case 31: /* pc */ extrareg = 31; hireg = loreg = -1; - b = 0; break; default: @@ -204,7 +202,10 @@ void stack_instr(quad opcode, int loreg, int hireg, int extrareg) serror("invalid register range"); if (hireg == -1) - m = 31; + { + b = 3; + m = 15; + } else m = hireg - loreg; From 8b6951dac002428c8a8585137418754cfeae1a56 Mon Sep 17 00:00:00 2001 From: David Given Date: Sat, 29 Jun 2013 00:35:07 +0100 Subject: [PATCH 73/76] Fix incorrect offset encoding in lea (sp) instructions. --HG-- branch : dtrg-videocore --- mach/vc4/as/mach5.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c index e98f67594..5f10facf0 100644 --- a/mach/vc4/as/mach5.c +++ b/mach/vc4/as/mach5.c @@ -457,6 +457,7 @@ void lea_stack_instr(int rd, long va, int rs) if (rs != 25) serror("source register must be sp"); + va /= 4; if (!fitx(va, 6)) serror("offset too big to encode in instruction"); va = maskx(va, 6); From e36d739fa44c27e6bb1d3154d19bb1eaf4abb856 Mon Sep 17 00:00:00 2001 From: David Given Date: Mon, 1 Jul 2013 13:05:36 +0100 Subject: [PATCH 74/76] Add (largely untested) float/int conversion. 
--HG-- branch : dtrg-videocore --- mach/vc4/as/mach1.c | 3 ++- mach/vc4/as/mach2.c | 1 + mach/vc4/as/mach3.c | 5 +++++ mach/vc4/as/mach4.c | 9 +++++++++ mach/vc4/as/mach5.c | 10 ++++++++++ mach/vc4/ncg/table | 47 ++++++++++++++++++++++----------------------- 6 files changed, 50 insertions(+), 25 deletions(-) diff --git a/mach/vc4/as/mach1.c b/mach/vc4/as/mach1.c index 315468f8e..44caf0c57 100644 --- a/mach/vc4/as/mach1.c +++ b/mach/vc4/as/mach1.c @@ -24,4 +24,5 @@ extern void branch_addcmp_lit_reg_instr(int cc, int rd, long va, int rs, struct extern void branch_addcmp_reg_lit_instr(int cc, int rd, int ra, long vs, struct expr_t* expr); extern void branch_addcmp_lit_lit_instr(int cc, int rd, long va, long vs, struct expr_t* expr); extern void lea_stack_instr(int rd, long va, int rs); -extern void lea_address_instr(int rd, struct expr_t* expr); \ No newline at end of file +extern void lea_address_instr(int rd, struct expr_t* expr); +extern void fltcnv_instr(quad opcode, int cc, int rd, int ra, quad shift); diff --git a/mach/vc4/as/mach2.c b/mach/vc4/as/mach2.c index 4c02efb39..ec7a948d8 100644 --- a/mach/vc4/as/mach2.c +++ b/mach/vc4/as/mach2.c @@ -17,6 +17,7 @@ %token OP_MEM %token OP_MISC %token OP_MISCL +%token OP_FLTCNV %token OP_STACK %token OP_LEA diff --git a/mach/vc4/as/mach3.c b/mach/vc4/as/mach3.c index 502894a9f..4ca73f4ed 100644 --- a/mach/vc4/as/mach3.c +++ b/mach/vc4/as/mach3.c @@ -130,6 +130,11 @@ 0, OP_MISC, B16(11001001,11100000), "exp2", 0, OP_MISC, B16(11000101,11100000), "adds256", +0, OP_FLTCNV, B16(11001010,00000000), "ftrunc", +0, OP_FLTCNV, B16(11001010,00100000), "floor", +0, OP_FLTCNV, B16(11001010,01000000), "flts", +0, OP_FLTCNV, B16(11001010,01100000), "fltu", + 0, OP_MISCL, B16(11000100,10000000), "divs", 0, OP_MISCL, B16(11000100,11100000), "divu", diff --git a/mach/vc4/as/mach4.c b/mach/vc4/as/mach4.c index ef0b50082..99c368c12 100644 --- a/mach/vc4/as/mach4.c +++ b/mach/vc4/as/mach4.c @@ -74,5 +74,14 @@ operation | OP_LEA GPR ',' 
absexp '(' GPR ')' { lea_stack_instr($2, $4, $6); } | OP_LEA GPR ',' expr { lea_address_instr($2, &$4); } + + | OP_FLTCNV GPR ',' GPR { fltcnv_instr($1, ALWAYS, $2, $4, 0); } + | OP_FLTCNV CC GPR ',' GPR { fltcnv_instr($1, $2, $3, $5, 0); } + | OP_FLTCNV GPR ',' GPR ',' shift '#' absexp { fltcnv_instr($1, ALWAYS, $2, $4, $8); } + | OP_FLTCNV CC GPR ',' GPR ',' shift '#' absexp { fltcnv_instr($1, $2, $3, $5, $9); } ; +shift + : 'l' 's' 'r' + | 'l' 's' 'l'; + diff --git a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c index 350806e11..9166f340e 100644 --- a/mach/vc4/as/mach5.c +++ b/mach/vc4/as/mach5.c @@ -487,3 +487,13 @@ void lea_address_instr(int rd, struct expr_t* expr) emit4(expr->val - pc); } +/* Floating point conversion opcodes (ftrunc, floor, flts, fltu). Emits two halfwords: opcode|rd, then ra/cc/shift. The shift is checked with fitx() and masked to 6 bits so a bad value is reported instead of spilling into the cc/ra fields. */ + +void fltcnv_instr(quad opcode, int cc, int rd, int ra, quad shift) +{ + if (!fitx(shift, 6)) + serror("shift too big to encode in instruction"); + emit2(opcode | (rd<<0)); + emit2(B16(00000000,01000000) | (ra<<11) | (cc<<7) | maskx(shift, 6)); +} + diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table index 963fb74e1..8b4fe60a5 100644 --- a/mach/vc4/ncg/table +++ b/mach/vc4/ncg/table @@ -131,8 +131,11 @@ INSTRUCTIONS fadd GPR:wo, GPR:ro, GPR:ro. fcmp GPR:wo, GPR:ro, GPR:ro. fdiv GPR:wo, GPR:ro, GPR:ro. + flts GPR:wo, GPR:ro. + fltu GPR:wo, GPR:ro. fmul GPR:wo, GPR:ro, GPR:ro. fsub GPR:wo, GPR:ro, GPR:ro. + ftrunc GPR:wo, GPR:ro. ld GPR:wo, GPRINC:rw. ld GPR:wo, GPROFFSET+GPRGPR+LABEL:ro. ldb GPR:wo, GPROFFSET+GPRGPR+LABEL:ro. 
@@ -1511,36 +1514,32 @@ PATTERNS nop pat loc loc cfi $1==$2 && $1==QUAD /* Convert float -> integer */ - leaving - loc 0 -#if 0 - cal ".cfi" - lfr QUAD -#endif + with GPR + uses reusing %1, REG + gen + ftrunc %a, %1 + yields %a pat loc loc cfu $1==$2 && $1==QUAD /* Convert float -> unsigned */ - leaving - loc 0 -#if 0 - cal ".cfu" - lfr QUAD -#endif + with GPR + uses reusing %1, REG + gen + ftrunc %a, %1 + yields %a pat loc loc cif $1==$2 && $1==QUAD /* Convert integer -> float */ - leaving - loc 0 -#if 0 - cal ".cif" - lfr QUAD -#endif + with GPR + uses reusing %1, REG + gen + flts %a, %1 + yields %a pat loc loc cuf $1==$2 && $1==QUAD /* Convert unsigned -> float */ - leaving - loc 0 -#if 0 - cal ".cuf" - lfr QUAD -#endif + with GPR + uses reusing %1, REG + gen + fltu %a, %1 + yields %a pat fef /* Split float */ leaving From 171f16d7b8884dec5d915410e774a60a9d24f0cc Mon Sep 17 00:00:00 2001 From: David Given Date: Thu, 18 Jun 2015 23:38:50 +0200 Subject: [PATCH 75/76] Rename branch. --HG-- branch : dtrg-videocore-branch From f07c6e4d6a62694b8907c5f5c8b53f6504b94e8b Mon Sep 17 00:00:00 2001 From: David Given Date: Thu, 18 Jun 2015 23:39:25 +0200 Subject: [PATCH 76/76] Rename branch. --HG-- branch : dtrg-videocore-branch-branch