diff --git a/.distr b/.distr index dbe964f93..1009a685e 100644 --- a/.distr +++ b/.distr @@ -48,6 +48,7 @@ mach/i80 mach/i86 mach/i386 mach/m68020 +mach/vc4 plat plat/cpm @@ -55,5 +56,7 @@ plat/pc86 plat/linux plat/linux386 plat/linux68k +plat/rpi examples +man diff --git a/Makefile b/Makefile index e203d3243..dbac9cabd 100644 --- a/Makefile +++ b/Makefile @@ -54,6 +54,8 @@ CFLAGS += \ LDFLAGS += +ACKFLAGS = -Ih + all: installables .DELETE_ON_ERROR: @@ -101,6 +103,7 @@ include mach/i386/build.mk include mach/i86/build.mk include mach/m68020/build.mk # include mach/powerpc/build.mk +include mach/vc4/build.mk include plat/build.mk include plat/pc86/build.mk @@ -108,6 +111,7 @@ include plat/cpm/build.mk include plat/linux386/build.mk include plat/linux68k/build.mk # include plat/linuxppc/build.mk +include plat/rpi/build.mk .PHONY: installables installables: $(INSTALLABLES) diff --git a/README b/README index 3b0d877fa..74ca13b55 100644 --- a/README +++ b/README @@ -32,6 +32,7 @@ pc86 produces bootable floppy disk images for 8086 PCs linux386 produces ELF executables for PC Linux systems linux68k produces ELF executables for m68020 Linux systems cpm produces i80 CP/M .COM files +rpi produces Raspberry Pi GPU binaries INSTALLATION @@ -128,7 +129,7 @@ GOTCHAS There are some things you should be aware of. -- Look at plat//README for information about the two supported +- Look at plat//README for information about the supported platforms. 
- The library support is fairly limited; for C, it's at roughly the ANSI C diff --git a/first/core.mk b/first/core.mk index 261a2c30e..0ee653a1c 100644 --- a/first/core.mk +++ b/first/core.mk @@ -3,6 +3,7 @@ define reset $(eval o :=) $(eval s :=) $(eval cflags :=) + $(eval ackflags :=) $(eval ldflags :=) $(eval objdir :=) endef diff --git a/h/out.h b/h/out.h index 0dffd2908..cf6ef7776 100644 --- a/h/out.h +++ b/h/out.h @@ -64,6 +64,7 @@ struct outname { #define RELO4 3 /* 4 bytes */ #define RELOPPC 4 /* PowerPC 26-bit address */ #define RELOH2 5 /* write top 2 bytes of 4 byte word */ +#define RELOVC4 6 /* VideoCore IV address in 32-bit instruction */ #define RELPC 0x08 /* pc relative */ #define RELBR 0x10 /* High order byte lowest address. */ diff --git a/lang/cem/libcc.ansi/build.mk b/lang/cem/libcc.ansi/build.mk index 5987de9e3..d4300311d 100644 --- a/lang/cem/libcc.ansi/build.mk +++ b/lang/cem/libcc.ansi/build.mk @@ -16,6 +16,7 @@ define build-libcc-ansi-headers-impl float.h \ limits.h \ math.h \ + malloc.h \ setjmp.h \ signal.h \ stdarg.h \ diff --git a/lang/cem/libcc.ansi/headers/.distr b/lang/cem/libcc.ansi/headers/.distr index cf35cec17..22ee8d0c4 100644 --- a/lang/cem/libcc.ansi/headers/.distr +++ b/lang/cem/libcc.ansi/headers/.distr @@ -20,3 +20,4 @@ stdbool.h fcntl.h tgmath.h locale.h +malloc.h diff --git a/lang/cem/libcc.ansi/headers/malloc.h b/lang/cem/libcc.ansi/headers/malloc.h new file mode 100644 index 000000000..64a6de16c --- /dev/null +++ b/lang/cem/libcc.ansi/headers/malloc.h @@ -0,0 +1,66 @@ +/* + * stdlib.h - standard library + * + * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. + * See the copyright notice in the ACK home directory, in the file "Copyright". 
+ */ +/* $Id$ */ + +#ifndef _STDLIB_H +#define _STDLIB_H + +#include + +#define EXIT_FAILURE 1 +#define EXIT_SUCCESS 0 +#define RAND_MAX 32767 +#define MB_CUR_MAX sizeof(wchar_t) + +typedef struct { int quot, rem; } div_t; +typedef struct { long quot, rem; } ldiv_t; + +extern double atof(const char *_nptr); +extern int atoi(const char *_nptr); +extern long atol(const char *_nptr); +extern double strtod(const char *_nptr, char **_endptr); +extern long strtol(const char *_nptr, char **_endptr, int _base); +extern unsigned long strtoul(const char *_nptr, char **_endptr, int _base); +extern int rand(void); +extern void srand(unsigned int _seed); +extern void* calloc(size_t _nmemb, size_t _size); +extern void free(void *_ptr); +extern void* malloc(size_t _size); +extern void* realloc(void *_ptr, size_t _size); +extern void abort(void); +extern int atexit(void (*_func)(void)); +extern void exit(int _status); +extern void _Exit(int _status); +extern char* getenv(const char *_name); +extern int setenv(const char *_name, const char *_value, int _overwrite); +extern int unsetenv(const char *_name); +extern int putenv(char *_string); +extern int system(const char *_string); +extern void* bsearch(const void *_key, const void *_base, + size_t _nmemb, size_t _size, + int (*_compar)(const void *, const void *)); +extern void qsort(void *_base, size_t _nmemb, size_t _size, + int (*_compar)(const void *, const void *)); +extern int abs(int _j); +extern div_t div(int _numer, int _denom); +extern long labs(long _j); +extern ldiv_t ldiv(long _numer, long _denom); +extern int mblen(const char *_s, size_t _n); +extern int mbtowc(wchar_t *_pwc, const char *_s, size_t _n); +extern int wctomb(char *_s, wchar_t _wchar); +extern size_t mbstowcs(wchar_t *_pwcs, const char *_s, size_t _n); +extern size_t wcstombs(char *_s, const wchar_t *_pwcs, size_t _n); + +/* Extensions (not part of the standard) */ + +#define atof(n) strtod(n, (char **)NULL) +#define atoi(n) ((int)strtol(n, (char **)NULL, 
10)) +#define atol(n) strtol(n, (char **)NULL, 10) +#define atoll(n) strtoll(n, (char **)NULL, 10) +#define mblen(s, n) mbtowc((wchar_t *)0, s, n) + +#endif diff --git a/lang/cem/libcc.ansi/headers/stdint.h b/lang/cem/libcc.ansi/headers/stdint.h index 86fd04815..ce9cf3e38 100644 --- a/lang/cem/libcc.ansi/headers/stdint.h +++ b/lang/cem/libcc.ansi/headers/stdint.h @@ -26,7 +26,7 @@ typedef unsigned short uint16_t; #if _EM_WSIZE == 4 typedef signed int int32_t; -typedef unsigned short uint32_t; +typedef unsigned int uint32_t; #else typedef signed long int32_t; typedef unsigned long uint32_t; diff --git a/lang/cem/libcc.ansi/stdio/.distr b/lang/cem/libcc.ansi/stdio/.distr index 5ad14e146..4206946e7 100644 --- a/lang/cem/libcc.ansi/stdio/.distr +++ b/lang/cem/libcc.ansi/stdio/.distr @@ -39,6 +39,7 @@ rewind.c scanf.c setbuf.c setvbuf.c +snprintf.c sprintf.c sscanf.c tmpfile.c @@ -47,3 +48,4 @@ ungetc.c vfprintf.c vprintf.c vsprintf.c +vsnprintf.c diff --git a/lang/cem/libcc.ansi/stdlib/.distr b/lang/cem/libcc.ansi/stdlib/.distr index a556b3058..dd4a54186 100644 --- a/lang/cem/libcc.ansi/stdlib/.distr +++ b/lang/cem/libcc.ansi/stdlib/.distr @@ -17,6 +17,7 @@ mbstowcs.c mbtowc.c qsort.c rand.c +setenv.c strtod.c strtol.c system.c diff --git a/lang/cem/libcc.ansi/string/.distr b/lang/cem/libcc.ansi/string/.distr index f7ff5cc28..ef6a1164e 100644 --- a/lang/cem/libcc.ansi/string/.distr +++ b/lang/cem/libcc.ansi/string/.distr @@ -9,6 +9,7 @@ strcmp.c strcoll.c strcpy.c strcspn.c +strdup.c strerror.c strlen.c strncat.c diff --git a/lib/descr/fe b/lib/descr/fe index a8d610362..4e85c64b3 100644 --- a/lib/descr/fe +++ b/lib/descr/fe @@ -84,7 +84,7 @@ name cem -DEM_SSIZE={s} -DEM_LSIZE={l} -DEM_FSIZE={f} -DEM_DSIZE={d}) \ -D_EM_WSIZE={w} -D_EM_PSIZE={p} \ -D_EM_SSIZE={s} -D_EM_LSIZE={l} -D_EM_FSIZE={f} -D_EM_DSIZE={d} \ - -Vw{w}.{w}i{w}.{w}p{p}.{w}f{f}.{w}s{s}.{s}l{l}.{w}d{d}.{w} \ + -Vw{w}.{wa}i{w}.{wa}p{p}.{pa}f{f}.{fa}s{s}.{sa}l{l}.{la}d{d}.{da}x{x}.{xa} \ {CC_ALIGN?} \ 
{CEM_F?} {LFLAG?} < > callname acc diff --git a/mach/.distr b/mach/.distr index fb9d533eb..d2137b302 100644 --- a/mach/.distr +++ b/mach/.distr @@ -25,6 +25,7 @@ s2650 sun3 sun2 vax4 +vc4 xenix3 z80 z8000 diff --git a/mach/proto/as/build.mk b/mach/proto/as/build.mk index fce5ec204..726cd676e 100644 --- a/mach/proto/as/build.mk +++ b/mach/proto/as/build.mk @@ -26,7 +26,9 @@ define build-as-impl $(eval CLEANABLES += $(OBJDIR)/$D/preprocessed-comm2.y) $(OBJDIR)/$D/preprocessed-comm2.y: mach/proto/as/comm2.y $(CPPANSI) \ + mach/$(ARCH)/as/mach1.c \ mach/$(ARCH)/as/mach2.c \ + mach/$(ARCH)/as/mach3.c \ mach/$(ARCH)/as/mach4.c @echo PREPROCESS $$@ @mkdir -p $$(dir $$@) @@ -39,6 +41,10 @@ $(OBJDIR)/$D/preprocessed-comm2.y: mach/proto/as/comm2.y $(CPPANSI) \ $(call rawfile, $(LIBOBJECT)) $(call cprogram, $(BINDIR)/$(PLATFORM)/as) $(call installto, $(PLATDEP)/$(PLATFORM)/as) + + $(call reset) + $(call file, man/$(ARCH)_as.6) + $(call installto, $(INSDIR)/share/man/man6/$(ARCH)_as.6) endef build-as = $(eval $(build-as-impl)) diff --git a/mach/proto/ncg/fillem.c b/mach/proto/ncg/fillem.c index 8838d8f9c..516239b3d 100644 --- a/mach/proto/ncg/fillem.c +++ b/mach/proto/ncg/fillem.c @@ -82,6 +82,8 @@ extern char em_flag[]; extern short em_ptyp[]; extern double atof(); +void prolog(full nlocals); + /* Own version of atol that continues computing on overflow. We don't know that about the ANSI C one. 
*/ diff --git a/mach/vc4/.distr b/mach/vc4/.distr new file mode 100644 index 000000000..38864464f --- /dev/null +++ b/mach/vc4/.distr @@ -0,0 +1,5 @@ +build.mk +as +ncg +libem +libend diff --git a/mach/vc4/as/.distr b/mach/vc4/as/.distr new file mode 100644 index 000000000..6ef7a1516 --- /dev/null +++ b/mach/vc4/as/.distr @@ -0,0 +1,7 @@ +mach0.c +mach1.c +mach2.c +mach3.c +mach4.c +mach5.c +binary.h diff --git a/mach/vc4/as/binary.h b/mach/vc4/as/binary.h new file mode 100644 index 000000000..c00e73550 --- /dev/null +++ b/mach/vc4/as/binary.h @@ -0,0 +1,34 @@ +/* + * VideoCore IV assembler for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#ifndef BINARY_H +#define BINARY_H + +/* This grotesque nonsense allows us to use binary constants from C. */ + +#define HEX__(n) 0x##n##LU +#define B8__(x) \ + ((x&0x0000000FLU)?1:0) \ + +((x&0x000000F0LU)?2:0) \ + +((x&0x00000F00LU)?4:0) \ + +((x&0x0000F000LU)?8:0) \ + +((x&0x000F0000LU)?16:0) \ + +((x&0x00F00000LU)?32:0) \ + +((x&0x0F000000LU)?64:0) \ + +((x&0xF0000000LU)?128:0) + +#define B8(d) \ + ((unsigned char)B8__(HEX__(d))) +#define B16(dmsb,dlsb) \ + (((unsigned short)B8(dmsb)<<8) + B8(dlsb)) +#define B32(dmsb,db2,db3,dlsb) \ + (((unsigned long)B8(dmsb)<<24) \ + + ((unsigned long)B8(db2)<<16) \ + + ((unsigned long)B8(db3)<<8) \ + + B8(dlsb)) + +#endif diff --git a/mach/vc4/as/mach0.c b/mach/vc4/as/mach0.c new file mode 100644 index 000000000..2df70061c --- /dev/null +++ b/mach/vc4/as/mach0.c @@ -0,0 +1,32 @@ +/* + * VideoCore IV assembler for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. 
+ */ + +#define THREE_PASS /* branch and offset optimization */ +#define LISTING /* enable listing facilities */ +#define RELOCATION /* generate relocatable code */ +#define DEBUG 0 + +#undef valu_t +#define valu_t long + +#undef ADDR_T +#define ADDR_T long + +#undef word_t +#define word_t long + +typedef unsigned long quad; + +#undef ALIGNWORD +#define ALIGNWORD 4 + +#undef ALIGNSECT +#define ALIGNSECT 4 + +#undef VALWIDTH +#define VALWIDTH 8 + diff --git a/mach/vc4/as/mach1.c b/mach/vc4/as/mach1.c new file mode 100644 index 000000000..44caf0c57 --- /dev/null +++ b/mach/vc4/as/mach1.c @@ -0,0 +1,28 @@ +/* + * VideoCore IV assembler for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include "binary.h" + +#define ALWAYS 14 + +extern void alu_instr_reg(quad opcode, int cc, int rd, int ra, int rb); +extern void alu_instr_lit(quad opcode, int cc, int rd, int ra, long value); +extern void misc_instr_reg(quad opcode, int cc, int rd, int ra, int rb); +extern void misc_instr_lit(quad opcode, int cc, int rd, int ra, quad value); +extern void branch_instr(int bl, int cc, struct expr_t* expr); +extern void stack_instr(quad opcode, int loreg, int hireg, int extrareg); +extern void mem_instr(quad opcode, int cc, int rd, long offset, int rs); +extern void mem_offset_instr(quad opcode, int cc, int rd, int qa, int rb); +extern void mem_postincr_instr(quad opcode, int cc, int rd, int rs); +extern void mem_address_instr(quad opcode, int rd, struct expr_t* expr); +extern void branch_addcmp_reg_reg_instr(int cc, int rd, int ra, int rs, struct expr_t* expr); +extern void branch_addcmp_lit_reg_instr(int cc, int rd, long va, int rs, struct expr_t* expr); +extern void branch_addcmp_reg_lit_instr(int cc, int rd, int ra, long vs, struct expr_t* expr); +extern void branch_addcmp_lit_lit_instr(int cc, int rd, long va, long vs, struct expr_t* 
expr); +extern void lea_stack_instr(int rd, long va, int rs); +extern void lea_address_instr(int rd, struct expr_t* expr); +extern void fltcnv_instr(quad opcode, int cc, int rd, int ra, quad shift); diff --git a/mach/vc4/as/mach2.c b/mach/vc4/as/mach2.c new file mode 100644 index 000000000..ec7a948d8 --- /dev/null +++ b/mach/vc4/as/mach2.c @@ -0,0 +1,24 @@ +/* + * VideoCore IV assembler for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +%token GPR +%token CC + +%token OP +%token OP_BRANCH OP_BRANCHLINK OP_ADDCMPB +%token OP_ONEREG +%token OP_ONELREG +%token OP_ALU +%token OP_FPU +%token OP_MEM +%token OP_MISC +%token OP_MISCL +%token OP_FLTCNV +%token OP_STACK +%token OP_LEA + + diff --git a/mach/vc4/as/mach3.c b/mach/vc4/as/mach3.c new file mode 100644 index 000000000..4ca73f4ed --- /dev/null +++ b/mach/vc4/as/mach3.c @@ -0,0 +1,153 @@ +/* + * VideoCore IV assembler for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. 
+ */ + +/* Integer registers */ + +0, GPR, 0, "r0", +0, GPR, 1, "r1", +0, GPR, 2, "r2", +0, GPR, 3, "r3", +0, GPR, 4, "r4", +0, GPR, 5, "r5", + +0, GPR, 6, "r6", +0, GPR, 7, "r7", +0, GPR, 8, "r8", +0, GPR, 9, "r9", +0, GPR, 10, "r10", +0, GPR, 11, "r11", +0, GPR, 12, "r12", +0, GPR, 13, "r13", +0, GPR, 14, "r14", +0, GPR, 15, "r15", +0, GPR, 16, "r16", +0, GPR, 17, "r17", +0, GPR, 18, "r18", +0, GPR, 19, "r19", +0, GPR, 20, "r20", +0, GPR, 21, "r21", +0, GPR, 22, "r22", +0, GPR, 23, "r23", +0, GPR, 24, "r24", +0, GPR, 24, "fp", + +0, GPR, 25, "r25", +0, GPR, 25, "sp", +0, GPR, 26, "r26", +0, GPR, 26, "lr", +0, GPR, 27, "r27", +0, GPR, 28, "r28", +0, GPR, 29, "r29", +0, GPR, 30, "r30", +0, GPR, 30, "sr", +0, GPR, 31, "r31", +0, GPR, 31, "pc", + +/* Condition codes */ + +0, CC, 0, ".eq", +0, CC, 1, ".ne", +0, CC, 2, ".cs", +0, CC, 2, ".lo", +0, CC, 3, ".cc", +0, CC, 3, ".hs", +0, CC, 4, ".mi", +0, CC, 5, ".pl", +0, CC, 6, ".vs", +0, CC, 7, ".vc", +0, CC, 8, ".hi", +0, CC, 9, ".ls", +0, CC, 10, ".ge", +0, CC, 11, ".lt", +0, CC, 12, ".gt", +0, CC, 13, ".le", +0, CC, 15, ".f", + +/* Special instructions */ + +0, OP, B16(00000000,00000001), "nop", +0, OP, B16(00000000,00001010), "rti", + +0, OP_BRANCH, 0, "b", +0, OP_BRANCHLINK, 0, "bl", +0, OP_ADDCMPB, 0, "addcmpb", + +0, OP_ONELREG, B16(00000000,10000000), "tbb", +0, OP_ONELREG, B16(00000000,10100000), "tbs", + +0, OP_ALU, B8(00000000), "mov", +0, OP_ALU, B8(00000001), "cmn", +0, OP_ALU, B8(00000010), "add", +0, OP_ALU, B8(00000011), "bic", +0, OP_ALU, B8(00000100), "mul", +0, OP_ALU, B8(00000101), "eor", +0, OP_ALU, B8(00000110), "sub", +0, OP_ALU, B8(00000111), "and", +0, OP_ALU, B8(00001000), "mvn", +0, OP_ALU, B8(00001001), "ror", +0, OP_ALU, B8(00001010), "cmp", +0, OP_ALU, B8(00001011), "rsb", +0, OP_ALU, B8(00001100), "btst", +0, OP_ALU, B8(00001101), "or", +0, OP_ALU, B8(00001110), "extu", +0, OP_ALU, B8(00001111), "max", +0, OP_ALU, B8(00010000), "bset", +0, OP_ALU, B8(00010001), "min", +0, OP_ALU, 
B8(00010010), "bclr", +0, OP_ALU, B8(00010011), "adds2", +0, OP_ALU, B8(00010100), "bchg", +0, OP_ALU, B8(00010101), "adds4", +0, OP_ALU, B8(00010110), "adds8", +0, OP_ALU, B8(00010111), "adds16", +0, OP_ALU, B8(00011000), "exts", +0, OP_ALU, B8(00011001), "neg", +0, OP_ALU, B8(00011010), "lsr", +0, OP_ALU, B8(00011011), "log2", +0, OP_ALU, B8(00011100), "lsl", +0, OP_ALU, B8(00011101), "brev", +0, OP_ALU, B8(00011110), "asr", +0, OP_ALU, B8(00011111), "abs", + +0, OP_MISC, B16(11001000,00000000), "fadd", +0, OP_MISC, B16(11001000,00100000), "fsub", +0, OP_MISC, B16(11001000,01000000), "fmul", +0, OP_MISC, B16(11001000,01100000), "fdiv", +0, OP_MISC, B16(11001000,10000000), "fcmp", +0, OP_MISC, B16(11001000,10100000), "fabs", +0, OP_MISC, B16(11001000,11000000), "frsb", +0, OP_MISC, B16(11001000,11100000), "fmax", +0, OP_MISC, B16(11001001,00000000), "frcp", +0, OP_MISC, B16(11001001,00100000), "frsqrt", +0, OP_MISC, B16(11001001,01000000), "fnmul", +0, OP_MISC, B16(11001001,01100000), "fmin", +0, OP_MISC, B16(11001001,10000000), "fld1", +0, OP_MISC, B16(11001001,10100000), "fld0", +0, OP_MISC, B16(11001001,11000000), "log2", +0, OP_MISC, B16(11001001,11100000), "exp2", +0, OP_MISC, B16(11000101,11100000), "adds256", + +0, OP_FLTCNV, B16(11001010,00000000), "ftrunc", +0, OP_FLTCNV, B16(11001010,00100000), "floor", +0, OP_FLTCNV, B16(11001010,01000000), "flts", +0, OP_FLTCNV, B16(11001010,01100000), "fltu", + +0, OP_MISCL, B16(11000100,10000000), "divs", +0, OP_MISCL, B16(11000100,11100000), "divu", + +0, OP_STACK, B16(00000010,10000000), "push", +0, OP_STACK, B16(00000010,00000000), "pop", + +0, OP_MEM, B8(00000000), "ld", +0, OP_MEM, B8(00000001), "st", +0, OP_MEM, B8(00000010), "ldh", +0, OP_MEM, B8(00000011), "sth", +0, OP_MEM, B8(00000100), "ldb", +0, OP_MEM, B8(00000101), "stb", +0, OP_MEM, B8(00000110), "ldhs", +0, OP_MEM, B8(00000111), "sths", + +0, OP_LEA, 0, "lea", diff --git a/mach/vc4/as/mach4.c b/mach/vc4/as/mach4.c new file mode 100644 index 
000000000..99c368c12 --- /dev/null +++ b/mach/vc4/as/mach4.c @@ -0,0 +1,87 @@ +/* + * VideoCore IV assembler for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +operation + : OP { emit2($1); } + + | OP_BRANCH GPR { emit2(B16(00000000,01000000) | ($2<<0)); } + | OP_BRANCHLINK GPR { emit2(B16(00000000,01100000) | ($2<<0)); } + + | OP_BRANCH expr { branch_instr(0, ALWAYS, &$2); } + | OP_BRANCHLINK expr { branch_instr(1, ALWAYS, &$2); } + | OP_BRANCH CC expr { branch_instr(0, $2, &$3); } + | OP_BRANCHLINK CC expr { branch_instr(1, $2, &$3); } + + | OP_BRANCH GPR ',' GPR ',' expr { branch_addcmp_lit_reg_instr(ALWAYS, $2, 0, $4, &$6); } + | OP_BRANCH CC GPR ',' GPR ',' expr { branch_addcmp_lit_reg_instr($2, $3, 0, $5, &$7); } + | OP_BRANCH GPR ',' '#' absexp ',' expr { branch_addcmp_lit_lit_instr(ALWAYS, $2, 0, $5, &$7); } + | OP_BRANCH CC GPR ',' '#' absexp ',' expr { branch_addcmp_lit_lit_instr($2, $3, 0, $6, &$8); } + | OP_ADDCMPB GPR ',' GPR ',' GPR ',' expr { branch_addcmp_reg_reg_instr(ALWAYS, $2, $4, $6, &$8); } + | OP_ADDCMPB CC GPR ',' GPR ',' GPR ',' expr { branch_addcmp_reg_reg_instr($2, $3, $5, $7, &$9); } + | OP_ADDCMPB GPR ',' '#' absexp ',' GPR ',' expr { branch_addcmp_lit_reg_instr(ALWAYS, $2, $5, $7, &$9); } + | OP_ADDCMPB CC GPR ',' '#' absexp ',' GPR ',' expr { branch_addcmp_lit_reg_instr($2, $3, $6, $8, &$10); } + | OP_ADDCMPB GPR ',' GPR ',' '#' absexp ',' expr { branch_addcmp_reg_lit_instr(ALWAYS, $2, $4, $7, &$9); } + | OP_ADDCMPB CC GPR ',' GPR ',' '#' absexp ',' expr { branch_addcmp_reg_lit_instr($2, $3, $5, $8, &$10); } + | OP_ADDCMPB GPR ',' '#' absexp ',' '#' absexp ',' expr { branch_addcmp_lit_lit_instr(ALWAYS, $2, $5, $8, &$10); } + | OP_ADDCMPB CC GPR ',' '#' absexp ',' '#' absexp ',' expr { branch_addcmp_lit_lit_instr($2, $3, $6, $9, &$11); } + + | OP_ONELREG GPR + { + if ($2 >= 0x10) + 
serror("cannot use r16+ here"); + emit2($1 | ($2<<0)); + } + + | OP_ALU GPR ',' GPR { alu_instr_reg($1, ALWAYS, $2, $2, $4); } + | OP_ALU GPR ',' GPR ',' GPR { alu_instr_reg($1, ALWAYS, $2, $4, $6); } + | OP_ALU CC GPR ',' GPR { alu_instr_reg($1, $2, $3, $3, $5); } + | OP_ALU CC GPR ',' GPR ',' GPR { alu_instr_reg($1, $2, $3, $5, $7); } + + | OP_ALU GPR ',' '#' absexp { alu_instr_lit($1, ALWAYS, $2, $2, $5); } + | OP_ALU GPR ',' GPR ',' '#' absexp { alu_instr_lit($1, ALWAYS, $2, $4, $7); } + | OP_ALU CC GPR ',' '#' absexp { alu_instr_lit($1, $2, $3, $3, $6); } + | OP_ALU CC GPR ',' GPR ',' '#' absexp { alu_instr_lit($1, $2, $3, $5, $8); } + + | OP_MISC GPR ',' GPR ',' GPR { misc_instr_reg($1, ALWAYS, $2, $4, $6); } + | OP_MISC CC GPR ',' GPR ',' GPR { misc_instr_reg($1, $2, $3, $5, $7); } + + | OP_MISCL GPR ',' GPR ',' GPR { misc_instr_reg($1, ALWAYS, $2, $4, $6); } + | OP_MISCL CC GPR ',' GPR ',' GPR { misc_instr_reg($1, $2, $3, $5, $7); } + | OP_MISCL GPR ',' GPR ',' '#' absexp { misc_instr_lit($1, ALWAYS, $2, $4, $7); } + | OP_MISCL CC GPR ',' GPR ',' '#' absexp { misc_instr_lit($1, $2, $3, $5, $8); } + + | OP_STACK GPR { stack_instr($1, $2, $2, -1); } + | OP_STACK GPR ',' GPR { stack_instr($1, $2, $2, $4); } + | OP_STACK GPR '-' GPR { stack_instr($1, $2, $4, -1); } + | OP_STACK GPR '-' GPR ',' GPR { stack_instr($1, $2, $4, $6); } + + | OP_MEM GPR ',' '(' GPR ')' { mem_instr($1, ALWAYS, $2, 0, $5); } + | OP_MEM CC GPR ',' '(' GPR ')' { mem_instr($1, $2, $3, 0, $6); } + | OP_MEM GPR ',' absexp '(' GPR ')' { mem_instr($1, ALWAYS, $2, $4, $6); } + | OP_MEM CC GPR ',' absexp '(' GPR ')' { mem_instr($1, $2, $3, $5, $7); } + + | OP_MEM GPR ',' '(' GPR ',' GPR ')' { mem_offset_instr($1, ALWAYS, $2, $5, $7); } + | OP_MEM CC GPR ',' '(' GPR ',' GPR ')' { mem_offset_instr($1, $2, $3, $6, $8); } + + | OP_MEM GPR ',' '(' GPR ')' '+' '+' { mem_postincr_instr($1, ALWAYS, $2, $5); } + | OP_MEM CC GPR ',' '(' GPR ')' '+' '+' { mem_postincr_instr($1, $2, $3, $6); } + + | OP_MEM 
GPR ',' expr { mem_address_instr($1, $2, &$4); } + + | OP_LEA GPR ',' absexp '(' GPR ')' { lea_stack_instr($2, $4, $6); } + | OP_LEA GPR ',' expr { lea_address_instr($2, &$4); } + + | OP_FLTCNV GPR ',' GPR { fltcnv_instr($1, ALWAYS, $2, $4, 0); } + | OP_FLTCNV CC GPR ',' GPR { fltcnv_instr($1, $2, $3, $5, 0); } + | OP_FLTCNV GPR ',' GPR ',' shift '#' absexp { fltcnv_instr($1, ALWAYS, $2, $4, $8); } + | OP_FLTCNV CC GPR ',' GPR ',' shift '#' absexp { fltcnv_instr($1, $2, $3, $5, $9); } + ; + +shift + : 'l' 's' 'r' + | 'l' 's' 'l'; + diff --git a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c new file mode 100644 index 000000000..5ff56f779 --- /dev/null +++ b/mach/vc4/as/mach5.c @@ -0,0 +1,501 @@ +/* + * VideoCore IV assembler for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include + +#define maskx(v, x) (v & ((1<<(x))-1)) + +static void toobig(void) +{ + serror("offset too big to encode into instruction"); +} + +/* Assemble an ALU instruction where rb is a register. */ + +void alu_instr_reg(quad op, int cc, int rd, int ra, int rb) +{ + /* Can we use short form? */ + + if ((cc == ALWAYS) && (ra == rd) && (ra < 0x10) && (rb < 0x10)) + { + emit2(B16(01000000,00000000) | (op<<8) | (rb<<4) | (rd<<0)); + return; + } + + /* Long form, then. */ + + emit2(B16(11000000,00000000) | (op<<5) | (rd<<0)); + emit2(B16(00000000,00000000) | (ra<<11) | (cc<<7) | (rb<<0)); +} + +/* Assemble an ALU instruction where rb is a literal. */ + +void alu_instr_lit(quad op, int cc, int rd, int ra, long value) +{ + /* 16 bit short form? */ + + if ((cc == ALWAYS) && !(op & 1) && (value >= 0) && (value <= 0x1f) && + (ra == rd) && (ra < 0x10)) + { + emit2(B16(01100000,00000000) | (op<<8) | (value<<4) | (rd<<0)); + return; + } + + /* 32 bit medium form? 
+ */ + + if ((value >= 0) && (value <= 0x1f)) + { + emit2(B16(11000000,00000000) | (op<<5) | (rd<<0)); + emit2(B16(00000000,01000000) | (ra<<11) | (cc<<7) | (value<<0)); + return; + } + + /* Long form, then. */ + + if (cc != ALWAYS) + serror("cannot use condition codes with ALU literals this big"); + + /* add is special. */ + + if (op == B8(00000010)) + emit2(B16(11101100,00000000) | (ra<<5) | (rd<<0)); + else + { + if (ra != rd) + serror("can only use 2op form of ALU instructions with literals this big"); + emit2(B16(11101000,00000000) | (op<<5) | (rd<<0)); + } + + emit4(value); +} + +/* Miscellaneous instructions with three registers and a cc. */ + +void misc_instr_reg(quad op, int cc, int rd, int ra, int rb) +{ + emit2(op | (rd<<0)); + emit2(B16(00000000,00000000) | (ra<<11) | (cc<<7) | (rb<<0)); +} + +/* Miscellaneous instructions with two registers, a literal, and a cc. */ + +void misc_instr_lit(quad op, int cc, int rd, int ra, quad value) +{ + if (value > 0x1f) /* value is unsigned, so only the upper bound of 0..31 can be violated */ + serror("only constants from 0..31 can be used here"); + + emit2(op | (rd<<0)); + emit2(B16(00000000,01000000) | (ra<<11) | (cc<<7) | (value<<0)); +} + +/* Assemble a branch instruction. This may be a near branch into this + * object file, or a far branch which requires a fixup. */ + +void branch_instr(int bl, int cc, struct expr_t* expr) +{ + quad pc = DOTVAL; + quad type = expr->typ & S_TYP; + int d; + + /* Sanity checking. */ + + if (bl && (cc != ALWAYS)) + serror("can't use condition codes with bl"); + if (type == S_ABS) + serror("can't use absolute addresses here"); + + /* The VC4 branch instructions express distance in 2-byte + * words. */ + + d = (int32_t)expr->val - (int32_t)pc; + if ((pass == 2) && (d > 0) && !(expr->typ & S_DOT)) + d -= DOTGAIN; + d /= 2; + + /* If this is a reference to code within this section, and it's + * close enough to the program counter, we can use a short- + * form instruction. 
+ */ + + if (small(!bl && (type == DOTTYP) && fitx(d, 7), 2)) + { + emit2(B16(00011000,00000000) | (cc<<7) | (d&0x7f)); + return; + } + + /* Absolute addresses and references to other sections + * need the full 32 bits. */ + + newrelo(expr->typ, RELOVC4|RELPC); + + if (bl) + { + quad v, hiv, lov; + + if (!fitx(d, 27)) + toobig(); + + v = maskx(d, 27); + hiv = v >> 23; + lov = v & 0x007fffff; + emit2(B16(10010000,10000000) | (lov>>16) | (hiv<<8)); + emit2(B16(00000000,00000000) | (lov&0xffff)); + } + else + { + quad v; + + if (!fitx(d, 23)) + toobig(); + + v = maskx(d, 23); + emit2(B16(10010000,00000000) | (cc<<8) | (v>>16)); + emit2(B16(00000000,00000000) | (v&0xffff)); + } +} + +/* Push/pop. */ + +void stack_instr(quad opcode, int loreg, int hireg, int extrareg) +{ + int b = 0; /* defined value for the error path, where no case below assigns it */ + int m; + + switch (loreg) + { + case 0: b = 0; break; + case 6: b = 1; break; + case 16: b = 2; break; + case 24: b = 3; break; + + case 26: /* lr */ + extrareg = 26; + hireg = loreg = -1; + break; + + case 31: /* pc */ + extrareg = 31; + hireg = loreg = -1; + break; + + default: + serror("base register for push or pop may be only r0, r6, r16, r24, lr or pc"); + } + + if (opcode & 0x0080) + { + /* Push */ + if (extrareg == 31) + serror("cannot push pc"); + } + else + { + /* Pop */ + if (extrareg == 26) + serror("cannot pop lr"); + } + + if (hireg < loreg) + serror("invalid register range"); + + if (hireg == -1) + { + b = 3; + m = 15; + } + else + m = hireg - loreg; + + emit2(opcode | (b<<5) | (m<<0) | ((extrareg != -1) ? 0x0100 : 0)); +} + +/* Memory operations where the offset is a fixed value (including zero). */ + +void mem_instr(quad opcode, int cc, int rd, long offset, int rs) +{ + quad uoffset = (quad) offset; + int multiple4 = !(offset & 3); + int intonly = ((opcode & B8(00000110)) == 0); + + /* If no CC, there are some special forms we can use. */ + + if (cc == ALWAYS) + { + /* Very short form, special for stack offsets. 
+ */ + + if (intonly && (rs == 25) && multiple4 && fitx(offset, 7) && (rd < 0x10)) + { + quad o = maskx(offset, 7) / 4; + emit2(B16(00000100,00000000) | (opcode<<9) | (o<<4) | (rd<<0)); + return; + } + + /* Slightly longer form for directly dereferencing via a register. */ + + if ((rs < 0x10) && (rd < 0x10) && (offset == 0)) + { + emit2(B16(00001000,00000000) | (opcode<<8) | (rs<<4) | (rd<<0)); + return; + } + + /* Integer only, but a limited offset; it is encoded as offset/4, so it must be a multiple of 4. */ + + if (intonly && multiple4 && (uoffset <= 0x3f) && (rs < 0x10) && (rd < 0x10)) + { + quad o = uoffset / 4; + emit2(B16(00100000,00000000) | (opcode<<12) | (o<<8) | + (rs<<4) | (rd<<0)); + return; + } + + /* Certain registers support 16-bit offsets. */ + + if (fitx(offset, 16)) + { + switch (rs) + { + case 0: opcode = B16(10101011,00000000) | (opcode<<5); goto specialreg; + case 24: opcode = B16(10101000,00000000) | (opcode<<5); goto specialreg; + case 25: opcode = B16(10101001,00000000) | (opcode<<5); goto specialreg; + case 31: opcode = B16(10101010,00000000) | (opcode<<5); goto specialreg; + default: break; + + specialreg: + { + quad o = maskx(offset, 16); + emit2(opcode | (rd<<0)); + emit2(o); + return; + } + } + } + + /* 12-bit displacements. */ + + if (fitx(offset, 12)) + { + quad looffset = maskx(offset, 11); + quad hioffset = (offset >> 11) & 1; + + emit2(B16(10100010,00000000) | (opcode<<5) | (rd<<0) | (hioffset<<8)); + emit2(B16(00000000,00000000) | (rs<<11) | (looffset<<0)); + return; + } + + /* Everything else uses Very Long Form. */ + + if (!fitx(offset, 27)) + serror("offset will not fit into load/store instruction"); + + if (rs == 31) + opcode = B16(11100111,00000000) | (opcode<<5); + else + opcode = B16(11100110,00000000) | (opcode<<5); + + emit2(opcode | (rd<<0)); + emit4((rs<<27) | maskx(offset, 27)); + return; + } + + /* Now we're on to load/store instructions with ccs. 
*/ + + if (uoffset <= 0x1f) + { + emit2(B16(10100000,00000000) | (opcode<<5) | (rd<<0)); + emit2(B16(00000000,01000000) | (rs<<11) | (cc<<7) | (uoffset<<0)); + return; + } + + /* No encoding for this instruction. */ + + serror("invalid load/store instruction"); +} + +/* Memory operations where the destination address is a sum of two + * registers. */ + +void mem_offset_instr(quad opcode, int cc, int rd, int ra, int rb) +{ + emit2(B16(10100000,00000000) | (opcode<<5) | (rd<<0)); + emit2(B16(00000000,00000000) | (ra<<11) | (cc<<7) | (rb<<0)); +} + +/* Memory operations with postincrement. */ + +void mem_postincr_instr(quad opcode, int cc, int rd, int rs) +{ + emit2(B16(10100101,00000000) | (opcode<<5) | (rd<<0)); + emit2(B16(00000000,00000000) | (rs<<11) | (cc<<7)); +} + +/* Memory operations where the destination is an address literal. */ + +void mem_address_instr(quad opcode, int rd, struct expr_t* expr) +{ + static const char sizes[] = {4, 4, 2, 2, 1, 1, 2, 2}; + int size = sizes[opcode]; + quad type = expr->typ & S_TYP; + int d, scaledd; + + /* Sanity checking. */ + + if (type == S_ABS) + serror("can't use absolute addresses here"); + + d = expr->val - DOTVAL; + if ((pass == 2) && (d > 0) && !(expr->typ & S_DOT)) + d -= DOTGAIN; + scaledd = d/size; + + /* If this is a reference to an address within this section, and + * it's close enough to the program counter, we can use a + * shorter instruction. */ + + if (small((type==DOTTYP) && fitx(scaledd, 16), 2)) + { + emit2(B16(10101010,00000000) | (opcode<<5) | (rd<<0)); + emit2(scaledd); + return; + } + + /* Otherwise we need the full 48 bits. */ + + newrelo(expr->typ, RELOVC4|RELPC); + + /* VC4 relocations store the PC-relative delta into the + * destination section in the instruction data. The linker will + * massage this, and scale it appropriately. 
*/ + + if (!fitx(d, 27)) + toobig(); + + emit2(B16(11100111,00000000) | (opcode<<5) | (rd<<0)); + emit4((31<<27) | maskx(d, 27)); +} + +/* Common code for handling addcmp: merge in as much of expr as will fit to + * the second pair of the addcmp opcode. */ + +static void branch_addcmp_common(quad opcode, int bits, struct expr_t* expr) +{ + quad type = expr->typ & S_TYP; + int d; + + if ((pass>0) && (type != DOTTYP)) + serror("can't use this type of branch to jump outside the section"); + + /* The VC4 branch instructions express distance in 2-byte + * words. */ + + d = (expr->val - DOTVAL-2 + 4); + if ((pass == 2) && (d > 0) && !(expr->typ & S_DOT)) + d -= DOTGAIN; + d /= 2; + + if ((pass == 2) && !fitx(d, bits)) + serror("target of branch is too far away"); + + emit2(opcode | maskx(d, bits)); +} + +void branch_addcmp_reg_reg_instr(int cc, int rd, int ra, int rs, struct expr_t* expr) +{ + if ((rd >= 0x10) || (ra >= 0x10) || (rs >= 0x10)) + serror("can only use r0-r15 in this instruction"); + + emit2(B16(10000000,00000000) | (cc<<8) | (ra<<4) | (rd<<0)); + branch_addcmp_common(B16(00000000,00000000) | (rs<<10), 10, expr); +} + +void branch_addcmp_lit_reg_instr(int cc, int rd, long va, int rs, struct expr_t* expr) +{ + if ((rd >= 0x10) || (rs >= 0x10)) + serror("can only use r0-r15 in this instruction"); + + if (!fitx(va, 4)) + serror("value too big to encode into instruction"); + va = maskx(va, 4); + + emit2(B16(10000000,00000000) | (cc<<8) | (va<<4) | (rd<<0)); + branch_addcmp_common(B16(01000000,00000000) | (rs<<10), 10, expr); +} + +void branch_addcmp_reg_lit_instr(int cc, int rd, int ra, long vs, struct expr_t* expr) +{ + if ((rd >= 0x10) || (ra >= 0x10)) + serror("can only use r0-r15 in this instruction"); + + if (!fitx(vs, 6)) + serror("value too big to encode into instruction"); + vs = maskx(vs, 6); + + emit2(B16(10000000,00000000) | (cc<<8) | (ra<<4) | (rd<<0)); + branch_addcmp_common(B16(10000000,00000000) | (vs<<8), 8, expr); +} + +void 
branch_addcmp_lit_lit_instr(int cc, int rd, long va, long vs, struct expr_t* expr) +{ + if (rd >= 0x10) + serror("can only use r0-r15 in this instruction"); + + if (!fitx(va, 4) || !fitx(vs, 6)) + serror("value too big to encode into instruction"); + va = maskx(va, 4); + vs = maskx(vs, 6); + + emit2(B16(10000000,00000000) | (cc<<8) | (va<<4) | (rd<<0)); + branch_addcmp_common(B16(11000000,00000000) | (vs<<8), 8, expr); +} + +/* lea, where the source is relative to the stack. */ + +void lea_stack_instr(int rd, long va, int rs) +{ + if (rs != 25) + serror("source register must be sp"); + + va /= 4; + if (!fitx(va, 6)) + serror("offset too big to encode in instruction"); + va = maskx(va, 6); + + emit2(B16(00010000,00000000) | (rd<<0) | (va<<5)); +} + +/* lea, where the source is an address. */ + +void lea_address_instr(int rd, struct expr_t* expr) +{ + quad pc = DOTVAL; + quad type = expr->typ & S_TYP; + int d = expr->val - pc; + + if ((pass == 2) && (d > 0) && !(expr->typ & S_DOT)) + d -= DOTGAIN; + + if (type == S_ABS) + serror("can't use absolute addresses here"); + + newrelo(expr->typ, RELOVC4|RELPC); + + /* VC4 relocations store the PC-relative delta into the + * destination section in the instruction data. The linker will + * massage this, and scale it appropriately. */ + + emit2(B16(11100101,00000000) | (rd<<0)); + emit4(expr->val - pc); +} + +/* Floating point conversion opcodes (ftrunc, floor, flts, fltu). 
*/
+
+void fltcnv_instr(quad opcode, int cc, int rd, int ra, quad shift)
+{
+	if (!fitx(shift, 6))	/* the shift must fit the 6-bit field, as checked by fitx */
+		serror("shift too big to encode in instruction");
+	emit2(opcode | (rd<<0));
+	emit2(B16(00000000,01000000) | (ra<<11) | (cc<<7) | maskx(shift, 6));
+}
+
diff --git a/mach/vc4/build.mk b/mach/vc4/build.mk
new file mode 100644
index 000000000..b7ca860d8
--- /dev/null
+++ b/mach/vc4/build.mk
@@ -0,0 +1,11 @@
+arch-libem-vc4 := \
+	csa.s \
+	csb.s
+
+arch-libend-vc4 = \
+	edata.s \
+	em_end.s \
+	end.s \
+	etext.s
+
+
diff --git a/mach/vc4/libem/.distr b/mach/vc4/libem/.distr
new file mode 100644
index 000000000..5dff9f5ac
--- /dev/null
+++ b/mach/vc4/libem/.distr
@@ -0,0 +1,3 @@
+csa.s
+csb.s
+videocore.h
diff --git a/mach/vc4/libem/csa.s b/mach/vc4/libem/csa.s
new file mode 100644
index 000000000..25332f2a7
--- /dev/null
+++ b/mach/vc4/libem/csa.s
@@ -0,0 +1,36 @@
+#
+/*
+ * VideoCore IV support library for the ACK
+ * © 2013 David Given
+ * This file is redistributable under the terms of the 3-clause BSD license.
+ * See the file 'Copying' in the root of the distribution for the full text.
+ */
+
+#include "videocore.h"
+
+.define .csa
+.sect .data
+.csa:
+	! on entry:
+	! r0 = un-fixed-up descriptor
+	! r1 = value
+	add r0, gp
+
+	ld r2, 4 (r0) ! check lower bound
+
+	b.lo r1, r2, default ! jump to default if r1 < r2
+
+	sub r1, r2 ! adjust value to be 0-based
+
+	ld r2, 8 (r0) ! check upper bound
+	b.hi r1, r2, default ! jump to default if r1 > r2
+
+	add r1, #3
+go:
+	ld r1, (r0, r1) ! load destination address
+	add r1, gp
+	b r1 ! ...and go
+
+default:
+	mov r1, #0 ! index of default value
+	b go
diff --git a/mach/vc4/libem/csb.s b/mach/vc4/libem/csb.s
new file mode 100644
index 000000000..7ed74c39f
--- /dev/null
+++ b/mach/vc4/libem/csb.s
@@ -0,0 +1,33 @@
+#
+/*
+ * VideoCore IV support library for the ACK
+ * © 2013 David Given
+ * This file is redistributable under the terms of the 3-clause BSD license.
+ * See the file 'Copying' in the root of the distribution for the full text.
+ */ + +#include "videocore.h" + +.define .csb +.sect .data +.csb: + ! on entry: + ! r0 = un-fixed-up descriptor + ! r1 = value + add r0, gp ! r0 = fixed up descriptor + + adds8 r2, r0, #1 ! r2 = moving pointer + ld r3, 4 (r0) ! r3 = count + adds8 r3, r0, r3 ! r3 = end ptr + +loop: + ld r4, (r2)++ + b.eq r4, r1, matched ! r2 points at matching addr + addcmpb.le r2, #4, r3, loop +notmatched: + mov r2, r0 ! r2 points at default jump +matched: + ld r2, (r2) ! load destination address + add r2, gp ! fix up r2 + b r2 ! ...and go + diff --git a/mach/vc4/libem/videocore.h b/mach/vc4/libem/videocore.h new file mode 100644 index 000000000..8ccb981ee --- /dev/null +++ b/mach/vc4/libem/videocore.h @@ -0,0 +1,17 @@ +# +/* + * VideoCore IV support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +! Declare segments (the order is important). + +.sect .text +.sect .rom +.sect .data +.sect .bss + +#define gp r15 + diff --git a/mach/vc4/libend/.distr b/mach/vc4/libend/.distr new file mode 100644 index 000000000..afa027b6e --- /dev/null +++ b/mach/vc4/libend/.distr @@ -0,0 +1,4 @@ +edata.s +em_end.s +end.s +etext.s diff --git a/mach/vc4/libend/edata.s b/mach/vc4/libend/edata.s new file mode 100644 index 000000000..e706877db --- /dev/null +++ b/mach/vc4/libend/edata.s @@ -0,0 +1,15 @@ +# +/* + * VideoCore IV support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. 
+ */ + +.sect .text +.sect .rom +.sect .data +.sect .bss +.define _edata +.sect .data +_edata: diff --git a/mach/vc4/libend/em_end.s b/mach/vc4/libend/em_end.s new file mode 100644 index 000000000..bae5aaa0c --- /dev/null +++ b/mach/vc4/libend/em_end.s @@ -0,0 +1,24 @@ +# +/* + * VideoCore IV support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +.sect .text +.sect .rom +.sect .data +.sect .bss +.sect .end ! only for declaration of _end, __end and endbss. +.define endtext, endrom, enddata, endbss, __end + + .sect .text +endtext: + .sect .rom +endrom: + .sect .data +enddata: + .sect .end +__end: +endbss: diff --git a/mach/vc4/libend/end.s b/mach/vc4/libend/end.s new file mode 100644 index 000000000..5ce2882b6 --- /dev/null +++ b/mach/vc4/libend/end.s @@ -0,0 +1,15 @@ +# +/* + * VideoCore IV support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +.sect .text +.sect .rom +.sect .data +.sect .bss +.define _end +.sect .end ! only for declaration of _end, __end and endbss. +_end: diff --git a/mach/vc4/libend/etext.s b/mach/vc4/libend/etext.s new file mode 100644 index 000000000..973ab1814 --- /dev/null +++ b/mach/vc4/libend/etext.s @@ -0,0 +1,15 @@ +# +/* + * VideoCore IV support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. 
+ */
+
+.sect .text
+.sect .rom
+.sect .data
+.sect .bss
+.define _etext
+.sect .text
+_etext:
diff --git a/mach/vc4/ncg/.distr b/mach/vc4/ncg/.distr
new file mode 100644
index 000000000..ccdf9bf7e
--- /dev/null
+++ b/mach/vc4/ncg/.distr
@@ -0,0 +1,3 @@
+mach.c
+mach.h
+table
diff --git a/mach/vc4/ncg/mach.c b/mach/vc4/ncg/mach.c
new file mode 100644
index 000000000..124e8a965
--- /dev/null
+++ b/mach/vc4/ncg/mach.c
@@ -0,0 +1,88 @@
+/*
+ * VideoCore IV code generator for the ACK
+ * © 2013 David Given
+ * This file is redistributable under the terms of the 3-clause BSD license.
+ * See the file 'Copying' in the root of the distribution for the full text.
+ */
+
+#include
+#include
+
+int framesize;
+
+/* Write out a constant data section. */
+
+con_part(int sz, word w)
+{
+	while (part_size % sz)	/* pad up to a multiple of the operand size */
+		part_size++;
+	if (part_size == TEM_WSIZE)	/* TEM_WSIZE bytes collected: flush them */
+		part_flush();
+	if (sz == 1 || sz == 2) {
+		w &= (sz == 1 ? 0xFF : 0xFFFF);
+		w <<= 8 * part_size;	/* earlier parts occupy the low-order bytes */
+		part_word |= w;
+	} else {
+		assert(sz == 4);
+		part_word = w;
+	}
+	part_size += sz;
+}
+
+con_mult(word sz)
+{
+	if (argval != 4)	/* only 4-byte icon/ucon constants are accepted */
+		fatal("bad icon/ucon size");
+	fprintf(codefile,".data4 %s\n", str);
+}
+
+#define CODE_GENERATOR
+#define IEEEFLOAT
+#define FL_MSL_AT_LOW_ADDRESS 0
+#define FL_MSW_AT_LOW_ADDRESS 0
+#define FL_MSB_AT_LOW_ADDRESS 0
+#include
+
+/* Emit the procedure prologue: push fp and lr, copy sp into fp, subtract
+ * nlocals from sp when it is positive, and record nlocals in framesize. */
+
+void prolog(full nlocals)
+{
+	fprintf(codefile, "push fp, lr\n");
+	fprintf(codefile, "mov fp, sp\n");
+	if (nlocals > 0)
+		fprintf(codefile, "sub sp, #%ld\n", (long)nlocals);
+
+	framesize = nlocals;
+}
+
+mes(word type)
+{
+	int argt ;
+
+	switch ( (int)type ) {
+	case ms_ext :	/* emit a .define for every name in the message */
+		for (;;) {
+			switch ( argt=getarg(
+			    ptyp(sp_cend)|ptyp(sp_pnam)|sym_ptyp) ) {
+			case sp_cend :
+				return ;
+			default:
+				strarg(argt) ;
+				fprintf(codefile,".define %s\n",argstr) ;
+				break ;
+			}
+		}
+	default :	/* any other message: read and discard its arguments */
+		while ( getarg(any_ptyp) != sp_cend ) ;
+		break ;
+	}
+}
+
+char *segname[] = {
+	".sect .text",
+	".sect .data",
+	".sect .rom",
+	".sect .bss"
+};
+
diff --git
a/mach/vc4/ncg/mach.h b/mach/vc4/ncg/mach.h new file mode 100644 index 000000000..89d2b8a97 --- /dev/null +++ b/mach/vc4/ncg/mach.h @@ -0,0 +1,32 @@ +/* + * VideoCore IV code generator for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#define ex_ap(y) fprintf(codefile,".extern %s\n",y) +#define in_ap(y) /* nothing */ + +#define newilb(x) fprintf(codefile,"%s:\n",x) +#define newdlb(x) fprintf(codefile,"%s:\n",x) +#define dlbdlb(x,y) fprintf(codefile,"%s = %s\n",x,y) +#define newlbss(l,x) fprintf(codefile,".comm %s,%u\n",l,x); + +#define cst_fmt "%d" +#define off_fmt "%d" +#define ilb_fmt "I%x_%x" +#define dlb_fmt "_%d" +#define hol_fmt "hol%d" + +#define hol_off "%ld+hol%d" + +#define con_cst(x) fprintf(codefile,".data4\t%ld\n",x) +#define con_ilb(x) fprintf(codefile,".data4\t%s\n",x) +#define con_dlb(x) fprintf(codefile,".data4\t%s\n",x) + +#define fmt_id(sf, st) sprintf(st,"_%s",sf) + +#define modhead ".sect .text; .sect .rom; .sect .data; .sect .bss\n" + +#define BSS_INIT 0 diff --git a/mach/vc4/ncg/table b/mach/vc4/ncg/table new file mode 100644 index 000000000..8b4fe60a5 --- /dev/null +++ b/mach/vc4/ncg/table @@ -0,0 +1,1560 @@ +/* + * VideoCore IV code generator for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. 
+ */ + +EM_WSIZE = 4 +EM_PSIZE = 4 +EM_BSIZE = 8 /* two words saved in call frame */ + +BYTE = 1 /* Size of values */ +WORD = 2 +QUAD = 4 + +FP_OFFSET = 0 /* Offset of saved FP relative to our FP */ +PC_OFFSET = 4 /* Offset of saved PC relative to our FP */ + +#define COMMENT(n) /* noop */ + + +#define nicesize(x) ((x)==BYTE || (x)==WORD || (x)==QUAD) + + + +PROPERTIES + + GPR /* any GPR */ + REG /* any allocatable GPR */ + STACKABLE /* a register than can be used with push/pop */ + + GPR0 GPR1 GPR2 GPR3 GPR4 GPR5 GPR6 GPR7 + GPR8 GPR9 GPR10 GPR11 GPR12 GPR13 GPR14 GPR15 + GPR16 GPR17 GPR18 GPR19 GPR20 GPR21 GPR22 GPR23 + + GPRGP GPRFP GPRSP GPRLR GPRPC + +REGISTERS + + R0("r0") : GPR, REG, GPR0, STACKABLE. + R1("r1") : GPR, REG, GPR1. + R2("r2") : GPR, REG, GPR2. + R3("r3") : GPR, REG, GPR3. + R4("r4") : GPR, REG, GPR4. + R5("r5") : GPR, REG, GPR5. + R6("r6") : GPR, GPR6. + R7("r7") : GPR, REG, GPR7. + R8("r8") : GPR, REG, GPR8. + R9("r9") : GPR, REG, GPR9. + R10("r10") : GPR, REG, GPR10. + R11("r11") : GPR, REG, GPR11. + R12("r12") : GPR, REG, GPR12. + R13("r13") : GPR, REG, GPR13. + R14("r14") : GPR, REG, GPR14. + GP("r15") : GPR, GPRGP. + + R16("r16") : GPR, GPR16. + + R23("r23") : GPR. + FP("fp") : GPR, GPRFP. + SP("sp") : GPR, GPRSP. + LR("lr") : GPR, GPRLR. + PC("pc") : GPR, GPRPC. + /* r26 to r31 are special and the code generator doesn't touch them. */ + + #define SCRATCH R6 + +TOKENS + +/* Used only in instruction descriptions (to generate the correct syntax). */ + + GPROFFSET = { GPR reg; INT off; } 4 off "(" reg ")". + GPRGPR = { GPR reg1; GPR reg2; } 4 "(" reg1 "," reg2 ")". + GPRINC = { GPR reg; } 4 "(" reg ")++". + ADDCMPB_LL = { GPR rd; INT val; INT vs; ADDR dest; } 4 rd ",#" val ",#" vs "," dest. + +/* Primitives */ + + LABEL = { ADDR adr; } 4 adr. + CONST = { INT val; } 4 "#" val. + +/* Sign extended values. */ + + /* The size refers to the *source*. */ + SIGNEX8 = { GPR reg; } 4 reg. + SIGNEX16 = { GPR reg; } 4 reg. 
+ +/* The results of comparisons. */ + + TRISTATE_RC_S = { GPR reg; INT val; } 4. + TRISTATE_RC_U = { GPR reg; INT val; } 4. + TRISTATE_RR_S = { GPR reg1; GPR reg2; } 4. + TRISTATE_RR_U = { GPR reg1; GPR reg2; } 4. + + + +SETS + + TOKEN = LABEL + CONST. + OP = TOKEN + SIGNEX8 + SIGNEX16. + ANY = GPR + OP. + + + +INSTRUCTIONS + + add GPR:wo, GPR:ro, GPR+CONST:ro. + add GPR:rw, GPR+CONST:ro. + addcmpbge "addcmpb.ge" ADDCMPB_LL:rw. + adds2 GPR:rw, GPR+CONST:ro. + adds4 GPR:rw, GPR+CONST:ro. + adds8 GPR:rw, GPR+CONST:ro. + adds16 GPR:rw, GPR+CONST:ro. + adds256 GPR:rw, GPR:rw, GPR:ro. + and GPR:rw, GPR+CONST:ro. + asr GPR:rw, GPR+CONST:ro. + beq "b.eq" LABEL:ro. + bne "b.ne" LABEL:ro. + bgt "b.gt" LABEL:ro. + blt "b.lt" LABEL:ro. + bhi "b.hi" LABEL:ro. + bset GPR:rw, GPR+CONST:ro. + b GPR+LABEL:ro. + bl GPR+LABEL:ro. + cmp GPR:ro, GPR+CONST:ro kills :cc. + divs GPR:wo, GPR:ro, GPR+CONST:ro. + divu GPR:wo, GPR:ro, GPR+CONST:ro. + eor GPR:rw, GPR+CONST:ro. + exts GPR:wo, GPR:ro, GPR+CONST:ro. + exts GPR:rw, GPR+CONST:ro. + fadd GPR:wo, GPR:ro, GPR:ro. + fcmp GPR:wo, GPR:ro, GPR:ro. + fdiv GPR:wo, GPR:ro, GPR:ro. + flts GPR:wo, GPR:ro. + fltu GPR:wo, GPR:ro. + fmul GPR:wo, GPR:ro, GPR:ro. + fsub GPR:wo, GPR:ro, GPR:ro. + ftrunc GPR:wo, GPR:ro. + ld GPR:wo, GPRINC:rw. + ld GPR:wo, GPROFFSET+GPRGPR+LABEL:ro. + ldb GPR:wo, GPROFFSET+GPRGPR+LABEL:ro. + ldh GPR:wo, GPROFFSET+GPRGPR+LABEL:ro. + ldhs GPR:wo, GPROFFSET+GPRGPR+LABEL:ro. + lea GPR:wo, LABEL:ro. + lsl GPR:rw, GPR+CONST:ro. + lsl GPR:wo, GPR:ro, GPR+CONST:ro. + lsr GPR:rw, GPR+CONST:ro. + mov GPR:wo, GPR+CONST:ro. + mul GPR:rw, GPR+CONST:ro. + mvn GPR:wo, GPR+CONST:ro. + neg GPR:rw, GPR+CONST:ro. + or GPR:rw, GPR+CONST:ro. + pop GPR0+GPR6+GPR16+GPRFP+GPRPC:wo. + pop GPR0+GPR6+GPR16+GPRFP:wo, GPRPC:wo. + push GPR0+GPR6+GPR16+GPRFP+GPRLR:ro. + push GPR0+GPR6+GPR16+GPRFP:ro, GPRLR:ro. + rsb GPR:rw, GPR+CONST:ro. + sub GPR:wo, GPR:ro, CONST+GPR:ro. + sub GPR:rw, GPR+CONST:ro. + st GPR:ro, GPRINC:rw. 
+	st GPR:ro, GPROFFSET+GPRGPR+LABEL:ro.
+	stb GPR:ro, GPROFFSET+GPRGPR+LABEL:ro.
+	sth GPR:ro, GPROFFSET+GPRGPR+LABEL:ro.
+	sths GPR:ro, GPROFFSET+GPRGPR+LABEL:ro.
+
+	invalid "invalid".
+	comment "!" LABEL:ro.
+
+
+
+MOVES
+
+	from GPR to GPR
+		gen
+			COMMENT("mov GPR->GPR")
+			mov %2, %1
+
+/* Constants */
+
+	from CONST to GPR
+		gen
+			mov %2, %1
+
+	from LABEL to GPR
+		gen
+			lea %2, {LABEL, %1.adr}
+			sub %2, GP
+
+/* Sign extension */
+
+	from SIGNEX8 to GPR
+		gen
+			exts %2, %1.reg, {CONST, 8}
+
+	from SIGNEX16 to GPR
+		gen
+			exts %2, %1.reg, {CONST, 16}
+
+/* Miscellaneous */
+
+	from CONST+LABEL+GPR to GPR
+		gen
+			move %1, %2
+
+
+TESTS
+
+	to test GPR
+		gen
+			cmp %1, {CONST, 0}
+
+
+
+STACKINGRULES
+
+	from GPR0+GPR6+GPR16 to STACK
+		gen
+			comment {LABEL, "push stackable"}
+			push %1
+
+	from OP+GPR to STACK
+		uses GPR0
+		gen
+			move %1, %a
+			push %a
+
+	from OP to STACK
+		uses STACKABLE
+		gen
+			move %1, %a
+			push %a
+
+	from OP+GPR to STACK
+		gen
+			comment {LABEL, "push via scratch"}
+			move %1, SCRATCH
+			push SCRATCH
+
+
+
+
+COERCIONS
+
+	from OP
+		uses REG
+		gen
+			move %1, %a
+		yields %a
+
+	from STACK
+		uses REG
+		gen
+			pop SCRATCH
+			move SCRATCH, %a
+		yields %a
+
+
+
+PATTERNS
+
+/* Intrinsics */
+
+	pat nop /* Does nothing */
+
+	pat loc /* Load constant */
+		yields {CONST, $1}
+
+	pat dup $1<=QUAD /* Duplicate word on top of stack */
+		with ANY
+			yields %1 %1
+
+	pat dup $1==(2*QUAD) /* Duplicate word pair on top of stack; the last token yielded ends up on top */
+		with ANY ANY
+			yields %2 %1 %2 %1
+
+	pat exg $1<=QUAD /* Exchange top two words on stack */
+		with ANY ANY
+			yields %1 %2
+
+	pat exg $1==(2*QUAD) /* Exchange top two word pairs on stack */
+		with ANY ANY ANY ANY
+			yields %2 %1 %4 %3
+
+	pat stl lol $1==$2 /* Store then load local */
+		leaving
+			dup QUAD
+			stl $1
+
+	pat lal sti lal loi $1==$3 && $2==$4 /* Store then load local, of a different size */
+		leaving
+			dup $2
+			lal $1
+			sti $2
+
+	pat ste loe $1==$2 /* Store then load external */
+		leaving
+			dup QUAD
+			ste $1
+
+
+
+/*
Type conversions */ + + pat loc loc cii loc loc cii $1==$4 && $2==$5 /* madness, generated by the C compiler */ + leaving + loc $1 + loc $2 + cii + + pat loc loc cii loc loc cii $2==QUAD && $5==QUAD && $4<$2 /* madness, generated by the C compiler */ + leaving + loc $4 + loc $5 + cii + + pat loc loc ciu /* signed X -> unsigned X */ + leaving + loc $1 + loc $2 + cuu + + pat loc loc cuu $1==$2 /* unsigned X -> unsigned X */ + /* nop */ + + pat loc loc cii $1==$2 /* signed X -> signed X */ + /* nop */ + + pat loc loc cui $1==$2 /* unsigned X -> signed X */ + /* nop */ + + pat loc loc cui $1==BYTE && $2==QUAD /* unsigned char -> signed int */ + /* nop */ + + pat loc loc cui $1==WORD && $2==QUAD /* unsigned short -> signed int */ + /* nop */ + + pat loc loc cii $1==BYTE && $2>BYTE /* signed char -> anything */ + with GPR + yields {SIGNEX8, %1} + with SIGNEX8 + yields {SIGNEX8, %1.reg} + with SIGNEX16 + yields {SIGNEX8, %1.reg} + + pat loc loc cii $1==WORD && $2>WORD /* signed short -> anything */ + with GPR + yields {SIGNEX16, %1} + with SIGNEX8 + yields {SIGNEX16, %1.reg} + with SIGNEX16 + yields {SIGNEX16, %1.reg} + + + +/* Local variables */ + + pat lal /* Load address of local */ + uses REG + gen + sub %a, FP, GP + add %a, {CONST, $1} + yields %a + + pat lol /* Load quad from local */ + uses REG + gen + ld %a, {GPROFFSET, FP, $1} + yields %a + + pat ldl /* Load double-word from local */ + leaving + lol $1 + QUAD*1 + lol $1 + QUAD*0 + + pat stl /* Store to local */ + with GPR + gen + st %1, {GPROFFSET, FP, $1} + + pat sdl /* Store double-word to local */ + leaving + stl $1 + QUAD*0 + stl $1 + QUAD*1 + + pat lil /* Load from indirected local */ + leaving + lol $1 + loi QUAD + + pat sil /* Save to indirected local */ + leaving + lol $1 + sti QUAD + + pat stl lol $1==$2 /* Save then load (generated by C compiler) */ + leaving + dup QUAD + stl $1 + + pat zrl /* Zero local */ + leaving + loc 0 + stl $1 + + pat inl /* Increment local in register */ + leaving + lol $1 + loc 
1
+			adi QUAD
+			stl $1
+
+	pat del /* Decrement local in register */
+		leaving
+			lol $1
+			loc 1
+			sbi QUAD
+			stl $1
+
+
+
+/* Global variables */
+
+	pat lpi /* Load address of external function */
+		leaving
+			lae $1
+
+	pat lae /* Load address of external */
+		yields {LABEL, $1}
+
+	pat loe /* Load word external */
+		leaving
+			lae $1
+			loi QUAD
+
+	pat ste /* Store word external */
+		leaving
+			lae $1
+			sti QUAD
+
+	pat zre /* Zero external */
+		leaving
+			loc 0
+			ste $1
+
+	pat ine /* Increment external */
+		leaving
+			loe $1
+			inc
+			ste $1
+
+	pat dee /* Decrement external */
+		leaving
+			loe $1
+			dec
+			ste $1
+
+	pat lde /* Load double external */
+		leaving
+			lae $1
+			loi QUAD*2
+
+	pat sde /* Store double external */
+		leaving
+			lae $1
+			sti QUAD*2
+
+
+/* Structures */
+
+	pat lof /* Load word offsetted */
+		leaving
+			adp $1
+			loi QUAD
+
+	pat ldf /* Load double offsetted; the word at $1+0 ends up on top, as for loi 2*QUAD */
+		with GPR
+			uses reusing %1, REG=%1, REG
+			gen
+				add %a, GP
+				ld %b, {GPROFFSET, %a, $1+4}
+				ld %a, {GPROFFSET, %a, $1+0}
+			yields %b %a
+
+	pat stf /* Store word offsetted */
+		leaving
+			adp $1
+			sti QUAD
+
+	pat sdf /* Store double offsetted; %1 is the pointer, %2 and %3 the data, as for sti 2*QUAD */
+		with GPR GPR GPR
+			uses reusing %1, REG=%1
+			gen
+				add %a, GP
+				st %2, {GPROFFSET, %a, $1+0}
+				st %3, {GPROFFSET, %a, $1+4}
+
+
+
+
+/* Loads and stores */
+
+	pat loi $1==BYTE /* Load byte indirect */
+		with LABEL
+			uses REG
+			gen
+				ldb %a, %1
+			yields %a
+		with GPR
+			uses reusing %1, REG
+			gen
+				ldb %a, {GPRGPR, %1, GP}
+			yields %a
+
+	pat loi loc loc cii $1==WORD && $2==WORD && $3==QUAD /* Load short indirect and sign extend */
+		with LABEL
+			uses REG
+			gen
+				ldhs %a, %1
+			yields %a
+		with GPR
+			uses reusing %1, REG
+			gen
+				add %a, %1, GP
+				ldhs %a, {GPROFFSET, %a, 0}
+			yields %a
+
+	pat loi $1==WORD /* Load short indirect */
+		with LABEL
+			uses REG
+			gen
+				ldh %a, %1
+			yields %a
+		with GPR
+			uses reusing %1, REG
+			gen
+				add %a, %1, GP
+				ldh %a, {GPROFFSET, %a, 0}
+			yields %a
+
+	pat loi $1==QUAD /* Load quad indirect */
+		with LABEL
+			uses REG
+			gen
+				ld %a, %1
+			yields %a
+		with GPR
+			uses reusing %1, REG
+			gen
+				add %a, %1, GP
+				ld %a, {GPROFFSET, %a, 0}
+			yields %a
+
+	pat loi $1==2*QUAD /* Load double-quad indirect */
+		with LABEL
+			uses REG, REG
+			gen
+				lea %b, %1
+				ld %a, {GPROFFSET, %b, 0}
+				ld %b, {GPROFFSET, %b, 4}
+			yields %b %a
+		with GPR
+			uses reusing %1, REG, REG
+			gen
+				add %b, %1, GP
+				ld %a, {GPROFFSET, %b, 0}
+				ld %b, {GPROFFSET, %b, 4}
+			yields %b %a
+
+	pat loi $1==3*QUAD /* Load triple-quad indirect; %c holds the base and is loaded last */
+		with LABEL
+			uses REG, REG, REG
+			gen
+				lea %c, %1
+				ld %a, {GPROFFSET, %c, 0}
+				ld %b, {GPROFFSET, %c, 4}
+				ld %c, {GPROFFSET, %c, 8}
+			yields %c %b %a
+		with GPR
+			uses reusing %1, REG, REG, REG
+			gen
+				add %c, %1, GP
+				ld %a, {GPROFFSET, %c, 0}
+				ld %b, {GPROFFSET, %c, 4}
+				ld %c, {GPROFFSET, %c, 8}
+			yields %c %b %a
+
+	pat loi /* Load arbitrary size */
+		leaving
+			loc $1
+			los QUAD
+
+	pat los /* Load arbitrary size */
+		leaving
+			cal ".los"
+
+	pat sti $1==BYTE /* Store byte indirect */
+		with LABEL GPR
+			gen
+				stb %2, %1
+		with LABEL SIGNEX8+SIGNEX16
+			gen
+				stb %2.reg, %1
+		with GPR GPR
+			gen
+				stb %2, {GPRGPR, %1, GP}
+		with GPR SIGNEX8+SIGNEX16
+			gen
+				stb %2.reg, {GPRGPR, %1, GP}
+
+	pat sti $1==WORD /* Store half-word indirect */
+		with LABEL GPR
+			gen
+				sth %2, %1
+		with LABEL SIGNEX16
+			gen
+				sth %2.reg, %1
+		with GPR GPR
+			uses reusing %1, REG
+			gen
+				add %a, %1, GP
+				sth %2, {GPROFFSET, %a, 0}
+		with GPR SIGNEX16
+			uses reusing %1, REG
+			gen
+				add %a, %1, GP
+				sth %2.reg, {GPROFFSET, %a, 0}
+
+	pat sti $1==QUAD /* Store quad indirect */
+		with LABEL GPR
+			gen
+				st %2, %1
+		with GPR GPR
+			uses reusing %1, REG
+			gen
+				add %a, %1, GP
+				st %2, {GPROFFSET, %a, 0}
+
+	pat sti $1==2*QUAD /* Store double-quad indirect */
+		with LABEL GPR GPR
+			uses REG
+			gen
+				lea %a, %1
+				st %2, {GPROFFSET, %a, 0}
+				st %3, {GPROFFSET, %a, 4}
+		with GPR GPR GPR
+			uses reusing %1, REG=%1
+			gen
+				add %a, GP
+				st %2, {GPROFFSET, %a, 0}
+				st %3, {GPROFFSET, %a, 4}
+
+	pat sti $1==3*QUAD /* Store
triple-quad indirect */ + with LABEL GPR GPR GPR + uses REG + gen + lea %a, %1 + st %2, {GPROFFSET, %a, 0} + st %3, {GPROFFSET, %a, 4} + st %4, {GPROFFSET, %a, 8} + with GPR GPR GPR GPR + uses reusing %1, REG=%1 + gen + add %a, GP + st %2, {GPROFFSET, %a, 0} + st %3, {GPROFFSET, %a, 4} + st %4, {GPROFFSET, %a, 8} + + pat sti /* Store arbitrary size */ + leaving + loc $1 + sts QUAD + + pat sts /* Store arbitrary size */ + leaving + cal ".sts" + + + +/* Arithmetic wrappers */ + + pat ads /* Add var to pointer */ + leaving adi $1 + + pat sbs /* Subtract var from pointer */ + leaving sbi $1 + + pat adp /* Add constant to pointer */ + leaving + loc $1 + adi QUAD + + pat adu /* Add unsigned */ + leaving + adi $1 + + pat sbu /* Subtract unsigned */ + leaving + sbi $1 + + pat inc /* Add 1 */ + leaving + loc 1 + adi QUAD + + pat dec /* Subtract 1 */ + leaving + loc 1 + sbi QUAD + + pat loc mlu /* Unsigned multiply by constant */ + leaving + loc $1 + mli QUAD + + pat mlu /* Unsigned multiply by var */ + leaving + mli QUAD + + pat loc slu /* Shift left unsigned by constant amount */ + leaving + loc $1 + sli $2 + + pat slu /* Shift left unsigned by variable amount */ + leaving + sli $1 + + + +/* Word arithmetic */ + + pat loc adi $1==0 /* Add nothing */ + /* nop */ + + pat adi $1==QUAD /* Add word (second + top) */ + with GPR+CONST GPR + uses reusing %2, REG=%2 + gen + add %a, %1 + yields %a + with GPR GPR+CONST + uses reusing %1, REG=%1 + gen + add %a, %2 + yields %a + + pat loc sbi $1==0 /* Subtract nothing */ + /* nop */ + + pat sbi $1==QUAD /* Subtract word (second - top) */ + with GPR+CONST GPR + uses reusing %2, REG=%2 + gen + sub %a, %1 + yields %a + + pat mli $1==QUAD /* Multiply word (second * top) */ + with GPR+CONST GPR + uses reusing %2, REG=%2 + gen + mul %a, %1 + yields %a + with GPR GPR+CONST + uses reusing %1, REG=%1 + gen + mul %a, %2 + yields %a + + pat mlu + leaving + mli $1 + + pat dvi $1==QUAD /* Divide word (second / top) */ + with GPR GPR + uses reusing 
%2, REG
+			gen
+				divs %a, %2, %1
+			yields %a
+
+	pat dvu $1==QUAD /* Divide unsigned word (second / top) */
+		with GPR GPR
+			uses reusing %2, REG
+			gen
+				divu %a, %2, %1
+			yields %a
+
+	pat rmu $1==QUAD /* Remainder unsigned word (second % top) */
+		with GPR GPR
+			uses REG
+			gen
+				divu %a, %2, %1
+				mul %a, %1
+				rsb %a, %2
+			yields %a
+
+	pat rmi $1==QUAD /* Remainder signed word (second % top) */
+		with GPR GPR
+			uses REG
+			gen
+				divs %a, %2, %1
+				mul %a, %1
+				rsb %a, %2
+			yields %a
+
+	pat ngi $1==QUAD /* Negate word */
+		with GPR
+			uses reusing %1, REG=%1
+			gen
+				neg %a, %a
+			yields %a
+
+	pat and $1==QUAD /* AND word */
+		with GPR+CONST GPR
+			uses reusing %2, REG=%2
+			gen
+				and %a, %1
+			yields %a
+		with GPR GPR+CONST
+			uses reusing %1, REG=%1
+			gen
+				and %a, %2
+			yields %a
+
+	pat ior $1==QUAD /* OR word */
+		with GPR+CONST GPR
+			uses reusing %2, REG=%2
+			gen
+				or %a, %1
+			yields %a
+		with GPR GPR+CONST
+			uses reusing %1, REG=%1
+			gen
+				or %a, %2
+			yields %a
+
+	pat xor $1==QUAD /* XOR word */
+		with GPR+CONST GPR
+			uses reusing %2, REG=%2
+			gen
+				eor %a, %1
+			yields %a
+		with GPR GPR+CONST
+			uses reusing %1, REG=%1
+			gen
+				eor %a, %2
+			yields %a
+
+	pat com $1==QUAD /* Complement */
+		with GPR
+			uses reusing %1, REG=%1
+			gen
+				mvn %a, %1
+			yields %a
+
+	pat dvi $1==QUAD /* Divide word (second / top) */
+		with GPR GPR
+			uses reusing %2, REG
+			gen
+				divs %a, %2, %1
+			yields %a
+
+	pat dvu $1==QUAD /* Divide unsigned word (second / top) */
+		with GPR GPR
+			uses reusing %2, REG
+			gen
+				divu %a, %2, %1
+			yields %a
+
+	pat rmu $1==QUAD /* Remainder unsigned word (second % top): second - quotient*top */
+		with GPR GPR
+			uses REG
+			gen
+				divu %a, %2, %1
+				mul %a, %1
+				rsb %a, %2
+			yields %a
+
+	pat rmi $1==QUAD /* Remainder signed word (second % top): second - quotient*top */
+		with GPR GPR
+			uses REG
+			gen
+				divs %a, %2, %1
+				mul %a, %1
+				rsb %a, %2
+			yields %a
+
+#if 0
+	pat mli $1==4 /* Multiply word (second * top) */
+		with REG REG
+			uses reusing %2, REG
+			gen
+				mullw %a, %2, %1
+			yields %a
+
+
+	pat xor $1==4
/* XOR word */
+		with GPR GPR
+			yields {XOR_RR, %1, %2}
+		with GPR CONST
+			yields {XOR_RC, %1, %2.val}
+		with CONST GPR
+			yields {XOR_RC, %2, %1.val}
+
+	pat xor !defined($1) /* XOR set */
+		with STACK
+			gen
+				bl {LABEL, ".xor"}
+
+	pat com $1==QUAD /* NOT word */
+		with AND_RR
+			uses REG
+			gen
+				nand %a, %1.reg1, %1.reg2
+			yields %a
+		with OR_RR
+			uses REG
+			gen
+				nor %a, %1.reg1, %1.reg2
+			yields %a
+		with XOR_RR
+			uses REG
+			gen
+				eqv %a, %1.reg1, %1.reg2
+			yields %a
+		with GPR
+			yields {NOT_R, %1}
+
+	pat com !defined($1) /* NOT set */
+		with STACK
+			gen
+				bl {LABEL, ".com"}
+#endif
+
+	pat sli $1==4 /* Shift left (second << top) */
+		with CONST+GPR GPR
+			uses reusing %2, REG=%2
+			gen
+				lsl %a, %1
+			yields %a
+
+	pat sri $1==4 /* Shift right signed (second >> top); shifts the yielded copy %a, not %2 */
+		with CONST+GPR GPR
+			uses reusing %2, REG=%2
+			gen
+				asr %a, %1
+			yields %a
+
+	pat sru $1==4 /* Shift right unsigned (second >> top); shifts the yielded copy %a, not %2 */
+		with CONST+GPR GPR
+			uses reusing %2, REG=%2
+			gen
+				lsr %a, %1
+			yields %a
+
+
+
+/* Special arithmetic */
+
+	pat loc sli adi $1==1 && $2==QUAD && $3==QUAD /* Shift and add (second + top<<1) */
+		with GPR+CONST GPR
+			uses reusing %2, REG=%2
+			gen
+				adds2 %a, %1
+			yields %a
+
+	pat loc sli adi $1==2 && $2==QUAD && $3==QUAD /* Shift and add (second + top<<2) */
+		with GPR+CONST GPR
+			uses reusing %2, REG=%2
+			gen
+				adds4 %a, %1
+			yields %a
+
+	pat loc sli adi $1==3 && $2==QUAD && $3==QUAD /* Shift and add (second + top<<3) */
+		with GPR+CONST GPR
+			uses reusing %2, REG=%2
+			gen
+				adds8 %a, %1
+			yields %a
+
+	pat loc sli adi $1==4 && $2==QUAD && $3==QUAD /* Shift and add (second + top<<4) */
+		with GPR+CONST GPR
+			uses reusing %2, REG=%2
+			gen
+				adds16 %a, %1
+			yields %a
+
+	pat loc sli adi $1==8 && $2==QUAD && $3==QUAD /* Shift and add (second + top<<8) */
+		with GPR GPR
+			uses reusing %2, REG
+			gen
+				adds256 %a, %2, %1
+			yields %a
+
+	pat loc sli ads
+		leaving
+			loc $1
+			sli $2
+			adi $3
+
+
+
+/* Arrays */
+
+	pat aar $1==QUAD /* Index array */
+		with STACK
+
uses GPR0
+			gen
+				bl {LABEL, ".aar4stack"}
+			yields R0
+		with GPR0 GPR1 GPR2
+			uses GPR0
+			gen
+				bl {LABEL, ".aar4"}
+			yields R0
+
+	pat lae lar $2==QUAD && nicesize(rom($1, 3)) /* Load array */
+		leaving
+			lae $1
+			aar QUAD
+			loi rom($1, 3)
+
+	pat lar $1==QUAD /* Load array */
+		with STACK
+			uses GPR0
+			gen
+				bl {LABEL, ".lar4stack"}
+			yields R0
+		with GPR0 GPR1 GPR2
+			uses GPR0
+			gen
+				bl {LABEL, ".lar4"}
+			yields R0
+
+	pat lae sar $2==QUAD && nicesize(rom($1, 3)) /* Store array */
+		leaving
+			lae $1
+			aar QUAD
+			sti rom($1, 3)
+
+	pat sar $1==QUAD /* Store array */
+		with STACK
+			uses GPR0
+			gen
+				bl {LABEL, ".sar4stack"}
+			yields R0
+		with GPR0 GPR1 GPR2
+			uses GPR0
+			gen
+				bl {LABEL, ".sar4"}
+
+
+
+/* Sets */
+
+	pat set $1==QUAD /* Create quad with one bit set */
+		with GPR
+			uses reusing %1, REG
+			gen
+				bset %a, %1
+			yields %a
+
+	pat set defined($1) /* Any other set */
+		leaving
+			loc $1
+			cal ".set"
+
+	pat set !defined($1) /* Create structure with set bit (variable) */
+		leaving
+			cal ".set"
+
+	pat inn defined($1) /* Test for set bit */
+		leaving
+			set $1
+			and $1
+
+	pat inn !defined($1) /* Test for set bit (variable) */
+		leaving
+			cal ".inn"
+
+	pat ior !nicesize($1) /* OR set */
+		leaving
+			cal ".ior"
+
+	pat ior !defined($1) /* OR set */
+		leaving
+			cal ".ior"
+
+	pat and !nicesize($1) /* AND set */
+		leaving
+			loc $1
+			cal ".and"
+
+	pat and !defined($1) /* AND set */
+		leaving
+			cal ".and"
+
+
+
+/* Boolean resolutions */
+
+	proc cm_t example teq
+		with GPR GPR
+			uses reusing %1, REG
+			gen
+				cmp %2, %1
+				mov %a, {CONST, 0}
+				add[1] %a, {CONST, 1}
+			yields %a
+
+	pat cmu teq call cm_t("add.eq") /* top = (second == top) */
+	pat cmu tne call cm_t("add.ne") /* top = (second != top) */
+	pat cmu tlt call cm_t("add.lo") /* top = unsigned (second < top) */
+	pat cmu tle call cm_t("add.ls") /* top = unsigned (second <= top) */
+	pat cmu tgt call cm_t("add.hi") /* top = unsigned (second > top) */
+	pat cmu tge call cm_t("add.hs") /* top = unsigned
(second >= top) */
+	pat cmi teq call cm_t("add.eq") /* top = (second == top) */
+	pat cmi tne call cm_t("add.ne") /* top = (second != top) */
+	pat cmi tlt call cm_t("add.lt") /* top = signed (second < top) */
+	pat cmi tle call cm_t("add.le") /* top = signed (second <= top) */
+	pat cmi tgt call cm_t("add.gt") /* top = signed (second > top) */
+	pat cmi tge call cm_t("add.ge") /* top = signed (second >= top) */
+
+	proc cmf_t example teq
+		with GPR GPR
+			uses reusing %1, REG
+			gen
+				fcmp %a, %2, %1
+				mov %a, {CONST, 0}
+				add[1] %a, {CONST, 1}
+			yields %a
+
+	pat cmf teq call cmf_t("add.eq") /* top = float (second == top) */
+	pat cmf tne call cmf_t("add.ne") /* top = float (second != top) */
+	pat cmf tlt call cmf_t("add.lo") /* top = float (second < top) */
+	pat cmf tle call cmf_t("add.ls") /* top = float (second <= top) */
+	pat cmf tgt call cmf_t("add.hi") /* top = float (second > top) */
+	pat cmf tge call cmf_t("add.hs") /* top = float (second >= top) */
+
+	proc fallback_t example teq
+		with GPR
+			uses reusing %1, REG
+			gen
+				cmp %1, {CONST, 0}
+				mov %a, {CONST, 0}
+				add[1] %a, {CONST, 1}
+			yields %a
+
+	pat teq call fallback_t("add.eq") /* top = (top == 0) */
+	pat tne call fallback_t("add.ne") /* top = (top != 0) */
+	pat tlt call fallback_t("add.lt") /* top = signed (top < 0) */
+	pat tle call fallback_t("add.le") /* top = signed (top <= 0) */
+	pat tgt call fallback_t("add.gt") /* top = signed (top > 0) */
+	pat tge call fallback_t("add.ge") /* top = signed (top >= 0) */
+
+
+
+/* Simple branches */
+
+	proc anyz example zeq
+		with GPR STACK
+			kills ALL
+			gen
+				cmp %1, {CONST, 0}
+				beq[1] {LABEL, $1}
+
+	pat zeq call anyz("b.eq") /* Branch if signed top == 0 */
+	pat zne call anyz("b.ne") /* Branch if signed top != 0 */
+	pat zgt call anyz("b.gt") /* Branch if signed top > 0 */
+	pat zlt call anyz("b.lt") /* Branch if signed top < 0 */
+	pat zge call anyz("b.ge") /* Branch if signed top >= 0 */
+	pat zle call anyz("b.le") /* Branch if signed
top <= 0 */ + + proc anyb example beq + with GPR+CONST GPR STACK + kills ALL + gen + cmp %2, %1 + beq[1] {LABEL, $1} + + pat beq call anyb("b.eq") /* Branch if signed second == top */ + pat bne call anyb("b.ne") /* Branch if signed second != top */ + pat bgt call anyb("b.gt") /* Branch if signed second > top */ + pat bge call anyb("b.ge") /* Branch if signed second >= top */ + pat blt call anyb("b.lt") /* Branch if signed second < top */ + pat ble call anyb("b.le") /* Branch if signed second <= top */ + + proc cmu_z example cmu zeq + with GPR+CONST GPR STACK + kills ALL + gen + cmp %2, %1 + beq[1] {LABEL, $2} + + pat cmu zeq call cmu_z("b.eq") /* Branch if unsigned second == top */ + pat cmu zne call cmu_z("b.ne") /* Branch if unsigned second != top */ + pat cmu zgt call cmu_z("b.hi") /* Branch if unsigned second > top */ + pat cmu zlt call cmu_z("b.lo") /* Branch if unsigned second < top */ + pat cmu zge call cmu_z("b.hs") /* Branch if unsigned second >= top */ + pat cmu zle call cmu_z("b.ls") /* Branch if unsigned second <= top */ + pat cmi zeq call cmu_z("b.eq") /* Branch if signed second == top */ + pat cmi zne call cmu_z("b.ne") /* Branch if signed second != top */ + pat cmi zgt call cmu_z("b.gt") /* Branch if signed second > top */ + pat cmi zlt call cmu_z("b.lt") /* Branch if signed second < top */ + pat cmi zge call cmu_z("b.ge") /* Branch if signed second >= top */ + pat cmi zle call cmu_z("b.le") /* Branch if signed second <= top */ + + proc cmf_z example cmu zeq + with GPR GPR STACK + kills ALL + gen + fcmp %2, %2, %1 + beq[1] {LABEL, $2} + + pat cmf zeq call cmf_z("b.eq") /* Branch if float second == top */ + pat cmf zne call cmf_z("b.ne") /* Branch if float second != top */ + pat cmf zgt call cmf_z("b.gt") /* Branch if float second > top */ + pat cmf zlt call cmf_z("b.lt") /* Branch if float second < top */ + pat cmf zge call cmf_z("b.ge") /* Branch if float second >= top */ + pat cmf zle call cmf_z("b.le") /* Branch if float second <= top */ + + pat 
cmp /* Compare pointers */ + leaving + cmu QUAD + + pat cms $1==QUAD /* Compare blocks (word sized) */ + leaving + cmi QUAD + + + + + +/* Other branching and labelling */ + +#if 0 + pat lab topeltsize($1)<=4 && !fallthrough($1) + gen + labeldef $1 + yields R0 + + pat lab topeltsize($1)<=4 && fallthrough($1) + with GPR0 + gen + labeldef $1 + yields %1 + + pat lab topeltsize($1)>4 + with STACK + kills ALL + gen + labeldef $1 + + pat bra topeltsize($1)<=4 /* Unconditional jump with TOS register */ + with GPR0 STACK + gen + b {LABEL, $1} + + pat bra topeltsize($1)>4 /* Unconditional jump without TOS register */ + with STACK + gen + b {LABEL, $1} +#endif + + pat lab + with STACK + kills ALL + gen + labeldef $1 + + pat bra + with STACK + kills ALL + gen + b {LABEL, $1} + + + + +/* Miscellaneous */ + + pat cal /* Call procedure */ + with STACK + kills ALL + gen + bl {LABEL, $1} + + pat cai /* Call procedure indirect */ + with GPR STACK + kills ALL + gen + bl %1 + + pat lfr $1==QUAD /* Load function result, word */ + yields R0 + + pat lfr $1==QUAD*2 /* Load function result, word */ + yields R1 R0 + + pat ret $1==0 /* Return from procedure */ + gen + mov SP, FP + pop FP, PC + + pat ret $1==QUAD /* Return from procedure, word */ + with GPR0 + gen + mov SP, FP + pop FP, PC + + pat ret $1==QUAD*2 /* Return from procedure, word */ + with GPR GPR + gen + move %1, R0 + move %2, R1 + mov SP, FP + pop FP, PC + + pat blm /* Block move constant length */ + leaving + loc $1 + bls + + pat bls /* Block move variable length */ + with STACK + kills ALL + gen + bl {LABEL, "_memmove"} + + pat csa /* Array-lookup switch */ + with GPR0 GPR1 STACK + kills ALL + gen + b {LABEL, ".csa"} + + pat csb /* Table-lookup switch */ + with GPR0 GPR1 STACK + kills ALL + gen + bl {LABEL, ".csb"} + + + +/* EM specials */ + + pat fil /* Set current filename */ + leaving + lae $1 + ste ".filename" + + pat lin /* Set current line number */ + leaving + loc $1 + ste ".linenumber" + + pat lni /* Increment line 
number */ + leaving + ine ".linenumber" + + pat lim /* Load EM trap ignore mask */ + leaving + lde ".ignmask" + + pat sim /* Store EM trap ignore mask */ + leaving + ste ".ignmask" + + pat trp /* Raise EM trap */ + leaving + cal ".trap" + + pat sig /* Set trap handler */ + leaving + ste ".trppc" + + pat rtt /* Return from trap */ + leaving + ret 0 + + pat lxl $1==0 /* Load FP */ + leaving + lor 0 + + pat lxl $1==1 /* Load caller's FP */ + leaving + lxl 0 + dch + + pat dch /* FP -> caller FP */ + with GPR + uses reusing %1, REG + gen + ld %a, {GPROFFSET, %1, FP_OFFSET} + sub %a, GP + yields %a + + pat lpb /* Convert FP to argument address */ + leaving + adp EM_BSIZE + + pat lxa /* Load caller's SP */ + leaving + lxl $1 + lpb + + pat gto /* longjmp */ + uses REG, REG + gen + move {LABEL, $1}, %a + ld %b, {GPROFFSET, %a, 8} + add FP, %b, GP + ld %b, {GPROFFSET, %a, 4} + add SP, %b, GP + ld %b, {GPROFFSET, %a, 0} + add %b, GP + b %b + +#if 0 + + pat gto /* longjmp */ + with STACK + gen + ld {LABEL, $1+2} + wspec {CONST, 1} + ld {LABEL, $1+4} + wspec {CONST, 0} + ld {LABEL, $1+0} + wspec {CONST, 2} + + pat str $1==1 /* Store special GPRister */ + with GPR0 + gen + wspec {CONST, $1} + +#endif + + pat lor $1==0 /* Load FP */ + uses REG + gen + move FP, %a + yields %a + + pat lor $1==1 /* Load SP */ + uses REG + gen + move SP, %a + yields %a + + pat lor $1==2 /* Load HP */ + leaving + loe ".reghp" + + pat str $1==0 /* Store FP */ + with GPR + gen + sub FP, %1, GP + + pat str $1==1 /* Store SP */ + with GPR + gen + sub SP, %1, GP + + pat str $1==2 /* Store HP */ + leaving + ste ".reghp" + + pat ass /* Adjust stack by variable amount */ + with CONST+GPR STACK + gen + add SP, %1 + + pat asp $1==QUAD /* Adjust stack by constant amount */ + with GPR + /* silently ignore GPR */ + with STACK + gen + pop SCRATCH + + pat asp $1==(2*QUAD) /* Adjust stack by constant amount */ + with GPR GPR + /* silently ignore GPR */ + with STACK + gen + add SP, {CONST, 2*QUAD} + + pat asp /* 
Adjust stack by constant amount */ + leaving + loc $1 + ass + + + +/* Floating point */ + + pat ngf /* Negate float */ + leaving + loc 0 + exg QUAD + sbf QUAD + + proc simple_f example adf + with GPR GPR + uses reusing %1, REG + gen + fadd[1] %a, %2, %1 + yields %a + + pat adf call simple_f("fadd") /* Float add (second + top) */ + pat sbf call simple_f("fsub") /* Float subtract (second - top) */ + pat mlf call simple_f("fmul") /* Float multiply (second * top) */ + pat dvf call simple_f("fdiv") /* Float divide (second / top) */ + + pat loc loc cff $1==$2 && $1==QUAD /* Convert float to float */ + leaving + nop + + pat loc loc cfi $1==$2 && $1==QUAD /* Convert float -> integer */ + with GPR + uses reusing %1, REG + gen + ftrunc %a, %1 + yields %a + + pat loc loc cfu $1==$2 && $1==QUAD /* Convert float -> unsigned */ + with GPR + uses reusing %1, REG + gen + ftrunc %a, %1 + yields %a + + pat loc loc cif $1==$2 && $1==QUAD /* Convert integer -> float */ + with GPR + uses reusing %1, REG + gen + flts %a, %1 + yields %a + + pat loc loc cuf $1==$2 && $1==QUAD /* Convert unsigned -> float */ + with GPR + uses reusing %1, REG + gen + fltu %a, %1 + yields %a + + pat fef /* Split float */ + leaving + loc 0 + loc 0 +#if 0 + cal ".cuf" + lfr QUAD*2 +#endif + + pat fif /* Multiply float and split (?) */ + leaving + mlf QUAD + fef + + pat zrf /* Load a floating zero */ + leaving + loc 0 diff --git a/mach/vc4/test/opcodes.s b/mach/vc4/test/opcodes.s new file mode 100644 index 000000000..87a50d070 --- /dev/null +++ b/mach/vc4/test/opcodes.s @@ -0,0 +1,362 @@ +# +/* + * VideoCore IV assembler test file + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text.
+ */ + +.sect .text +.sect .rom +.sect .data +.sect .bss +.sect .text + +main: + nop + rti + + b r0 + b r31 + bl r0 + bl r31 + tbb r0 + tbb r15 + tbs r0 + tbs r15 + + nop + + mov r0, r1 + cmn r0, r1 + add r0, r1 + bic r0, r1 + mul r0, r1 + eor r0, r1 + sub r0, r1 + and r0, r1 + mvn r0, r1 + ror r0, r1 + cmp r0, r1 + rsb r0, r1 + btst r0, r1 + or r0, r1 + extu r0, r1 + max r0, r1 + bset r0, r1 + min r0, r1 + bclr r0, r1 + adds2 r0, r1 + bchg r0, r1 + adds4 r0, r1 + adds8 r0, r1 + adds16 r0, r1 + exts r0, r1 + neg r0, r1 + lsr r0, r1 + clz r0, r1 + lsl r0, r1 + brev r0, r1 + asr r0, r1 + abs r0, r1 + + nop + + mov.f r0, r1 + cmn.f r0, r1 + add.f r0, r1 + bic.f r0, r1 + mul.f r0, r1 + eor.f r0, r1 + sub.f r0, r1 + and.f r0, r1 + mvn.f r0, r1 + ror.f r0, r1 + cmp.f r0, r1 + rsb.f r0, r1 + btst.f r0, r1 + or.f r0, r1 + extu.f r0, r1 + max.f r0, r1 + bset.f r0, r1 + min.f r0, r1 + bclr.f r0, r1 + adds2.f r0, r1 + bchg.f r0, r1 + adds4.f r0, r1 + adds8.f r0, r1 + adds16.f r0, r1 + exts.f r0, r1 + neg.f r0, r1 + lsr.f r0, r1 + clz.f r0, r1 + lsl.f r0, r1 + brev.f r0, r1 + asr.f r0, r1 + abs.f r0, r1 + + nop + + mov r0, r1, r2 + cmn r0, r1, r2 + add r0, r1, r2 + bic r0, r1, r2 + mul r0, r1, r2 + eor r0, r1, r2 + sub r0, r1, r2 + and r0, r1, r2 + mvn r0, r1, r2 + ror r0, r1, r2 + cmp r0, r1, r2 + rsb r0, r1, r2 + btst r0, r1, r2 + or r0, r1, r2 + extu r0, r1, r2 + max r0, r1, r2 + bset r0, r1, r2 + min r0, r1, r2 + bclr r0, r1, r2 + adds2 r0, r1, r2 + bchg r0, r1, r2 + adds4 r0, r1, r2 + adds8 r0, r1, r2 + adds16 r0, r1, r2 + exts r0, r1, r2 + neg r0, r1, r2 + lsr r0, r1, r2 + clz r0, r1, r2 + lsl r0, r1, r2 + brev r0, r1, r2 + asr r0, r1, r2 + abs r0, r1, r2 + + nop + + mov r0, #0x1f + cmn r0, #0x1f + add r0, #0x1f + bic r0, #0x1f + mul r0, #0x1f + eor r0, #0x1f + sub r0, #0x1f + and r0, #0x1f + mvn r0, #0x1f + ror r0, #0x1f + cmp r0, #0x1f + rsb r0, #0x1f + btst r0, #0x1f + or r0, #0x1f + extu r0, #0x1f + max r0, #0x1f + bset r0, #0x1f + min r0, #0x1f + bclr r0, #0x1f + 
adds2 r0, #0x1f + bchg r0, #0x1f + adds4 r0, #0x1f + adds8 r0, #0x1f + adds16 r0, #0x1f + exts r0, #0x1f + neg r0, #0x1f + lsr r0, #0x1f + clz r0, #0x1f + lsl r0, #0x1f + brev r0, #0x1f + asr r0, #0x1f + abs r0, #0x1f + + nop + + mov.f r0, #0x1f + cmn.f r0, #0x1f + add.f r0, #0x1f + bic.f r0, #0x1f + mul.f r0, #0x1f + eor.f r0, #0x1f + sub.f r0, #0x1f + and.f r0, #0x1f + mvn.f r0, #0x1f + ror.f r0, #0x1f + cmp.f r0, #0x1f + rsb.f r0, #0x1f + btst.f r0, #0x1f + or.f r0, #0x1f + extu.f r0, #0x1f + max.f r0, #0x1f + bset.f r0, #0x1f + min.f r0, #0x1f + bclr.f r0, #0x1f + adds2.f r0, #0x1f + bchg.f r0, #0x1f + adds4.f r0, #0x1f + adds8.f r0, #0x1f + adds16.f r0, #0x1f + exts.f r0, #0x1f + neg.f r0, #0x1f + lsr.f r0, #0x1f + clz.f r0, #0x1f + lsl.f r0, #0x1f + brev.f r0, #0x1f + asr.f r0, #0x1f + abs.f r0, #0x1f + + add r0, #0x12345678 + add r0, r1, #0x12345678 + sub r0, #0x12345678 + + nop + + fadd r0, r1, r2 + fsub r0, r1, r2 + fmul r0, r1, r2 + fdiv r0, r1, r2 + fcmp r0, r1, r2 + fabs r0, r1, r2 + frsb r0, r1, r2 + fmax r0, r1, r2 + frcp r0, r1, r2 + frsqrt r0, r1, r2 + fnmul r0, r1, r2 + fmin r0, r1, r2 + fld1 r0, r1, r2 + fld0 r0, r1, r2 + log2 r0, r1, r2 + exp2 r0, r1, r2 + divs r0, r1, r2 + divu r0, r1, r2 + divs r0, r1, #31 + divu r0, r1, #31 + adds256 r0, r1, r2 + + nop + + fadd.f r0, r1, r2 + fsub.f r0, r1, r2 + fmul.f r0, r1, r2 + fdiv.f r0, r1, r2 + fcmp.f r0, r1, r2 + fabs.f r0, r1, r2 + frsb.f r0, r1, r2 + fmax.f r0, r1, r2 + frcp.f r0, r1, r2 + frsqrt.f r0, r1, r2 + fnmul.f r0, r1, r2 + fmin.f r0, r1, r2 + fld1.f r0, r1, r2 + fld0.f r0, r1, r2 + log2.f r0, r1, r2 + exp2.f r0, r1, r2 + divs.f r0, r1, r2 + divu.f r0, r1, r2 + divs.f r0, r1, #31 + divu.f r0, r1, #31 + adds256.f r0, r1, r2 + +label: + b label + b forward + b label + b main + b.f label + b.f forward + b.f main + bl label + bl forward + bl main +forward: + + push r0 + push r0, lr + push r0-r5 + push r0-r5, lr + push r6 + push r16 + push r24 + push lr + + pop r0 + pop r0, pc + pop r0-r5 + pop 
r0-r5, pc + pop r6 + pop r16 + pop r24 + pop pc + + nop + + ld r0, (sp) + st r0, (sp) + ld r0, 4(sp) + st r0, 4(sp) + ld r0, -4(sp) + st r0, -4(sp) + ld r0, 5(sp) + st r0, 5(sp) + ld r0, -5(sp) + st r0, -5(sp) + + ld r0, (r1) + st r0, (r1) + ld r16, (r1) + st r16, (r1) + ldh r0, (r1) + sth r0, (r1) + ldb r0, (r1) + stb r0, (r1) + ldhs r0, (r1) + sths r0, (r1) + ldh r16, (r1) + sth r16, (r1) + ldb r16, (r1) + stb r16, (r1) + ldhs r16, (r1) + sths r16, (r1) + ld r0, 0x3c (r1) + st r0, 0x3c (r1) + ld r0, 0xfff (r1) + st r0, 0xfff (r1) + ld r1, 0xffff (r0) + st r1, 0xffff (r0) + ld r0, -1 (r1) + st r0, -1 (r1) + ld r16, 0x3c (r1) + st r16, 0x3c (r1) + ld r16, 0xfff (r1) + st r16, 0xfff (r1) + ld r16, 0xffff (r0) + st r16, 0xffff (r0) + ld r16, -1 (r1) + st r16, -1 (r1) + + ld.f r0, (r1) + st.f r0, (r1) + ld.f r0, 8 (r1) + st.f r0, 8 (r1) + + ld r0, (r1, r2) + st r0, (r1, r2) + ld.f r0, (pc, pc) + st.f r0, (pc, pc) + +near: + ld r0, (r1)++ + st r0, (r1)++ + ld.f pc, (pc)++ + st.f pc, (pc)++ + + ld r0, near + ld r0, main + st r0, near + st r0, main + ldb r0, near + ldb r0, main + stb r0, near + stb r0, main + + b.eq r0, r1, near + b r0, r1, near + addcmpb r0, r1, r2, . + addcmpb r0, #1, r2, . + addcmpb r0, r1, #1, . + addcmpb r0, #1, #2, . 
diff --git a/man/.distr b/man/.distr index d21bfd287..4e52c6c32 100644 --- a/man/.distr +++ b/man/.distr @@ -1,14 +1,8 @@ -6500_as.6 -6800_as.6 -6805_as.6 -6809_as.6 -8080_as.6 -z8000_as.6 +i80_as.6 i86_as.6 i386_as.6 -m68k2_as.6 -ns_as.6 -pdp_as.6 +m68020_as.6 +vc4_as.6 z80_as.6 em_cg.6 em_ncg.6 @@ -17,4 +11,3 @@ libpc.7 head pc_prlib.7 uni_ass.6 -proto.make diff --git a/man/8080_as.6 b/man/i80_as.6 similarity index 100% rename from man/8080_as.6 rename to man/i80_as.6 diff --git a/man/m68k2_as.6 b/man/m68020_as.6 similarity index 100% rename from man/m68k2_as.6 rename to man/m68020_as.6 diff --git a/man/vc4_as.6 b/man/vc4_as.6 new file mode 100644 index 000000000..81b10813d --- /dev/null +++ b/man/vc4_as.6 @@ -0,0 +1,45 @@ +.\" $Header$ +.TH VC4_AS 1 +.ad +.SH NAME +vc4_as \- assembler for Broadcom VideoCore IV + +.SH SYNOPSIS +/usr/em/lib/vc4_as [options] argument ... + +.SH DESCRIPTION +This assembler is made with the general framework +described in \fIuni_ass\fP(6). + +.SH SYNTAX +The assembler uses a modified version of the syntax described in +https://github.com/hermanhermitage/videocoreiv/wiki/VideoCore-IV-Programmers-Manual: +condition codes must be prefixed with a full stop. Vector instructions are not +yet supported. + +.SH "SEE ALSO" +uni_ass(6), +ack(1), +.br +https://github.com/hermanhermitage/videocoreiv +.SH EXAMPLE +.nf +.ta 8n 16n 24n 32n 40n 48n +An example of VideoCore IV assembly language: + + ldb r0, __uart_status + b.eq r0, #0, 1b + + ! receive 1 byte (returned in r0) + mov r1, #AUX_MU_LSR_REG + mov r2, #AUX_MU_IO_REG + ! 
loop until char available +recvwait: + ld r3, (r1) + and r3, #0x1 + b.ne r3, #0x1, recvwait + + ldb r0, (r2) +1: + b lr +.fi diff --git a/plat/cpm/descr b/plat/cpm/descr index a72f50276..2d626295a 100644 --- a/plat/cpm/descr +++ b/plat/cpm/descr @@ -3,11 +3,19 @@ # $Revision$ var w=2 +var wa=1 var p=2 +var pa=1 var s=2 +var sa=1 var l=4 +var la=1 var f=4 +var fa=1 var d=8 +var da=1 +var x=8 +var xa=1 var ARCH=i80 var PLATFORM=cpm var PLATFORMDIR={EM}/share/ack/{PLATFORM} diff --git a/plat/linux386/descr b/plat/linux386/descr index ff2b26dfb..3bdd9d768 100644 --- a/plat/linux386/descr +++ b/plat/linux386/descr @@ -3,11 +3,19 @@ # $Revision$ var w=4 -var p=4 +var wa=4 +var p={w} +var pa={w} var s=2 -var l=4 -var f=4 +var sa={s} +var l={w} +var la={w} +var f={w} +var fa={w} var d=8 +var da={d} +var x=8 +var xa={x} var ARCH=i386 var PLATFORM=linux386 var PLATFORMDIR={EM}/share/ack/{PLATFORM} diff --git a/plat/linux68k/descr b/plat/linux68k/descr index 14f973d52..997ec7154 100644 --- a/plat/linux68k/descr +++ b/plat/linux68k/descr @@ -3,11 +3,19 @@ # $Revision: 1.1 $ var w=4 -var p=4 +var wa=4 +var p={w} +var pa={w} var s=2 -var l=4 -var f=4 +var sa={s} +var l={w} +var la={w} +var f={w} +var fa={w} var d=8 +var da={d} +var x=8 +var xa={x} var ARCH=m68020 var PLATFORM=linux68k var PLATFORMDIR={EM}/share/ack/{PLATFORM} diff --git a/plat/linuxppc/descr b/plat/linuxppc/descr index 3311f24c6..69b00fbc5 100644 --- a/plat/linuxppc/descr +++ b/plat/linuxppc/descr @@ -3,11 +3,19 @@ # $Revision: 1.1 $ var w=4 -var p=4 +var wa=4 +var p={w} +var pa={w} var s=2 -var l=4 -var f=4 +var sa={s} +var l={w} +var la={w} +var f={w} +var fa={w} var d=8 +var da={d} +var x=8 +var xa={x} var ARCH=powerpc var PLATFORM=linuxppc var PLATFORMDIR={EM}/share/ack/{PLATFORM} diff --git a/plat/pc86/descr b/plat/pc86/descr index a3f11fc4f..c282e9cc7 100644 --- a/plat/pc86/descr +++ b/plat/pc86/descr @@ -3,11 +3,19 @@ # $Revision$ var w=2 +var wa=1 var p=2 +var pa=1 var s=2 +var sa=1 var l=4 +var la=1 var 
f=4 +var fa=1 var d=8 +var da=1 +var x=8 +var xa=1 var ARCH=i86 var PLATFORM=pc86 var PLATFORMDIR={EM}/share/ack/{PLATFORM} diff --git a/plat/rpi/.distr b/plat/rpi/.distr new file mode 100644 index 000000000..529eec7e9 --- /dev/null +++ b/plat/rpi/.distr @@ -0,0 +1,31 @@ +descr +boot.s +build.mk +README +include/ack/config.h +include/sys/select.h +include/unistd.h +include/pi.h +include/termios.h +libsys/brk.c +libsys/close.c +libsys/creat.c +libsys/errno.s +libsys/getpid.c +libsys/_hol0.s +libsys/isatty.c +libsys/kill.c +libsys/libsysasm.h +libsys/libsys.h +libsys/lseek.c +libsys/open.c +libsys/pi_phys_to_user.s +libsys/pi_uart.s +libsys/pi_user_to_phys.s +libsys/read.c +libsys/select.c +libsys/signal.c +libsys/tcgetattr.c +libsys/tcsetattr.c +libsys/time.c +libsys/write.c diff --git a/plat/rpi/README b/plat/rpi/README new file mode 100644 index 000000000..7d78433dd --- /dev/null +++ b/plat/rpi/README @@ -0,0 +1,69 @@ +VideoCore IV support in the ACK +=============================== + +This is a fairly crude port of the ACK to produce VideoCore IV machine +code, suitable for use on the Raspberry Pi. It produces terrible but +working code. The resulting binaries can be used either bare metal or +loaded as a GPU kernel and executed using a modified mailbox.c (see below). +Currently floating point support is present but incomplete; and as the +VideoCore IV does not have double-precision float support, the C compiler +treats doubles as single precision. + +As much of the standard C library as is relevant works; if +you're running in bare-metal mode, you can hook stdin/stdout up to the +mini UART. (Obviously, in kernel mode you can't.) + +Important note! The malloc heap expects your program to be loaded into a +chunk of memory that's 128kB large. You must make sure that this is the case, +or Bad Stuff will happen. + +Output binaries are fully PIC and can be loaded anywhere (this is one of the +things that makes the code so terrible). 
You must use the pi_user_to_phys() +and pi_phys_to_user() to translate pointers from physical to user and vice +versa. If you don't, Bad Stuff will happen. + + + +Bare metal mode +--------------- + +To run a binary bare metal, compile it: + + ack -mrpi -O program.c -o bootcode.bin + +...and copy the bootcode.bin file to the root of an SD card. Boot the Pi. +Your program will run. + +To use the UART, #include <pi.h> and call pi_init_uart() at the top of your +program. This will set it up and connect it to stdin/stdout. It's 115200 8n1. + + + +Kernel mode +----------- + +This will require some hacking at your end. + +Go here, and follow the instructions. + +https://github.com/hermanhermitage/videocoreiv/wiki/VideoCore-IV-Kernels-under-Linux + +Now compile your program: + + ack -mrpi -O program.c -o alpha.bin + +MAKE SURE YOU AREN'T USING ANY MEMORY ALLOCATION. Copy the alpha.bin onto +the Pi, and run it with mailbox.c. + +To get data in and out, #include <pi.h> and look at the pi_kernel_parameters +variable. It's a structure that is initialised with the data that's passed in +from mailbox.c (currently four pointers and two integers). + +If you want to use malloc() and friends, you'll need to hack mailbox.c so +that the buffer containing the code is at least 128kB, or you're likely to +corrupt the VideoCore's workspace and crash it. + + +David Given +2013-06-06 + diff --git a/plat/rpi/boot.s b/plat/rpi/boot.s new file mode 100644 index 000000000..a76d1e7a7 --- /dev/null +++ b/plat/rpi/boot.s @@ -0,0 +1,120 @@ +# +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +! Declare segments (the order is important). + +.sect .text +.sect .rom +.sect .data +.sect .bss + +.sect .text + +#define gp r15 +#define STACKSIZE 1*1024 + +! MAIN ENTRY POINT + +begtext: + ! This empty space is required by the boot loader.
+ +kernel_start: + ! When running as a kernel, we need to preserve all registers. We save + ! them onto the default stack. + push r0-r24 + b baremetal_start + .space 506 ! first 512 bytes are ignored by the boot loader +baremetal_start: + ! Wipe the bss (including the new stack). + + lea r6, begbss + lea r7, endbss + mov r8, #0 +_1: + stb r8, (r6) + addcmpb.lt r6, #1, r7, _1 + + ! Save system registers. + + st fp, .returnfp + st sp, .returnsp + st lr, .returnlr + + lea gp, begtext + lea sp, .stack + STACKSIZE + + ! Save the kernel parameters. + + sub r0, gp ! fix up pointer + sub r1, gp ! fix up pointer + sub r2, gp ! fix up pointer + sub r3, gp ! fix up pointer + push r0-r5 + sub r0, sp, gp + st r0, _pi_kernel_parameters + + ! Push standard parameters onto the stack and go. + + mov r0, #0 + push r0 ! envp + push r0 ! argv + push r0 ! argc + + ! Call the language startup code. + + bl __m_a_i_n + + ! Fall through to __exit if this returns. + +.define __exit +__exit: + ! It only makes sense to get here if we're in kernel mode. If we're in + ! bare-metal mode, we'll just crash, but that's fine. + + st r0, _pi_kernel_parameters ! save return value + mov r0, sr + ld fp, .returnfp + ld sp, .returnsp + ld lr, .returnlr + pop r0-r24 + ld r0, _pi_kernel_parameters ! restore return value + b lr + +! Define symbols at the beginning of our various segments, so that we can find +! them. (Except .text, which has already been done.) + +.define begtext, begdata, begbss +.sect .data; begdata: +.sect .rom; begrom: +.sect .bss; begbss: + +! Some magic data. All EM systems need these. + +.define .trppc, .ignmask, _errno +.comm .trppc, 4 +.comm .ignmask, 4 +.comm _errno, 4 + +! We store the stack pointer and return address on entry so that we can +! cleanly exit. + +.comm .returnfp, 4 +.comm .returnsp, 4 +.comm .returnlr, 4 + +.define _pi_kernel_parameters +.comm _pi_kernel_parameters, 4 + +.define .linenumber, .filename +.comm .linenumber, 4 ! 
current linenumber (used for debugging) +.comm .filename, 4 ! ptr to current filename (used for debugging) + +! User stack. + +.comm .stack, STACKSIZE + diff --git a/plat/rpi/build.mk b/plat/rpi/build.mk new file mode 100644 index 000000000..c89847607 --- /dev/null +++ b/plat/rpi/build.mk @@ -0,0 +1,52 @@ +# Build script for Raspberry Pi bare-metal executables (using the +# VideoCore IV processor, not the ARM). +# +# © 2013 David Given +# This file is redistributable under the terms of the 3-clause BSD license. +# See the file 'Copying' in the root of the distribution for the full text. + +ARCH := vc4 +PLATFORM := rpi +OPTIMISATION := -O + +D := plat/rpi/ + +platform-headers := \ + unistd.h \ + termios.h \ + pi.h \ + ack/config.h + +platform-libsys := \ + _hol0.s \ + errno.s \ + pi_phys_to_user.s \ + pi_user_to_phys.s \ + pi_uart.s \ + pi_fast_mode.s \ + creat.c \ + close.c \ + open.c \ + read.c \ + write.c \ + isatty.c \ + brk.c \ + getpid.c \ + kill.c \ + lseek.c \ + time.c \ + signal.c \ + tcgetattr.c \ + tcsetattr.c \ + select.c + +$(eval $(call build-platform)) + +define build-rpi-boot-impl + $(call reset) + $(call ackfile, $D/boot.s) + $(call installto, $(PLATIND)/$(PLATFORM)/boot.o) +endef + +$(eval $(build-rpi-boot-impl)) + diff --git a/plat/rpi/descr b/plat/rpi/descr new file mode 100644 index 000000000..6daff9606 --- /dev/null +++ b/plat/rpi/descr @@ -0,0 +1,77 @@ +# $Source$ +# $State$ +# $Revision$ + +var w=4 +var wa=4 +var p={w} +var pa={w} +var s=2 +var sa={s} +var l={w} +var la={w} +var f={w} +var fa={w} +var d={w} +var da={w} +var x={w} +var xa={w} +var ARCH=vc4 +var PLATFORM=rpi +var PLATFORMDIR={EM}/share/ack/{PLATFORM} +var CPP_F=-D__unix +var ALIGN=-a0:2 -a1:4 -a2:4 -a3:4 +var MACHOPT_F=-m8 + +# Override the setting in fe so that files compiled for this platform can see +# the platform-specific headers. 
+ +var C_INCLUDES=-I{PLATFORMDIR}/include -I{EM}/share/ack/include/ansi + +name be + from .m.g + to .s + program {EM}/lib/ack/{PLATFORM}/ncg + args < + stdout + need .e +end +name as + from .s.so + to .o + program {EM}/lib/ack/{PLATFORM}/as + args - -o > < + prep cond +end +name led + from .o.a + to .out + program {EM}/lib/ack/em_led + mapflag -l* LNAME={PLATFORMDIR}/lib* + mapflag -i SEPID=-b1:0 + mapflag -fp FLOATS={EM}/{ILIB}fp + args {ALIGN} {SEPID?} \ + (.e:{HEAD}={PLATFORMDIR}/boot.o) \ + ({RTS}:.ocm.b={PLATFORMDIR}/c-ansi.o) \ + ({RTS}:.c={PLATFORMDIR}/c-ansi.o) \ + ({RTS}:.mod={PLATFORMDIR}/modula2.o) \ + ({RTS}:.p={PLATFORMDIR}/pascal.o) \ + -o > < \ + (.p:{TAIL}={PLATFORMDIR}/libpascal.a) \ + (.b:{TAIL}={PLATFORMDIR}/libbasic.a) \ + (.mod:{TAIL}={PLATFORMDIR}/libmodula2.a) \ + (.ocm:{TAIL}={PLATFORMDIR}/liboccam.a) \ + (.ocm.b.mod.c.p:{TAIL}={PLATFORMDIR}/libc.a) \ + {FLOATS?} \ + (.e:{TAIL}={PLATFORMDIR}/libem.a \ + {PLATFORMDIR}/libsys.a \ + {PLATFORMDIR}/libend.a) + linker +end +name cv + from .out + to .img + program {EM}/bin/aslod + args < > + outfile raspberrypi.bin +end diff --git a/plat/rpi/include/ack/config.h b/plat/rpi/include/ack/config.h new file mode 100644 index 000000000..fd2c48cba --- /dev/null +++ b/plat/rpi/include/ack/config.h @@ -0,0 +1,11 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#ifndef _ACK_CONFIG_H +#define _ACK_CONFIG_H + +#endif diff --git a/plat/rpi/include/pi.h b/plat/rpi/include/pi.h new file mode 100644 index 000000000..24e89a9a1 --- /dev/null +++ b/plat/rpi/include/pi.h @@ -0,0 +1,48 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. 
+ */ + +#ifndef PI_H +#define PI_H + +/* When running in kernel mode, this structure gets the incoming parameters. + * In bare metal mode, it's gibberish. */ + +struct pi_kernel_parameters +{ + int r5; + int r4; + void* r3; + void* r2; + void* r1; + void* r0; +}; + +extern struct pi_kernel_parameters* pi_kernel_parameters; + +/* Initialise the mini UART (only do this if running on bare metal!) */ +extern void pi_init_uart(void); + +/* Converts a pointer from a physical address to a user address. */ +extern void* pi_phys_to_user(void* ptr); + +/* Converts a pointer from a user address to a physical address. */ +extern void* pi_user_to_phys(void* ptr); + +/* Change the clock speed from 19.2MHz to 250MHz. Must be called *before* + * pi_init_uart(). */ +extern void pi_fast_mode(void); + +/* Initialise the RAM. */ +extern void pi_init_ram(void); + +/* The current clock speed (used by pi_init_uart to calculate the correct + * UART settings). */ + +extern int pi_clock_speed; + +#endif + diff --git a/plat/rpi/include/sys/select.h b/plat/rpi/include/sys/select.h new file mode 100644 index 000000000..df7488da4 --- /dev/null +++ b/plat/rpi/include/sys/select.h @@ -0,0 +1,13 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#ifndef _SYS_SELECT_H +#define _SYS_SELECT_H + +#include + +#endif diff --git a/plat/rpi/include/termios.h b/plat/rpi/include/termios.h new file mode 100644 index 000000000..8e0dbb8e8 --- /dev/null +++ b/plat/rpi/include/termios.h @@ -0,0 +1,47 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text.
+ */ + +#ifndef _TERMIOS_H +#define _TERMIOS_H + +typedef unsigned char tcflag_t; + +struct termios +{ + tcflag_t c_iflag; + tcflag_t c_oflag; + tcflag_t c_lflag; + tcflag_t c_cflag; +}; + +#define ONLCR 1 +#define ECHO 2 +#define INLCR 4 + +/* Dummied parameters for compatibility --- only the ones above are + * honoured. */ + +#define BRKINT 0 +#define ICRNL 0 +#define INPCK 0 +#define ISTRIP 0 +#define IXON 0 +#define CS8 0 +#define ICANON 0 +#define IEXTEN 0 +#define ISIG 0 + +#define OPOST ONLCR + +#define TCSANOW 0 +#define TCSADRAIN 1 +#define TCSAFLUSH 2 + +extern int tcgetattr(int fd, struct termios* t); +extern int tcsetattr(int fd, int actions, struct termios* t); + +#endif diff --git a/plat/rpi/include/unistd.h b/plat/rpi/include/unistd.h new file mode 100644 index 000000000..a4d0c4507 --- /dev/null +++ b/plat/rpi/include/unistd.h @@ -0,0 +1,105 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#ifndef _UNISTD_H +#define _UNISTD_H + +#include +#include + +/* Types */ + +typedef int pid_t; +typedef int mode_t; + +typedef long suseconds_t; + +/* Time handling. 
*/ + +struct timeval +{ + time_t tv_sec; + suseconds_t tv_usec; +}; + +struct timezone +{ + int tz_minuteswest; + int tz_dsttime; +}; /* obsolete, unused */ + +extern int gettimeofday(struct timeval* tv, struct timezone* tz); +extern int settimeofday(const struct timeval* tv, const struct timezone* tz); + +/* Constants for file access (open and friends) */ + +enum +{ + O_ACCMODE = 0x3, + + O_RDONLY = 0, + O_WRONLY = 1, + O_RDWR = 2, + + O_CREAT = 0100, + O_TRUNC = 01000, + O_APPEND = 02000, + O_NONBLOCK = 04000 +}; + +/* Special variables */ + +extern char** environ; + +/* Implemented system calls */ + +extern void _exit(int); +extern pid_t getpid(void); +extern void* sbrk(intptr_t increment); +extern int isatty(int d); +extern off_t lseek(int fildes, off_t offset, int whence); +extern int close(int d); +extern int open(const char* path, int access, ...); +extern int creat(const char* path, mode_t mode); +extern int read(int fd, void* buffer, size_t count); +extern int write(int fd, void* buffer, size_t count); + +/* Unimplemented system calls (these are just prototypes to let the library + * compile). */ + +extern int fcntl(int fd, int op, ...); + +/* Signal handling */ + +typedef int sig_atomic_t; + +#define SIG_ERR ((sighandler_t) -1) /* Error return. */ +#define SIG_DFL ((sighandler_t) 0) /* Default action. */ +#define SIG_IGN ((sighandler_t) 1) /* Ignore signal. */ + +#define SIGABRT 6 /* Abort (ANSI) */ +#define SIGILL 11 /* Illegal instruction */ + +#define _NSIG 32 /* Biggest signal number + 1 + (not including real-time signals). 
*/ +typedef void (*sighandler_t)(int); +extern sighandler_t signal(int signum, sighandler_t handler); +extern int raise(int signum); + +/* Select */ + +typedef uint32_t fd_set; + +extern int select(int nfds, fd_set *readfds, fd_set *writefds, + fd_set *exceptfds, struct timeval *timeout); + +#define FD_ZERO(set) do { *set = 0; } while (0) +#define FD_SET(fd, set) do { *set |= (1< +#include +#include +#include + +#define OUT_OF_MEMORY (void*)(-1) /* sbrk returns this on failure */ +#define STACK_BUFFER 1024 /* number of bytes to leave for stack */ + +extern char _end[1]; +static char* current = _end; + +/* Top of heap: we assume that the block of memory the binary is loaded in + * is 256kB long. Because user pointers are always relative to the beginning + * of the block, this makes the end address easy to calculate. */ +static char* max = (char*) (128*1024); + +int brk(void* newend) +{ + if ((newend >= (void*)max) || (newend < (void*)_end)) + return -1; + + current = newend; + return 0; +} + +void* sbrk(intptr_t increment) +{ + char* old; + + if (increment == 0) + return current; + + old = current; + if (brk(old + increment) < 0) + return OUT_OF_MEMORY; + + return old; +} diff --git a/plat/rpi/libsys/close.c b/plat/rpi/libsys/close.c new file mode 100644 index 000000000..60fa0f96b --- /dev/null +++ b/plat/rpi/libsys/close.c @@ -0,0 +1,16 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. 
+ */ + +#include +#include +#include + +int close(int fd) +{ + errno = EBADF; + return -1; +} diff --git a/plat/rpi/libsys/creat.c b/plat/rpi/libsys/creat.c new file mode 100644 index 000000000..7c009e6a0 --- /dev/null +++ b/plat/rpi/libsys/creat.c @@ -0,0 +1,17 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include +#include +#include +#include "libsys.h" + +int open(const char* path, int access, ...) +{ + errno = EACCES; + return -1; +} diff --git a/plat/rpi/libsys/errno.s b/plat/rpi/libsys/errno.s new file mode 100644 index 000000000..a2e1f8b55 --- /dev/null +++ b/plat/rpi/libsys/errno.s @@ -0,0 +1,31 @@ +# +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +! Declare segments (the order is important). + +.sect .text +.sect .rom +.sect .data +.sect .bss + +#define D(e) .define e; e + +.sect .data + +! Define various ACK error numbers. Note that these are *not* ANSI C +! errnos, and are used for different purposes. + +D(ERANGE) = 1 +D(ESET) = 2 +D(EIDIVZ) = 6 +D(EHEAP) = 17 +D(EILLINS) = 18 +D(EODDZ) = 19 +D(ECASE) = 20 +D(EBADMON) = 25 + diff --git a/plat/rpi/libsys/getpid.c b/plat/rpi/libsys/getpid.c new file mode 100644 index 000000000..0ae1f6154 --- /dev/null +++ b/plat/rpi/libsys/getpid.c @@ -0,0 +1,15 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. 
+ */ + +#include +#include +#include + +pid_t getpid(void) +{ + return 0; +} diff --git a/plat/rpi/libsys/isatty.c b/plat/rpi/libsys/isatty.c new file mode 100644 index 000000000..83837ba9c --- /dev/null +++ b/plat/rpi/libsys/isatty.c @@ -0,0 +1,15 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include +#include +#include + +int isatty(int fd) +{ + return 1; +} diff --git a/plat/rpi/libsys/kill.c b/plat/rpi/libsys/kill.c new file mode 100644 index 000000000..bacc405df --- /dev/null +++ b/plat/rpi/libsys/kill.c @@ -0,0 +1,16 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include +#include +#include + +int kill(pid_t pid, int sig) +{ + errno = EINVAL; + return -1; +} diff --git a/plat/rpi/libsys/libsys.h b/plat/rpi/libsys/libsys.h new file mode 100644 index 000000000..44b3c7818 --- /dev/null +++ b/plat/rpi/libsys/libsys.h @@ -0,0 +1,19 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. 
+ */ + +#ifndef LIBSYS_H +#define LIBSYS_H + +extern void _sys_rawwrite(unsigned char b); +extern unsigned char _sys_rawread(void); +extern int _sys_rawpoll(void); + +extern void _sys_write_tty(char c); + +extern int _sys_ttyflags; + +#endif diff --git a/plat/rpi/libsys/libsysasm.h b/plat/rpi/libsys/libsysasm.h new file mode 100644 index 000000000..16dbbcfba --- /dev/null +++ b/plat/rpi/libsys/libsysasm.h @@ -0,0 +1,20 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#ifndef LIBSYSASM_H +#define LIBSYSASM_H + +! Declare segments (the order is important). + +.sect .text +.sect .rom +.sect .data +.sect .bss + +#define gp r15 + +#endif diff --git a/plat/rpi/libsys/lseek.c b/plat/rpi/libsys/lseek.c new file mode 100644 index 000000000..9a487d747 --- /dev/null +++ b/plat/rpi/libsys/lseek.c @@ -0,0 +1,16 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include +#include +#include + +off_t lseek(int fd, off_t offset, int whence) +{ + errno = EINVAL; + return -1; +} diff --git a/plat/rpi/libsys/open.c b/plat/rpi/libsys/open.c new file mode 100644 index 000000000..cbdc30ec1 --- /dev/null +++ b/plat/rpi/libsys/open.c @@ -0,0 +1,16 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. 
+ */ + +#include +#include +#include +#include "libsys.h" + +int creat(const char* path, int mode) +{ + return open(path, O_CREAT|O_WRONLY|O_TRUNC, mode); +} diff --git a/plat/rpi/libsys/pi_fast_mode.s b/plat/rpi/libsys/pi_fast_mode.s new file mode 100644 index 000000000..8b50990f0 --- /dev/null +++ b/plat/rpi/libsys/pi_fast_mode.s @@ -0,0 +1,70 @@ +# +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include "libsysasm.h" + +.sect .text + +#define PASSWD 0x5a000000 +#define PLLC 5 +#define OSC 1 + +#define A2W 0x7e102000 +#define A2W_PLLC_MULT 0x7e102020 +#define A2W_PLLC_MULT2 0x7e102120 +#define A2W_PLLC_MULT_FRACT 0x7e102220 +#define A2W_PLLx_DIV 0x7e102620 + +#define CM 0x7e101000 +#define CM_VPU_CTL 0x7e101008 +#define CM_VPU_DIV 0x7e10100c +#define CM_TIME_DIV 0x7e1010ec +#define CM_TIME_CTL 0x7e1010e8 + +#define hash # +#define copy(A) A +#define poke(A, V) \ + mov r0, copy(hash) V; mov r1, copy(hash) A; st r0, (r1) + +! Changes the clock speed to 250MHz. 
+ +.define _pi_fast_mode +_pi_fast_mode: + poke(A2W + 0x190, 0x5a000001) + poke(A2W_PLLC_MULT_FRACT, PASSWD | 87380) + poke(A2W_PLLC_MULT2, PASSWD | 52 | 0x1000) + poke(A2W + 0x3c, 0x5a000100) + poke(A2W + 0x38, 0x5a000000) + poke(A2W + 0x34, 0x5a144000) + poke(A2W + 0x30, 0x5a000000) + poke(CM + 0x108, 0x5a000200) + poke(CM + 0x108, 0x5a0002aa) + poke(A2W + 0x2c, 0x5a000000) + poke(A2W + 0x28, 0x5a400000) + poke(A2W + 0x24, 0x5a000005) + poke(A2W_PLLC_MULT, PASSWD | 52 | 0x555000) + poke(A2W_PLLC_MULT2, PASSWD | 52 | 0x21000) + poke(A2W + 0x2c, 0x5a000042) + poke(A2W + 0x28, 0x5a500401) + poke(A2W + 0x24, 0x5a004005) + poke(A2W_PLLC_MULT, PASSWD | 52 | 0x555000) + poke(A2W_PLLx_DIV, PASSWD | 2) + poke(CM + 0x108, 0x5a0002ab) + poke(CM + 0x108, 0x5a0002aa) + poke(CM + 0x108, 0x5a0002a8) + poke(CM_VPU_CTL, PASSWD | 0x200 | OSC | 0x40) + poke(CM_VPU_DIV, PASSWD | [4 << 12]) + poke(CM_VPU_CTL, PASSWD | PLLC | 0x40) + poke(CM_VPU_CTL, PASSWD | PLLC | 0x50) + poke(CM_TIME_DIV, PASSWD | [19 << 12] | 819) + poke(CM_TIME_CTL, PASSWD | OSC | 0x10) + + mov r0, #250000000 + st r0, _pi_clock_speed + b lr + diff --git a/plat/rpi/libsys/pi_phys_to_user.s b/plat/rpi/libsys/pi_phys_to_user.s new file mode 100644 index 000000000..d67cac895 --- /dev/null +++ b/plat/rpi/libsys/pi_phys_to_user.s @@ -0,0 +1,20 @@ +# +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include "libsysasm.h" + +.sect .text + +! Transforms a physical address into a user address. 
+ +.define _pi_phys_to_user +_pi_phys_to_user: + ld r0, 0 (sp) + sub r0, gp + b lr + diff --git a/plat/rpi/libsys/pi_uart.s b/plat/rpi/libsys/pi_uart.s new file mode 100644 index 000000000..b7ce9898e --- /dev/null +++ b/plat/rpi/libsys/pi_uart.s @@ -0,0 +1,185 @@ +# +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include "libsysasm.h" + +.sect .text + +! Because of the low system clock rate, this baud rate might be inaccurate +! So be careful with your serial/terminal, some adjustment may be necessary. +TARGET_BAUD_RATE = 115200 + +GPFSEL1 = 0x7e200004 +GPSET0 = 0x7e20001C +GPCLR0 = 0x7e200028 +GPPUD = 0x7e200094 +GPPUDCLK0 = 0x7e200098 + +AUX_ENABLES = 0x7e215004 +AUX_MU_IO_REG = 0x7e215040 +AUX_MU_IER_REG = 0x7e215044 +AUX_MU_IIR_REG = 0x7e215048 +AUX_MU_LCR_REG = 0x7e21504C +AUX_MU_MCR_REG = 0x7e215050 +AUX_MU_LSR_REG = 0x7e215054 +AUX_MU_MSR_REG = 0x7e215058 +AUX_MU_SCRATCH = 0x7e21505C +AUX_MU_CNTL_REG = 0x7e215060 +AUX_MU_STAT_REG = 0x7e215064 +AUX_MU_BAUD_REG = 0x7e215068 + +! Sets up the mini UART for use as a console. + +.define _pi_init_uart +_pi_init_uart: + ! Configure TX and RX GPIO pins for Mini Uart function. + mov r1, #GPFSEL1 + ld r0, (r1) + and r0, #~[7<<12] + or r0, #2<<12 + and r0, #~[7<<15] + or r0, #2<<15 + st r0, (r1) + + mov r1, #GPPUD + mov r0, #0 + st r0, (r1) + +delay1: + add r0, #1 + cmp r0, #150 + b.ne delay1 + + mov r1, #GPPUDCLK0 + mov r0, #[1<<14]|[1<<15] + st r0, (r1) + + mov r0, #0 +delay2: + add r0, #1 + cmp r0, #150 + b.ne delay2 + + mov r1, #GPPUDCLK0 + mov r0, #0 + st r0, (r1) + + ! 
Set up serial port + mov r1, #AUX_ENABLES + mov r0, #1 + st r0, (r1) + + mov r1, #AUX_MU_IER_REG + mov r0, #0 + st r0, (r1) + + mov r1, #AUX_MU_CNTL_REG + mov r0, #0 + st r0, (r1) + + mov r1, #AUX_MU_LCR_REG + mov r0, #3 + st r0, (r1) + + mov r1, #AUX_MU_MCR_REG + mov r0, #0 + st r0, (r1) + + mov r1, #AUX_MU_IER_REG + mov r0, #0 + st r0, (r1) + + mov r1, #AUX_MU_IIR_REG + mov r0, #0xC6 + st r0, (r1) + + mov r1, #AUX_MU_BAUD_REG + ld r0, _pi_clock_speed + mov r2, #TARGET_BAUD_RATE*8 + divu r0, r0, r2 + sub r0, #1 + st r0, (r1) + + mov r1, #AUX_MU_LCR_REG + mov r0, #3 + st r0, (r1) + + mov r1, #AUX_MU_CNTL_REG + mov r0, #3 + st r0, (r1) + + ! Mark the uart as being initialised. + mov r0, #1 + stb r0, __uart_status + + b lr + +! Send a single byte. + +.define __sys_rawwrite +__sys_rawwrite: + ldb r0, __uart_status + b.eq r0, #0, 1f + + ld r0, (sp) + mov r1, #AUX_MU_LSR_REG + ! loop until space available in Tx buffer +sendwait: + ld r2, (r1) + and r2, #0x20 + cmp r2, #0x20 + b.ne sendwait + + mov r1, #AUX_MU_IO_REG + stb r0, (r1) + +1: + b lr + +! Poll to see if there's incoming data available. + +.define __sys_rawpoll +.define __sys_rawpoll +__sys_rawpoll: + ldb r0, __uart_status + b.eq r0, #0, 1b + + mov r1, #AUX_MU_LSR_REG + ld r0, (r1) + and r0, #0x1 ! 0 if no data, 1 if data +1: + b lr + +! Receive a single byte. + +.define __sys_rawread +__sys_rawread: + ldb r0, __uart_status + b.eq r0, #0, 1b + + ! receive 1 byte (returned in r0) + mov r1, #AUX_MU_LSR_REG + mov r2, #AUX_MU_IO_REG + ! loop until char available +recvwait: + ld r3, (r1) + and r3, #0x1 + b.ne r3, #0x1, recvwait + + ldb r0, (r2) +1: + b lr + +.comm __uart_status, 1 + +.sect .data +.define _pi_clock_speed + +! 
System clock is running directly off the 19.2MHz crystal at initial reset +_pi_clock_speed: + .data4 19200000 diff --git a/plat/rpi/libsys/pi_user_to_phys.s b/plat/rpi/libsys/pi_user_to_phys.s new file mode 100644 index 000000000..dd62c069a --- /dev/null +++ b/plat/rpi/libsys/pi_user_to_phys.s @@ -0,0 +1,20 @@ +# +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include "libsysasm.h" + +.sect .text + +! Transforms a user address into a physical address. + +.define _pi_user_to_phys +_pi_user_to_phys: + ld r0, 0 (sp) + add r0, gp + b lr + diff --git a/plat/rpi/libsys/read.c b/plat/rpi/libsys/read.c new file mode 100644 index 000000000..227c89997 --- /dev/null +++ b/plat/rpi/libsys/read.c @@ -0,0 +1,41 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include +#include +#include +#include +#include "libsys.h" + +int read(int fd, void* buffer, size_t count) +{ + char i; + + /* We're only allowed to read from fd 0, 1 or 2. */ + + if ((fd < 0) || (fd > 2)) + { + errno = EBADF; + return -1; + } + + /* Empty buffer? */ + + if (count == 0) + return 0; + + /* Read one byte. */ + + i = _sys_rawread(); + if ((i == '\r') && !(_sys_ttyflags & INLCR)) + i = '\n'; + if (_sys_ttyflags & ECHO) + _sys_write_tty(i); + + *(char*)buffer = i; + return 1; +} diff --git a/plat/rpi/libsys/select.c b/plat/rpi/libsys/select.c new file mode 100644 index 000000000..280bfd694 --- /dev/null +++ b/plat/rpi/libsys/select.c @@ -0,0 +1,65 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. 
+ * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include +#include +#include +#include +#include +#include "libsys.h" + +#define TICKS_PER_SEC 1000000 + +typedef int condition_t(void); + +static int nop_condition(void) +{ + return 0; +} + +int select(int nfds, fd_set *readfds, fd_set *writefds, + fd_set *exceptfds, struct timeval *timeout) +{ + int result = 0; + condition_t* condition = nop_condition; + + if (FD_ISSET(0, readfds)) + condition = _sys_rawpoll; + + FD_ZERO(readfds); + FD_ZERO(writefds); + FD_ZERO(exceptfds); + + if (timeout) + { + /* Wait for a specified amount of time. */ + + uint32_t ticks = (timeout->tv_sec * TICKS_PER_SEC) + + (timeout->tv_usec * (TICKS_PER_SEC/1000000)); + uint32_t* timer_clo = pi_phys_to_user((void*) 0x7e003004); + uint32_t ra = *timer_clo; + + while (!condition() && ((*timer_clo - ra) < ticks)) + ; + } + else + { + /* Wait forever. */ + + while (!condition()) + ; + + } + + if ((condition == _sys_rawpoll) && condition()) + { + FD_SET(0, readfds); + result = 1; + } + + return result; +} diff --git a/plat/rpi/libsys/signal.c b/plat/rpi/libsys/signal.c new file mode 100644 index 000000000..10a2ded29 --- /dev/null +++ b/plat/rpi/libsys/signal.c @@ -0,0 +1,17 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include +#include +#include +#include +#include "libsys.h" + +sighandler_t signal(int signum, sighandler_t handler) +{ + return SIG_DFL; +} diff --git a/plat/rpi/libsys/tcgetattr.c b/plat/rpi/libsys/tcgetattr.c new file mode 100644 index 000000000..3b099afb5 --- /dev/null +++ b/plat/rpi/libsys/tcgetattr.c @@ -0,0 +1,22 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. 
+ * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include +#include +#include +#include +#include "libsys.h" + +int tcgetattr(int fd, struct termios* t) +{ + t->c_iflag = _sys_ttyflags & INLCR; + t->c_oflag = _sys_ttyflags & ONLCR; + t->c_lflag = _sys_ttyflags & ECHO; + t->c_cflag = 0; + return 0; +} + diff --git a/plat/rpi/libsys/tcsetattr.c b/plat/rpi/libsys/tcsetattr.c new file mode 100644 index 000000000..1943d33e0 --- /dev/null +++ b/plat/rpi/libsys/tcsetattr.c @@ -0,0 +1,19 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include +#include +#include +#include +#include "libsys.h" + +int tcsetattr(int fd, int actions, struct termios* t) +{ + _sys_ttyflags = t->c_iflag | t->c_oflag | t->c_lflag; + return 0; +} + diff --git a/plat/rpi/libsys/time.c b/plat/rpi/libsys/time.c new file mode 100644 index 000000000..e448a33d0 --- /dev/null +++ b/plat/rpi/libsys/time.c @@ -0,0 +1,19 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. + */ + +#include +#include +#include +#include +#include "libsys.h" + +time_t time(time_t* t) +{ + if (t) + *t = 0; + return 0; +} diff --git a/plat/rpi/libsys/write.c b/plat/rpi/libsys/write.c new file mode 100644 index 000000000..0fba49884 --- /dev/null +++ b/plat/rpi/libsys/write.c @@ -0,0 +1,49 @@ +/* + * Raspberry Pi support library for the ACK + * © 2013 David Given + * This file is redistributable under the terms of the 3-clause BSD license. + * See the file 'Copying' in the root of the distribution for the full text. 
+ */ + +#include +#include +#include +#include +#include "libsys.h" + +int _sys_ttyflags = ONLCR | INLCR | ECHO; + +void _sys_write_tty(char c) +{ + _sys_rawwrite(c); + if ((c == '\n') && (_sys_ttyflags & ONLCR)) + _sys_rawwrite('\r'); +} + +int write(int fd, void* buffer, size_t count) +{ + int i; + char* p = buffer; + + /* We're only allowed to write to fd 0, 1 or 2. */ + + if ((fd < 0) || (fd > 2)) + { + errno = EBADF; + return -1; + } + + /* Write all data. */ + + i = 0; + while (i < count) + { + _sys_write_tty(*p++); + + i++; + } + + /* No failures. */ + + return count; +} diff --git a/util/amisc/ashow.c b/util/amisc/ashow.c index 809937d71..b11e60e76 100644 --- a/util/amisc/ashow.c +++ b/util/amisc/ashow.c @@ -143,6 +143,9 @@ showrelo() case RELOH2: printf("\ttop 2 bytes of a 4 byte word\n"); break; + case RELOVC4: + printf("\tVideoCore IV address in 32-bit instruction\n"); + break; default: printf("\tunknown relocation type %d\n", relrec.or_type & RELSZ); break; diff --git a/util/led/ack.out.5 b/util/led/ack.out.5 index 8e85b3f92..d9e24bff6 100644 --- a/util/led/ack.out.5 +++ b/util/led/ack.out.5 @@ -164,6 +164,7 @@ struct outrelo { #define RELO4 0x03 /* 4 bytes */ #define RELOPPC 0x04 /* 26-bit PowerPC address */ #define RELOH2 0x05 /* write top 2 bytes of 4 byte word */ +#define RELOVC4 0x06 /* VideoCore IV address in 32-bit instruction */ #define RELPC 0x08 /* pc relative */ #define RELBR 0x10 /* High order byte lowest address. */ #define RELWR 0x20 /* High order word lowest address.
*/ diff --git a/util/led/relocate.c b/util/led/relocate.c index 93b1e9c05..3cc9ff904 100644 --- a/util/led/relocate.c +++ b/util/led/relocate.c @@ -8,6 +8,8 @@ static char rcsid[] = "$Id$"; #include #include +#include +#include #include "out.h" #include "const.h" #include "debug.h" @@ -43,6 +45,65 @@ static long read4(char* addr, int type) return ((long)word1 << (2 * WIDTH)) + word0; } +/* VideoCore 4 fixups are complex as we need to patch the instruction in + * one of several different ways (depending on what the instruction is). + */ + +static long get_vc4_valu(char* addr) +{ + uint16_t opcode = read2(addr, 0); + + if ((opcode & 0xff00) == 0xe700) + { + /* ld rd, $+o: [1110 0111 ww 0 d:5] [11111 o:27] + * st rd, $+o: [1110 0111 ww 1 d:5] [11111 o:27] + */ + + int32_t value = read4(addr+2, 0); + value &= 0x07ffffff; + value = value<<5>>5; + return value; + } + + if ((opcode & 0xf080) == 0x9000) + { + /* b $+o*2: [1001 cccc 0ooo oooo] [oooo oooo oooo oooo] + * Yes, big-endian (the first 16 bits is the MSB). + */ + + uint32_t value = read4(addr, RELWR); + value &= 0x007fffff; + value = value<<9>>9; + value *= 2; + return value; + } + + if ((opcode & 0xf080) == 0x9080) + { + /* bl $+o*2: [1001 oooo 1ooo oooo] [oooo oooo oooo oooo] + * Yes, big-endian (the first 16 bits is the MSB). + * (Note that o is split.) + */ + + int32_t value = read4(addr, RELWR); + int32_t lov = value & 0x007fffff; + int32_t hiv = value & 0x0f000000; + value = lov | (hiv>>1); + value = value<<5>>5; + value *= 2; + return value; + } + + if ((opcode & 0xffe0) == 0xe500) + { + /* lea: [1110 0101 000 d:5] [o:32] */ + + return read4(addr+2, 0); + } + + assert(0 && "unrecognised VC4 instruction"); +} + /* * The bits in type indicate how many bytes the value occupies and what * significance should be attributed to each byte. 
@@ -63,6 +124,8 @@ getvalu(addr, type) return read4(addr, type) & 0x03FFFFFD; case RELOH2: return read2(addr, type) << 16; + case RELOVC4: + return get_vc4_valu(addr); default: fatal("bad relocation size"); } @@ -106,6 +169,60 @@ static void write4(long valu, char* addr, int type) } } +/* VideoCore 4 fixups are complex as we need to patch the instruction in + * one of several different ways (depending on what the instruction is). + */ + +static void put_vc4_valu(char* addr, long value) +{ + uint16_t opcode = read2(addr, 0); + + if ((opcode & 0xff00) == 0xe700) + { + /* ld rd, o, (pc): [1110 0111 ww 0 d:5] [11111 o:27] + * st rd, o, (pc): [1110 0111 ww 1 d:5] [11111 o:27] + */ + + uint32_t v = read4(addr+2, 0); + v &= 0xf8000000; + v |= value & 0x07ffffff; + write4(v, addr+2, 0); + } + else if ((opcode & 0xf080) == 0x9000) + { + /* b dest: [1001 cccc 0ooo oooo] [oooo oooo oooo oooo] + * Yes, big-endian (the first 16 bits is the MSB). + */ + + uint32_t v = read4(addr, RELWR); + v &= 0xff800000; + v |= (value/2) & 0x007fffff; + write4(v, addr, RELWR); + } + else if ((opcode & 0xf080) == 0x9080) + { + /* bl dest: [1001 oooo 1ooo oooo] [oooo oooo oooo oooo] + * Yes, big-endian (the first 16 bits is the MSB). + * (Note that o is split.) + */ + + uint32_t v = read4(addr, RELWR); + uint32_t lovalue = (value/2) & 0x007fffff; + uint32_t hivalue = (value/2) & 0x07800000; + v &= 0xf0800000; + v |= lovalue | (hivalue<<1); + write4(v, addr, RELWR); + } + else if ((opcode & 0xffe0) == 0xe500) + { + /* lea: [1110 0101 000 d:5] [o:32] */ + + write4(value, addr+2, 0); + } + else + assert(0 && "unrecognised VC4 instruction"); +} + /* * The bits in type indicate how many bytes the value occupies and what * significance should be attributed to each byte. @@ -138,6 +255,9 @@ putvalu(valu, addr, type) case RELOH2: write2(valu>>16, addr, type); break; + case RELOVC4: + put_vc4_valu(addr, valu); + break; default: fatal("bad relocation size"); }