diff --git a/mach/vc4/as/mach5.c b/mach/vc4/as/mach5.c index d6f71e7fb..b1ce314bc 100644 --- a/mach/vc4/as/mach5.c +++ b/mach/vc4/as/mach5.c @@ -7,6 +7,11 @@ #define maskx(v, x) (v & ((1<<(x))-1)) +static void toobig(void) +{ + serror("offset too big to encode into instruction"); +} + /* Assemble an ALU instruction where rb is a register. */ void alu_instr_reg(quad op, int cc, int rd, int ra, int rb) @@ -90,6 +95,7 @@ void misc_instr_lit(quad op, int cc, int rd, int ra, quad value) void branch_instr(int bl, int cc, struct expr_t* expr) { + quad pc = DOTVAL; quad type = expr->typ & S_TYP; /* Sanity checking. */ @@ -114,7 +120,7 @@ void branch_instr(int bl, int cc, struct expr_t* expr) /* The VC4 branch instructions express distance in 2-byte * words. */ - int d = (expr->val - DOTVAL) / 2; + int d = (expr->val - pc) / 2; /* We now know the worst case for the instruction layout. At * this point we can emit the instructions, which may shrink @@ -126,7 +132,7 @@ void branch_instr(int bl, int cc, struct expr_t* expr) * close enough to the program counter, we can use a short- * form instruction. */ - if ((d >= -128) && (d < 127)) + if (fitx(d, 7)) { emit2(B16(00011000,00000000) | (cc<<7) | (d&0x7f)); break; @@ -136,19 +142,29 @@ void branch_instr(int bl, int cc, struct expr_t* expr) /* Absolute addresses and references to other sections * need the full 32 bits. */ - newrelo(expr->typ, RELOVC4 | RELPC); + newrelo(expr->typ, RELOVC4|RELPC); if (bl) { - quad v = d & 0x07ffffff; - quad hiv = v >> 23; - quad lov = v & 0x007fffff; + quad v, hiv, lov; + + if (!fitx(d, 27)) + toobig(); + + v = maskx(d, 27); + hiv = v >> 23; + lov = v & 0x007fffff; emit2(B16(10010000,10000000) | (lov>>16) | (hiv<<8)); emit2(B16(00000000,00000000) | (lov&0xffff)); } else { - quad v = d & 0x007fffff; + quad v; + + if (!fitx(d, 23)) + toobig(); + + v = maskx(d, 23); emit2(B16(10010000,00000000) | (cc<<8) | (v>>16)); emit2(B16(00000000,00000000) | (v&0xffff)); } @@ -334,6 +350,8 @@ void mem_postincr_instr(quad opcode, int cc, int rd, int rs) void mem_address_instr(quad opcode, int rd, struct expr_t* expr) { + static const char sizes[] = {4, 2, 1, 2}; + int size = sizes[opcode]; quad type = expr->typ & S_TYP; /* Sanity checking. */ @@ -354,10 +372,7 @@ void mem_address_instr(quad opcode, int rd, struct expr_t* expr) case 1: case 2: { - /* The VC4 branch instructions express distance in 2-byte - * words. */ - - int d = (expr->val - DOTVAL) / 2; + int d = expr->val - DOTVAL; /* We now know the worst case for the instruction layout. At * this point we can emit the instructions, which may shrink @@ -365,24 +380,30 @@ void mem_address_instr(quad opcode, int rd, struct expr_t* expr) if (type == DOTTYP) { + int scaledd = d/size; + /* This is a reference to an address within this section. If * it's close enough to the program counter, we can use a * shorter instruction. */ - if (fitx(d, 16)) + if (fitx(scaledd, 16)) { emit2(B16(10101010,00000000) | (opcode<<5) | (rd<<0)); - emit2(d); + emit2(scaledd); return; } } /* Otherwise we need the full 48 bits. */ - if (!fitx(d, 27)) - serror("offset too big to encode into instruction"); + newrelo(expr->typ, RELOVC4|RELPC); - newrelo(expr->typ, RELOVC4 | RELPC); + /* VC4 relocations store the PC-relative delta into the + * destination section in the instruction data. The linker will + * massage this, and scale it appropriately. */ + + if (!fitx(d, 27)) + toobig(); emit2(B16(11100111,00000000) | (opcode<<5) | (rd<<0)); emit4((31<<27) | maskx(d, 27)); @@ -493,8 +514,19 @@ void lea_stack_instr(int rd, long va, int rs) void lea_address_instr(int rd, struct expr_t* expr) { - newrelo(expr->typ, RELOVC4); + quad pc = DOTVAL; + quad type = expr->typ & S_TYP; + + if (type == S_ABS) + serror("can't use absolute addresses here"); + + newrelo(expr->typ, RELOVC4|RELPC); + + /* VC4 relocations store the PC-relative delta into the + * destination section in the instruction data. The linker will + * massage this, and scale it appropriately. */ + emit2(B16(11100101,00000000) | (rd<<0)); - emit4(expr->val); + emit4(expr->val - pc); } diff --git a/mach/vc4/libem/dummy.s b/mach/vc4/libem/dummy.s index 4edaa030f..fdbcc4c38 100644 --- a/mach/vc4/libem/dummy.s +++ b/mach/vc4/libem/dummy.s @@ -7,3 +7,8 @@ */ #include "videocore.h" + +.define __dummy +.sect .data +__dummy: + diff --git a/plat/rpi/boot.s b/plat/rpi/boot.s index 3cf4f3fe1..b848e65c4 100644 --- a/plat/rpi/boot.s +++ b/plat/rpi/boot.s @@ -16,6 +16,10 @@ .sect .text begtext: + lea r15, begtext + st sp, .returnsp + st lr, .returnlr + #if 0 ! Wipe the bss. (I'm a little suprised that __m_a_i_n doesn't do this.) @@ -33,6 +37,12 @@ begtext: #endif b __m_a_i_n +.define __exit +__exit: + ld sp, .returnsp + ld lr, .returnlr + b lr + ! Define symbols at the beginning of our various segments, so that we can find ! them. (Except .text, which has already been done.) @@ -47,3 +57,9 @@ begtext: .comm .trppc, 4 .comm .ignmask, 4 .comm _errno, 4 + +! We store the stack pointer and return address on entry so that we can +! cleanly exit. + +.comm .returnsp, 4 +.comm .returnlr, 4 diff --git a/util/led/relocate.c b/util/led/relocate.c index f44a34b96..3cc9ff904 100644 --- a/util/led/relocate.c +++ b/util/led/relocate.c @@ -8,6 +8,7 @@ static char rcsid[] = "$Id$"; #include #include +#include #include #include "out.h" #include "const.h" @@ -44,6 +45,65 @@ static long read4(char* addr, int type) return ((long)word1 << (2 * WIDTH)) + word0; } +/* VideoCore 4 fixups are complex as we need to patch the instruction in + * one of several different ways (depending on what the instruction is). + */ + +static long get_vc4_valu(char* addr) +{ + uint16_t opcode = read2(addr, 0); + + if ((opcode & 0xff00) == 0xe700) + { + /* ld rd, $+o: [1110 0111 ww 0 d:5] [11111 o:27] + * st rd, $+o: [1110 0111 ww 1 d:5] [11111 o:27] + */ + + int32_t value = read4(addr+2, 0); + value &= 0x07ffffff; + value = value<<5>>5; + return value; + } + + if ((opcode & 0xf080) == 0x9000) + { + /* b $+o*2: [1001 cccc 0ooo oooo] [oooo oooo oooo oooo] + * Yes, big-endian (the first 16 bits is the MSB). + */ + + uint32_t value = read4(addr, RELWR); + value &= 0x007fffff; + value = value<<9>>9; + value *= 2; + return value; + } + + if ((opcode & 0xf080) == 0x9080) + { + /* bl $+o*2: [1001 oooo 1ooo oooo] [oooo oooo oooo oooo] + * Yes, big-endian (the first 16 bits is the MSB). + * (Note that o is split.) + */ + + int32_t value = read4(addr, RELWR); + int32_t lov = value & 0x007fffff; + int32_t hiv = value & 0x0f000000; + value = lov | (hiv>>1); + value = value<<5>>5; + value *= 2; + return value; + } + + if ((opcode & 0xffe0) == 0xe500) + { + /* lea: [1110 0101 000 d:5] [o:32] */ + + return read4(addr+2, 0); + } + + assert(0 && "unrecognised VC4 instruction"); +} + /* * The bits in type indicate how many bytes the value occupies and what * significance should be attributed to each byte. @@ -65,21 +125,7 @@ getvalu(addr, type) case RELOH2: return read2(addr, type) << 16; case RELOVC4: - { - long i = read4(addr, type); - if (i & 0x00800000) - { - /* Branch instruction. */ - return (i<<9)>>9; - } - else - { - /* Branch-link instruction. */ - long hi = (i<<4)>>28; - long lo = (i & 0x007fffff); - return lo | (hi<<23); - } - } + return get_vc4_valu(addr); default: fatal("bad relocation size"); } @@ -123,6 +169,60 @@ static void write4(long valu, char* addr, int type) } } +/* VideoCore 4 fixups are complex as we need to patch the instruction in + * one of several different ways (depending on what the instruction is). + */ + +static void put_vc4_valu(char* addr, long value) +{ + uint16_t opcode = read2(addr, 0); + + if ((opcode & 0xff00) == 0xe700) + { + /* ld rd, o, (pc): [1110 0111 ww 0 d:5] [11111 o:27] + * st rd, o, (pc): [1110 0111 ww 1 d:5] [11111 o:27] + */ + + uint32_t v = read4(addr+2, 0); + v &= 0xf8000000; + v |= value & 0x07ffffff; + write4(v, addr+2, 0); + } + else if ((opcode & 0xf080) == 0x9000) + { + /* b dest: [1001 cccc 0ooo oooo] [oooo oooo oooo oooo] + * Yes, big-endian (the first 16 bits is the MSB). + */ + + uint32_t v = read4(addr, RELWR); + v &= 0xff800000; + v |= (value/2) & 0x007fffff; + write4(v, addr, RELWR); + } + else if ((opcode & 0xf080) == 0x9080) + { + /* bl dest: [1001 oooo 1ooo oooo] [oooo oooo oooo oooo] + * Yes, big-endian (the first 16 bits is the MSB). + * (Note that o is split.) + */ + + uint32_t v = read4(addr, RELWR); + uint32_t lovalue = (value/2) & 0x007fffff; + uint32_t hivalue = (value/2) & 0x07800000; + v &= 0xf0800000; + v |= lovalue | (hivalue<<1); + write4(v, addr, RELWR); + } + else if ((opcode & 0xffe0) == 0xe500) + { + /* lea: [1110 0101 000 d:5] [o:32] */ + + write4(value, addr+2, 0); + } + else + assert(0 && "unrecognised VC4 instruction"); +} + /* * The bits in type indicate how many bytes the value occupies and what * significance should be attributed to each byte. @@ -156,27 +256,8 @@ putvalu(valu, addr, type) write2(valu>>16, addr, type); break; case RELOVC4: - { - long i = read4(addr, type); - if (i & 0x00800000) - { - /* Branch instruction. */ - unsigned v = (valu/2) & 0x007fffff; - i &= ~0x007fffff; - i |= v; - } - else - { - /* Branch-link instruction. */ - unsigned v = (valu/2) & 0x07ffffff; - unsigned hiv = v >> 23; - unsigned lov = v & 0x007fffff; - i &= ~0x0f7fffff; - i |= (lov>>16) | (hiv<<24); - } - write4(i, addr, type); + put_vc4_valu(addr, valu); break; - } default: fatal("bad relocation size"); }