Reworked VC4 relocations and some of the instruction encoding to be actually correct. Now generating what could be real code!

--HG--
branch : dtrg-videocore
This commit is contained in:
David Given 2013-05-21 23:17:30 +01:00
parent 1312fe298b
commit 5e9102955c
4 changed files with 187 additions and 53 deletions

View file

@ -7,6 +7,11 @@
/* Keep only the low x bits of v. Both arguments are parenthesised so that an
 * expression argument such as maskx(a|b, n) has the mask applied to the whole
 * value rather than to its last operand. x must be small enough that 1<<x is
 * representable as an int. */
#define maskx(v, x) ((v) & ((1<<(x))-1))
/* Report, through serror(), that a computed offset cannot be encoded in the
 * instruction being assembled. */
static void
toobig(void)
{
	serror("offset too big to encode into instruction");
}
/* Assemble an ALU instruction where rb is a register. */
void alu_instr_reg(quad op, int cc, int rd, int ra, int rb)
@ -90,6 +95,7 @@ void misc_instr_lit(quad op, int cc, int rd, int ra, quad value)
void branch_instr(int bl, int cc, struct expr_t* expr)
{
quad pc = DOTVAL;
quad type = expr->typ & S_TYP;
/* Sanity checking. */
@ -114,7 +120,7 @@ void branch_instr(int bl, int cc, struct expr_t* expr)
/* The VC4 branch instructions express distance in 2-byte
* words. */
int d = (expr->val - DOTVAL) / 2;
int d = (expr->val - pc) / 2;
/* We now know the worst case for the instruction layout. At
* this point we can emit the instructions, which may shrink
@ -126,7 +132,7 @@ void branch_instr(int bl, int cc, struct expr_t* expr)
* close enough to the program counter, we can use a short-
* form instruction. */
if ((d >= -128) && (d < 127))
if (fitx(d, 7))
{
emit2(B16(00011000,00000000) | (cc<<7) | (d&0x7f));
break;
@ -136,19 +142,29 @@ void branch_instr(int bl, int cc, struct expr_t* expr)
/* Absolute addresses and references to other sections
* need the full 32 bits. */
newrelo(expr->typ, RELOVC4 | RELPC);
newrelo(expr->typ, RELOVC4|RELPC);
if (bl)
{
quad v = d & 0x07ffffff;
quad hiv = v >> 23;
quad lov = v & 0x007fffff;
quad v, hiv, lov;
if (!fitx(d, 27))
toobig();
v = maskx(d, 27);
hiv = v >> 23;
lov = v & 0x007fffff;
emit2(B16(10010000,10000000) | (lov>>16) | (hiv<<8));
emit2(B16(00000000,00000000) | (lov&0xffff));
}
else
{
quad v = d & 0x007fffff;
quad v;
if (!fitx(d, 23))
toobig();
v = maskx(d, 23);
emit2(B16(10010000,00000000) | (cc<<8) | (v>>16));
emit2(B16(00000000,00000000) | (v&0xffff));
}
@ -334,6 +350,8 @@ void mem_postincr_instr(quad opcode, int cc, int rd, int rs)
void mem_address_instr(quad opcode, int rd, struct expr_t* expr)
{
static const char sizes[] = {4, 2, 1, 2};
int size = sizes[opcode];
quad type = expr->typ & S_TYP;
/* Sanity checking. */
@ -354,10 +372,7 @@ void mem_address_instr(quad opcode, int rd, struct expr_t* expr)
case 1:
case 2:
{
/* The VC4 branch instructions express distance in 2-byte
* words. */
int d = (expr->val - DOTVAL) / 2;
int d = expr->val - DOTVAL;
/* We now know the worst case for the instruction layout. At
* this point we can emit the instructions, which may shrink
@ -365,24 +380,30 @@ void mem_address_instr(quad opcode, int rd, struct expr_t* expr)
if (type == DOTTYP)
{
int scaledd = d/size;
/* This is a reference to an address within this section. If
* it's close enough to the program counter, we can use a
* shorter instruction. */
if (fitx(d, 16))
if (fitx(scaledd, 16))
{
emit2(B16(10101010,00000000) | (opcode<<5) | (rd<<0));
emit2(d);
emit2(scaledd);
return;
}
}
/* Otherwise we need the full 48 bits. */
if (!fitx(d, 27))
serror("offset too big to encode into instruction");
newrelo(expr->typ, RELOVC4|RELPC);
newrelo(expr->typ, RELOVC4 | RELPC);
/* VC4 relocations store the PC-relative delta into the
* destination section in the instruction data. The linker will
* massage this, and scale it appropriately. */
if (!fitx(d, 27))
toobig();
emit2(B16(11100111,00000000) | (opcode<<5) | (rd<<0));
emit4((31<<27) | maskx(d, 27));
@ -493,8 +514,19 @@ void lea_stack_instr(int rd, long va, int rs)
void lea_address_instr(int rd, struct expr_t* expr)
{
newrelo(expr->typ, RELOVC4);
quad pc = DOTVAL;
quad type = expr->typ & S_TYP;
if (type == S_ABS)
serror("can't use absolute addresses here");
newrelo(expr->typ, RELOVC4|RELPC);
/* VC4 relocations store the PC-relative delta into the
* destination section in the instruction data. The linker will
* massage this, and scale it appropriately. */
emit2(B16(11100101,00000000) | (rd<<0));
emit4(expr->val);
emit4(expr->val - pc);
}

View file

@ -7,3 +7,8 @@
*/
#include "videocore.h"
.define __dummy
.sect .data
__dummy:

View file

@ -16,6 +16,10 @@
.sect .text
begtext:
lea r15, begtext
st sp, .returnsp
st lr, .returnlr
#if 0
! Wipe the bss. (I'm a little surprised that __m_a_i_n doesn't do this.)
@ -33,6 +37,12 @@ begtext:
#endif
b __m_a_i_n
! __exit: leave the program by returning to whatever called begtext.
! The stack pointer and link register are reloaded from .returnsp and
! .returnlr, which begtext stored on entry, and control then branches to
! the restored link register.
.define __exit
__exit:
ld sp, .returnsp
ld lr, .returnlr
b lr
! Define symbols at the beginning of our various segments, so that we can find
! them. (Except .text, which has already been done.)
@ -47,3 +57,9 @@ begtext:
.comm .trppc, 4
.comm .ignmask, 4
.comm _errno, 4
! We store the stack pointer and return address on entry so that we can
! cleanly exit.
.comm .returnsp, 4
.comm .returnlr, 4

View file

@ -8,6 +8,7 @@ static char rcsid[] = "$Id$";
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <assert.h>
#include "out.h"
#include "const.h"
@ -44,6 +45,65 @@ static long read4(char* addr, int type)
return ((long)word1 << (2 * WIDTH)) + word0;
}
/* Sign-extend the low `bits` bits of `value` to a 32-bit signed integer.
 * `bits` must be in the range 1..31. This uses the xor/subtract idiom rather
 * than shifting a signed value left into the sign bit, which is undefined
 * behaviour in C. */
static int32_t vc4_sign_extend(uint32_t value, int bits)
{
	uint32_t sign = (uint32_t)1 << (bits - 1);
	uint32_t mask = (sign << 1) - 1;

	value &= mask;
	return (int32_t)(value ^ sign) - (int32_t)sign;
}

/* VideoCore 4 fixups are complex as the offset lives in a different place
 * (and with a different width and scale) depending on what the instruction
 * is.
 *
 * Reads the instruction at addr, works out which of the supported encodings
 * it is from its first 16-bit word, and returns the signed offset currently
 * stored in it. For the branch forms the stored field counts 2-byte words,
 * so the returned value is the field multiplied by two.
 *
 * An unrecognised instruction trips an assert; if assertions are compiled
 * out, 0 is returned instead of falling off the end of the function.
 */
static long get_vc4_valu(char* addr)
{
	uint16_t opcode = read2(addr, 0);

	if ((opcode & 0xff00) == 0xe700)
	{
		/* ld<w> rd, $+o:  [1110 0111 ww 0 d:5] [11111 o:27]
		 * st<w> rd, $+o:  [1110 0111 ww 1 d:5] [11111 o:27]
		 */

		return vc4_sign_extend(read4(addr+2, 0), 27);
	}

	if ((opcode & 0xf080) == 0x9000)
	{
		/* b<cc> $+o*2:  [1001 cccc 0ooo oooo] [oooo oooo oooo oooo]
		 * Yes, big-endian (the first 16 bits is the MSB).
		 *
		 * The 23-bit field is signed: it must be sign-extended so
		 * that backward branches yield a negative offset.
		 */

		int32_t words = vc4_sign_extend(read4(addr, RELWR), 23);
		return (long)words * 2;
	}

	if ((opcode & 0xf080) == 0x9080)
	{
		/* bl $+o*2:  [1001 oooo 1ooo oooo] [oooo oooo oooo oooo]
		 * Yes, big-endian (the first 16 bits is the MSB).
		 * (Note that o is split.)
		 */

		uint32_t insn = read4(addr, RELWR);
		uint32_t lov = insn & 0x007fffff;
		uint32_t hiv = insn & 0x0f000000;
		/* Close the one-bit gap left by the fixed opcode bit to
		 * rebuild the contiguous 27-bit field. */
		int32_t words = vc4_sign_extend(lov | (hiv>>1), 27);
		return (long)words * 2;
	}

	if ((opcode & 0xffe0) == 0xe500)
	{
		/* lea: [1110 0101 000 d:5] [o:32] */

		return read4(addr+2, 0);
	}

	assert(0 && "unrecognised VC4 instruction");
	return 0;
}
/*
* The bits in type indicate how many bytes the value occupies and what
* significance should be attributed to each byte.
@ -65,21 +125,7 @@ getvalu(addr, type)
case RELOH2:
return read2(addr, type) << 16;
case RELOVC4:
{
long i = read4(addr, type);
if (i & 0x00800000)
{
/* Branch instruction. */
return (i<<9)>>9;
}
else
{
/* Branch-link instruction. */
long hi = (i<<4)>>28;
long lo = (i & 0x007fffff);
return lo | (hi<<23);
}
}
return get_vc4_valu(addr);
default:
fatal("bad relocation size");
}
@ -123,6 +169,60 @@ static void write4(long valu, char* addr, int type)
}
}
/* Write a relocated offset back into a VideoCore 4 instruction.
 *
 * The instruction at addr is identified from its first 16-bit word, and
 * value is then merged into that encoding's offset field, leaving the other
 * instruction bits as they were. For the two branch forms value is halved
 * before being stored. An instruction that matches none of the known
 * encodings trips an assert.
 */
static void put_vc4_valu(char* addr, long value)
{
	uint16_t insn = read2(addr, 0);
	uint32_t word;

	/* ld<w> rd, o, (pc):  [1110 0111 ww 0 d:5] [11111 o:27]
	 * st<w> rd, o, (pc):  [1110 0111 ww 1 d:5] [11111 o:27]
	 */
	if ((insn & 0xff00) == 0xe700)
	{
		word = read4(addr+2, 0);
		word &= 0xf8000000;
		word |= value & 0x07ffffff;
		write4(word, addr+2, 0);
		return;
	}

	/* b<cc> dest:  [1001 cccc 0ooo oooo] [oooo oooo oooo oooo]
	 * Yes, big-endian (the first 16 bits is the MSB).
	 */
	if ((insn & 0xf080) == 0x9000)
	{
		word = read4(addr, RELWR);
		word &= 0xff800000;
		word |= (value/2) & 0x007fffff;
		write4(word, addr, RELWR);
		return;
	}

	/* bl dest:  [1001 oooo 1ooo oooo] [oooo oooo oooo oooo]
	 * Yes, big-endian (the first 16 bits is the MSB).
	 * (Note that o is split.)
	 */
	if ((insn & 0xf080) == 0x9080)
	{
		uint32_t low_part = (value/2) & 0x007fffff;
		uint32_t high_part = (value/2) & 0x07800000;

		word = read4(addr, RELWR);
		word &= 0xf0800000;
		/* The high bits move up by one to step over the fixed bit
		 * that sits between the two halves of the field. */
		word |= low_part | (high_part<<1);
		write4(word, addr, RELWR);
		return;
	}

	/* lea: [1110 0101 000 d:5] [o:32] */
	if ((insn & 0xffe0) == 0xe500)
	{
		write4(value, addr+2, 0);
		return;
	}

	assert(0 && "unrecognised VC4 instruction");
}
/*
* The bits in type indicate how many bytes the value occupies and what
* significance should be attributed to each byte.
@ -156,27 +256,8 @@ putvalu(valu, addr, type)
write2(valu>>16, addr, type);
break;
case RELOVC4:
{
long i = read4(addr, type);
if (i & 0x00800000)
{
/* Branch instruction. */
unsigned v = (valu/2) & 0x007fffff;
i &= ~0x007fffff;
i |= v;
}
else
{
/* Branch-link instruction. */
unsigned v = (valu/2) & 0x07ffffff;
unsigned hiv = v >> 23;
unsigned lov = v & 0x007fffff;
i &= ~0x0f7fffff;
i |= (lov>>16) | (hiv<<24);
}
write4(i, addr, type);
put_vc4_valu(addr, valu);
break;
}
default:
fatal("bad relocation size");
}