Reworked VC4 relocations and some of the instruction encoding to be actually correct. Now generating what could be real code!
--HG-- branch : dtrg-videocore
This commit is contained in:
parent
1312fe298b
commit
5e9102955c
|
@ -7,6 +7,11 @@
|
||||||
|
|
||||||
/* Keep only the low x bits of v. Both arguments are parenthesised so that
 * an expression argument (for example `a | b`) is masked as a whole rather
 * than being re-associated by operator precedence. */
#define maskx(v, x) ((v) & ((1<<(x))-1))
|
||||||
|
|
||||||
|
/* Report, through the assembler's serror() channel, that a computed offset
 * does not fit into the immediate field of the instruction being emitted. */
static void toobig(void)
{
	serror("offset too big to encode into instruction");
}
|
||||||
|
|
||||||
/* Assemble an ALU instruction where rb is a register. */
|
/* Assemble an ALU instruction where rb is a register. */
|
||||||
|
|
||||||
void alu_instr_reg(quad op, int cc, int rd, int ra, int rb)
|
void alu_instr_reg(quad op, int cc, int rd, int ra, int rb)
|
||||||
|
@ -90,6 +95,7 @@ void misc_instr_lit(quad op, int cc, int rd, int ra, quad value)
|
||||||
|
|
||||||
void branch_instr(int bl, int cc, struct expr_t* expr)
|
void branch_instr(int bl, int cc, struct expr_t* expr)
|
||||||
{
|
{
|
||||||
|
quad pc = DOTVAL;
|
||||||
quad type = expr->typ & S_TYP;
|
quad type = expr->typ & S_TYP;
|
||||||
|
|
||||||
/* Sanity checking. */
|
/* Sanity checking. */
|
||||||
|
@ -114,7 +120,7 @@ void branch_instr(int bl, int cc, struct expr_t* expr)
|
||||||
/* The VC4 branch instructions express distance in 2-byte
|
/* The VC4 branch instructions express distance in 2-byte
|
||||||
* words. */
|
* words. */
|
||||||
|
|
||||||
int d = (expr->val - DOTVAL) / 2;
|
int d = (expr->val - pc) / 2;
|
||||||
|
|
||||||
/* We now know the worst case for the instruction layout. At
|
/* We now know the worst case for the instruction layout. At
|
||||||
* this point we can emit the instructions, which may shrink
|
* this point we can emit the instructions, which may shrink
|
||||||
|
@ -126,7 +132,7 @@ void branch_instr(int bl, int cc, struct expr_t* expr)
|
||||||
* close enough to the program counter, we can use a short-
|
* close enough to the program counter, we can use a short-
|
||||||
* form instruction. */
|
* form instruction. */
|
||||||
|
|
||||||
if ((d >= -128) && (d < 127))
|
if (fitx(d, 7))
|
||||||
{
|
{
|
||||||
emit2(B16(00011000,00000000) | (cc<<7) | (d&0x7f));
|
emit2(B16(00011000,00000000) | (cc<<7) | (d&0x7f));
|
||||||
break;
|
break;
|
||||||
|
@ -136,19 +142,29 @@ void branch_instr(int bl, int cc, struct expr_t* expr)
|
||||||
/* Absolute addresses and references to other sections
|
/* Absolute addresses and references to other sections
|
||||||
* need the full 32 bits. */
|
* need the full 32 bits. */
|
||||||
|
|
||||||
newrelo(expr->typ, RELOVC4 | RELPC);
|
newrelo(expr->typ, RELOVC4|RELPC);
|
||||||
|
|
||||||
if (bl)
|
if (bl)
|
||||||
{
|
{
|
||||||
quad v = d & 0x07ffffff;
|
quad v, hiv, lov;
|
||||||
quad hiv = v >> 23;
|
|
||||||
quad lov = v & 0x007fffff;
|
if (!fitx(d, 27))
|
||||||
|
toobig();
|
||||||
|
|
||||||
|
v = maskx(d, 27);
|
||||||
|
hiv = v >> 23;
|
||||||
|
lov = v & 0x007fffff;
|
||||||
emit2(B16(10010000,10000000) | (lov>>16) | (hiv<<8));
|
emit2(B16(10010000,10000000) | (lov>>16) | (hiv<<8));
|
||||||
emit2(B16(00000000,00000000) | (lov&0xffff));
|
emit2(B16(00000000,00000000) | (lov&0xffff));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
quad v = d & 0x007fffff;
|
quad v;
|
||||||
|
|
||||||
|
if (!fitx(d, 23))
|
||||||
|
toobig();
|
||||||
|
|
||||||
|
v = maskx(d, 23);
|
||||||
emit2(B16(10010000,00000000) | (cc<<8) | (v>>16));
|
emit2(B16(10010000,00000000) | (cc<<8) | (v>>16));
|
||||||
emit2(B16(00000000,00000000) | (v&0xffff));
|
emit2(B16(00000000,00000000) | (v&0xffff));
|
||||||
}
|
}
|
||||||
|
@ -334,6 +350,8 @@ void mem_postincr_instr(quad opcode, int cc, int rd, int rs)
|
||||||
|
|
||||||
void mem_address_instr(quad opcode, int rd, struct expr_t* expr)
|
void mem_address_instr(quad opcode, int rd, struct expr_t* expr)
|
||||||
{
|
{
|
||||||
|
static const char sizes[] = {4, 2, 1, 2};
|
||||||
|
int size = sizes[opcode];
|
||||||
quad type = expr->typ & S_TYP;
|
quad type = expr->typ & S_TYP;
|
||||||
|
|
||||||
/* Sanity checking. */
|
/* Sanity checking. */
|
||||||
|
@ -354,10 +372,7 @@ void mem_address_instr(quad opcode, int rd, struct expr_t* expr)
|
||||||
case 1:
|
case 1:
|
||||||
case 2:
|
case 2:
|
||||||
{
|
{
|
||||||
/* The VC4 branch instructions express distance in 2-byte
|
int d = expr->val - DOTVAL;
|
||||||
* words. */
|
|
||||||
|
|
||||||
int d = (expr->val - DOTVAL) / 2;
|
|
||||||
|
|
||||||
/* We now know the worst case for the instruction layout. At
|
/* We now know the worst case for the instruction layout. At
|
||||||
* this point we can emit the instructions, which may shrink
|
* this point we can emit the instructions, which may shrink
|
||||||
|
@ -365,24 +380,30 @@ void mem_address_instr(quad opcode, int rd, struct expr_t* expr)
|
||||||
|
|
||||||
if (type == DOTTYP)
|
if (type == DOTTYP)
|
||||||
{
|
{
|
||||||
|
int scaledd = d/size;
|
||||||
|
|
||||||
/* This is a reference to an address within this section. If
|
/* This is a reference to an address within this section. If
|
||||||
* it's close enough to the program counter, we can use a
|
* it's close enough to the program counter, we can use a
|
||||||
* shorter instruction. */
|
* shorter instruction. */
|
||||||
|
|
||||||
if (fitx(d, 16))
|
if (fitx(scaledd, 16))
|
||||||
{
|
{
|
||||||
emit2(B16(10101010,00000000) | (opcode<<5) | (rd<<0));
|
emit2(B16(10101010,00000000) | (opcode<<5) | (rd<<0));
|
||||||
emit2(d);
|
emit2(scaledd);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Otherwise we need the full 48 bits. */
|
/* Otherwise we need the full 48 bits. */
|
||||||
|
|
||||||
if (!fitx(d, 27))
|
newrelo(expr->typ, RELOVC4|RELPC);
|
||||||
serror("offset too big to encode into instruction");
|
|
||||||
|
|
||||||
newrelo(expr->typ, RELOVC4 | RELPC);
|
/* VC4 relocations store the PC-relative delta into the
|
||||||
|
* destination section in the instruction data. The linker will
|
||||||
|
* massage this, and scale it appropriately. */
|
||||||
|
|
||||||
|
if (!fitx(d, 27))
|
||||||
|
toobig();
|
||||||
|
|
||||||
emit2(B16(11100111,00000000) | (opcode<<5) | (rd<<0));
|
emit2(B16(11100111,00000000) | (opcode<<5) | (rd<<0));
|
||||||
emit4((31<<27) | maskx(d, 27));
|
emit4((31<<27) | maskx(d, 27));
|
||||||
|
@ -493,8 +514,19 @@ void lea_stack_instr(int rd, long va, int rs)
|
||||||
|
|
||||||
void lea_address_instr(int rd, struct expr_t* expr)
|
void lea_address_instr(int rd, struct expr_t* expr)
|
||||||
{
|
{
|
||||||
newrelo(expr->typ, RELOVC4);
|
quad pc = DOTVAL;
|
||||||
|
quad type = expr->typ & S_TYP;
|
||||||
|
|
||||||
|
if (type == S_ABS)
|
||||||
|
serror("can't use absolute addresses here");
|
||||||
|
|
||||||
|
newrelo(expr->typ, RELOVC4|RELPC);
|
||||||
|
|
||||||
|
/* VC4 relocations store the PC-relative delta into the
|
||||||
|
* destination section in the instruction data. The linker will
|
||||||
|
* massage this, and scale it appropriately. */
|
||||||
|
|
||||||
emit2(B16(11100101,00000000) | (rd<<0));
|
emit2(B16(11100101,00000000) | (rd<<0));
|
||||||
emit4(expr->val);
|
emit4(expr->val - pc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -7,3 +7,8 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "videocore.h"
|
#include "videocore.h"
|
||||||
|
|
||||||
|
.define __dummy
|
||||||
|
.sect .data
|
||||||
|
__dummy:
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,10 @@
|
||||||
.sect .text
|
.sect .text
|
||||||
|
|
||||||
begtext:
|
begtext:
|
||||||
|
lea r15, begtext
|
||||||
|
st sp, .returnsp
|
||||||
|
st lr, .returnlr
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
! Wipe the bss. (I'm a little surprised that __m_a_i_n doesn't do this.)
|
! Wipe the bss. (I'm a little surprised that __m_a_i_n doesn't do this.)
|
||||||
|
|
||||||
|
@ -33,6 +37,12 @@ begtext:
|
||||||
#endif
|
#endif
|
||||||
b __m_a_i_n
|
b __m_a_i_n
|
||||||
|
|
||||||
|
.define __exit
|
||||||
|
__exit:
|
||||||
|
ld sp, .returnsp
|
||||||
|
ld lr, .returnlr
|
||||||
|
b lr
|
||||||
|
|
||||||
! Define symbols at the beginning of our various segments, so that we can find
|
! Define symbols at the beginning of our various segments, so that we can find
|
||||||
! them. (Except .text, which has already been done.)
|
! them. (Except .text, which has already been done.)
|
||||||
|
|
||||||
|
@ -47,3 +57,9 @@ begtext:
|
||||||
.comm .trppc, 4
|
.comm .trppc, 4
|
||||||
.comm .ignmask, 4
|
.comm .ignmask, 4
|
||||||
.comm _errno, 4
|
.comm _errno, 4
|
||||||
|
|
||||||
|
! We store the stack pointer and return address on entry so that we can
|
||||||
|
! cleanly exit.
|
||||||
|
|
||||||
|
.comm .returnsp, 4
|
||||||
|
.comm .returnlr, 4
|
||||||
|
|
|
@ -8,6 +8,7 @@ static char rcsid[] = "$Id$";
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stdint.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include "out.h"
|
#include "out.h"
|
||||||
#include "const.h"
|
#include "const.h"
|
||||||
|
@ -44,6 +45,65 @@ static long read4(char* addr, int type)
|
||||||
return ((long)word1 << (2 * WIDTH)) + word0;
|
return ((long)word1 << (2 * WIDTH)) + word0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* VideoCore 4 fixups are complex as we need to patch the instruction in
|
||||||
|
* one of several different ways (depending on what the instruction is).
|
||||||
|
*/
|
||||||
|
|
||||||
|
static long get_vc4_valu(char* addr)
|
||||||
|
{
|
||||||
|
uint16_t opcode = read2(addr, 0);
|
||||||
|
|
||||||
|
if ((opcode & 0xff00) == 0xe700)
|
||||||
|
{
|
||||||
|
/* ld<w> rd, $+o: [1110 0111 ww 0 d:5] [11111 o:27]
|
||||||
|
* st<w> rd, $+o: [1110 0111 ww 1 d:5] [11111 o:27]
|
||||||
|
*/
|
||||||
|
|
||||||
|
int32_t value = read4(addr+2, 0);
|
||||||
|
value &= 0x07ffffff;
|
||||||
|
value = value<<5>>5;
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((opcode & 0xf080) == 0x9000)
|
||||||
|
{
|
||||||
|
/* b<cc> $+o*2: [1001 cccc 0ooo oooo] [oooo oooo oooo oooo]
|
||||||
|
* Yes, big-endian (the first 16 bits is the MSB).
|
||||||
|
*/
|
||||||
|
|
||||||
|
uint32_t value = read4(addr, RELWR);
|
||||||
|
value &= 0x007fffff;
|
||||||
|
value = value<<9>>9;
|
||||||
|
value *= 2;
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((opcode & 0xf080) == 0x9080)
|
||||||
|
{
|
||||||
|
/* bl $+o*2: [1001 oooo 1ooo oooo] [oooo oooo oooo oooo]
|
||||||
|
* Yes, big-endian (the first 16 bits is the MSB).
|
||||||
|
* (Note that o is split.)
|
||||||
|
*/
|
||||||
|
|
||||||
|
int32_t value = read4(addr, RELWR);
|
||||||
|
int32_t lov = value & 0x007fffff;
|
||||||
|
int32_t hiv = value & 0x0f000000;
|
||||||
|
value = lov | (hiv>>1);
|
||||||
|
value = value<<5>>5;
|
||||||
|
value *= 2;
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((opcode & 0xffe0) == 0xe500)
|
||||||
|
{
|
||||||
|
/* lea: [1110 0101 000 d:5] [o:32] */
|
||||||
|
|
||||||
|
return read4(addr+2, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(0 && "unrecognised VC4 instruction");
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The bits in type indicate how many bytes the value occupies and what
|
* The bits in type indicate how many bytes the value occupies and what
|
||||||
* significance should be attributed to each byte.
|
* significance should be attributed to each byte.
|
||||||
|
@ -65,21 +125,7 @@ getvalu(addr, type)
|
||||||
case RELOH2:
|
case RELOH2:
|
||||||
return read2(addr, type) << 16;
|
return read2(addr, type) << 16;
|
||||||
case RELOVC4:
|
case RELOVC4:
|
||||||
{
|
return get_vc4_valu(addr);
|
||||||
long i = read4(addr, type);
|
|
||||||
if (i & 0x00800000)
|
|
||||||
{
|
|
||||||
/* Branch instruction. */
|
|
||||||
return (i<<9)>>9;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* Branch-link instruction. */
|
|
||||||
long hi = (i<<4)>>28;
|
|
||||||
long lo = (i & 0x007fffff);
|
|
||||||
return lo | (hi<<23);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
default:
|
default:
|
||||||
fatal("bad relocation size");
|
fatal("bad relocation size");
|
||||||
}
|
}
|
||||||
|
@ -123,6 +169,60 @@ static void write4(long valu, char* addr, int type)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* VideoCore 4 fixups are complex as we need to patch the instruction in
|
||||||
|
* one of several different ways (depending on what the instruction is).
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void put_vc4_valu(char* addr, long value)
|
||||||
|
{
|
||||||
|
uint16_t opcode = read2(addr, 0);
|
||||||
|
|
||||||
|
if ((opcode & 0xff00) == 0xe700)
|
||||||
|
{
|
||||||
|
/* ld<w> rd, o, (pc): [1110 0111 ww 0 d:5] [11111 o:27]
|
||||||
|
* st<w> rd, o, (pc): [1110 0111 ww 1 d:5] [11111 o:27]
|
||||||
|
*/
|
||||||
|
|
||||||
|
uint32_t v = read4(addr+2, 0);
|
||||||
|
v &= 0xf8000000;
|
||||||
|
v |= value & 0x07ffffff;
|
||||||
|
write4(v, addr+2, 0);
|
||||||
|
}
|
||||||
|
else if ((opcode & 0xf080) == 0x9000)
|
||||||
|
{
|
||||||
|
/* b<cc> dest: [1001 cccc 0ooo oooo] [oooo oooo oooo oooo]
|
||||||
|
* Yes, big-endian (the first 16 bits is the MSB).
|
||||||
|
*/
|
||||||
|
|
||||||
|
uint32_t v = read4(addr, RELWR);
|
||||||
|
v &= 0xff800000;
|
||||||
|
v |= (value/2) & 0x007fffff;
|
||||||
|
write4(v, addr, RELWR);
|
||||||
|
}
|
||||||
|
else if ((opcode & 0xf080) == 0x9080)
|
||||||
|
{
|
||||||
|
/* bl dest: [1001 oooo 1ooo oooo] [oooo oooo oooo oooo]
|
||||||
|
* Yes, big-endian (the first 16 bits is the MSB).
|
||||||
|
* (Note that o is split.)
|
||||||
|
*/
|
||||||
|
|
||||||
|
uint32_t v = read4(addr, RELWR);
|
||||||
|
uint32_t lovalue = (value/2) & 0x007fffff;
|
||||||
|
uint32_t hivalue = (value/2) & 0x07800000;
|
||||||
|
v &= 0xf0800000;
|
||||||
|
v |= lovalue | (hivalue<<1);
|
||||||
|
write4(v, addr, RELWR);
|
||||||
|
}
|
||||||
|
else if ((opcode & 0xffe0) == 0xe500)
|
||||||
|
{
|
||||||
|
/* lea: [1110 0101 000 d:5] [o:32] */
|
||||||
|
|
||||||
|
write4(value, addr+2, 0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
assert(0 && "unrecognised VC4 instruction");
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The bits in type indicate how many bytes the value occupies and what
|
* The bits in type indicate how many bytes the value occupies and what
|
||||||
* significance should be attributed to each byte.
|
* significance should be attributed to each byte.
|
||||||
|
@ -156,27 +256,8 @@ putvalu(valu, addr, type)
|
||||||
write2(valu>>16, addr, type);
|
write2(valu>>16, addr, type);
|
||||||
break;
|
break;
|
||||||
case RELOVC4:
|
case RELOVC4:
|
||||||
{
|
put_vc4_valu(addr, valu);
|
||||||
long i = read4(addr, type);
|
|
||||||
if (i & 0x00800000)
|
|
||||||
{
|
|
||||||
/* Branch instruction. */
|
|
||||||
unsigned v = (valu/2) & 0x007fffff;
|
|
||||||
i &= ~0x007fffff;
|
|
||||||
i |= v;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* Branch-link instruction. */
|
|
||||||
unsigned v = (valu/2) & 0x07ffffff;
|
|
||||||
unsigned hiv = v >> 23;
|
|
||||||
unsigned lov = v & 0x007fffff;
|
|
||||||
i &= ~0x0f7fffff;
|
|
||||||
i |= (lov>>16) | (hiv<<24);
|
|
||||||
}
|
|
||||||
write4(i, addr, type);
|
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
default:
|
default:
|
||||||
fatal("bad relocation size");
|
fatal("bad relocation size");
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue