Reworked VC4 relocations and some of the instruction encoding to be actually correct. Now generating what could be real code!
--HG-- branch : dtrg-videocore
This commit is contained in:
parent
1312fe298b
commit
5e9102955c
|
@ -7,6 +7,11 @@
|
|||
|
||||
/* Keep only the low x bits of v. Both arguments are parenthesised so that
 * compound expressions (e.g. a|b) can be passed safely. The mask is built
 * from an int constant, so x must be smaller than the bit width of int. */
#define maskx(v, x) ((v) & ((1<<(x))-1))
|
||||
|
||||
/* Report, via serror(), that an operand offset cannot be encoded in the
 * instruction currently being assembled. */
static void toobig(void)
{
	serror("offset too big to encode into instruction");
}
|
||||
|
||||
/* Assemble an ALU instruction where rb is a register. */
|
||||
|
||||
void alu_instr_reg(quad op, int cc, int rd, int ra, int rb)
|
||||
|
@ -90,6 +95,7 @@ void misc_instr_lit(quad op, int cc, int rd, int ra, quad value)
|
|||
|
||||
void branch_instr(int bl, int cc, struct expr_t* expr)
|
||||
{
|
||||
quad pc = DOTVAL;
|
||||
quad type = expr->typ & S_TYP;
|
||||
|
||||
/* Sanity checking. */
|
||||
|
@ -114,7 +120,7 @@ void branch_instr(int bl, int cc, struct expr_t* expr)
|
|||
/* The VC4 branch instructions express distance in 2-byte
|
||||
* words. */
|
||||
|
||||
int d = (expr->val - DOTVAL) / 2;
|
||||
int d = (expr->val - pc) / 2;
|
||||
|
||||
/* We now know the worst case for the instruction layout. At
|
||||
* this point we can emit the instructions, which may shrink
|
||||
|
@ -126,7 +132,7 @@ void branch_instr(int bl, int cc, struct expr_t* expr)
|
|||
* close enough to the program counter, we can use a short-
|
||||
* form instruction. */
|
||||
|
||||
if ((d >= -128) && (d < 127))
|
||||
if (fitx(d, 7))
|
||||
{
|
||||
emit2(B16(00011000,00000000) | (cc<<7) | (d&0x7f));
|
||||
break;
|
||||
|
@ -136,19 +142,29 @@ void branch_instr(int bl, int cc, struct expr_t* expr)
|
|||
/* Absolute addresses and references to other sections
|
||||
* need the full 32 bits. */
|
||||
|
||||
newrelo(expr->typ, RELOVC4 | RELPC);
|
||||
newrelo(expr->typ, RELOVC4|RELPC);
|
||||
|
||||
if (bl)
|
||||
{
|
||||
quad v = d & 0x07ffffff;
|
||||
quad hiv = v >> 23;
|
||||
quad lov = v & 0x007fffff;
|
||||
quad v, hiv, lov;
|
||||
|
||||
if (!fitx(d, 27))
|
||||
toobig();
|
||||
|
||||
v = maskx(d, 27);
|
||||
hiv = v >> 23;
|
||||
lov = v & 0x007fffff;
|
||||
emit2(B16(10010000,10000000) | (lov>>16) | (hiv<<8));
|
||||
emit2(B16(00000000,00000000) | (lov&0xffff));
|
||||
}
|
||||
else
|
||||
{
|
||||
quad v = d & 0x007fffff;
|
||||
quad v;
|
||||
|
||||
if (!fitx(d, 23))
|
||||
toobig();
|
||||
|
||||
v = maskx(d, 23);
|
||||
emit2(B16(10010000,00000000) | (cc<<8) | (v>>16));
|
||||
emit2(B16(00000000,00000000) | (v&0xffff));
|
||||
}
|
||||
|
@ -334,6 +350,8 @@ void mem_postincr_instr(quad opcode, int cc, int rd, int rs)
|
|||
|
||||
void mem_address_instr(quad opcode, int rd, struct expr_t* expr)
|
||||
{
|
||||
static const char sizes[] = {4, 2, 1, 2};
|
||||
int size = sizes[opcode];
|
||||
quad type = expr->typ & S_TYP;
|
||||
|
||||
/* Sanity checking. */
|
||||
|
@ -354,10 +372,7 @@ void mem_address_instr(quad opcode, int rd, struct expr_t* expr)
|
|||
case 1:
|
||||
case 2:
|
||||
{
|
||||
/* The VC4 branch instructions express distance in 2-byte
|
||||
* words. */
|
||||
|
||||
int d = (expr->val - DOTVAL) / 2;
|
||||
int d = expr->val - DOTVAL;
|
||||
|
||||
/* We now know the worst case for the instruction layout. At
|
||||
* this point we can emit the instructions, which may shrink
|
||||
|
@ -365,24 +380,30 @@ void mem_address_instr(quad opcode, int rd, struct expr_t* expr)
|
|||
|
||||
if (type == DOTTYP)
|
||||
{
|
||||
int scaledd = d/size;
|
||||
|
||||
/* This is a reference to an address within this section. If
|
||||
* it's close enough to the program counter, we can use a
|
||||
* shorter instruction. */
|
||||
|
||||
if (fitx(d, 16))
|
||||
if (fitx(scaledd, 16))
|
||||
{
|
||||
emit2(B16(10101010,00000000) | (opcode<<5) | (rd<<0));
|
||||
emit2(d);
|
||||
emit2(scaledd);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Otherwise we need the full 48 bits. */
|
||||
|
||||
if (!fitx(d, 27))
|
||||
serror("offset too big to encode into instruction");
|
||||
newrelo(expr->typ, RELOVC4|RELPC);
|
||||
|
||||
newrelo(expr->typ, RELOVC4 | RELPC);
|
||||
/* VC4 relocations store the PC-relative delta into the
|
||||
* destination section in the instruction data. The linker will
|
||||
* massage this, and scale it appropriately. */
|
||||
|
||||
if (!fitx(d, 27))
|
||||
toobig();
|
||||
|
||||
emit2(B16(11100111,00000000) | (opcode<<5) | (rd<<0));
|
||||
emit4((31<<27) | maskx(d, 27));
|
||||
|
@ -493,8 +514,19 @@ void lea_stack_instr(int rd, long va, int rs)
|
|||
|
||||
void lea_address_instr(int rd, struct expr_t* expr)
|
||||
{
|
||||
newrelo(expr->typ, RELOVC4);
|
||||
quad pc = DOTVAL;
|
||||
quad type = expr->typ & S_TYP;
|
||||
|
||||
if (type == S_ABS)
|
||||
serror("can't use absolute addresses here");
|
||||
|
||||
newrelo(expr->typ, RELOVC4|RELPC);
|
||||
|
||||
/* VC4 relocations store the PC-relative delta into the
|
||||
* destination section in the instruction data. The linker will
|
||||
* massage this, and scale it appropriately. */
|
||||
|
||||
emit2(B16(11100101,00000000) | (rd<<0));
|
||||
emit4(expr->val);
|
||||
emit4(expr->val - pc);
|
||||
}
|
||||
|
||||
|
|
|
@ -7,3 +7,8 @@
|
|||
*/
|
||||
|
||||
#include "videocore.h"
|
||||
|
||||
.define __dummy
|
||||
.sect .data
|
||||
__dummy:
|
||||
|
||||
|
|
|
@ -16,6 +16,10 @@
|
|||
.sect .text
|
||||
|
||||
begtext:
|
||||
lea r15, begtext
|
||||
st sp, .returnsp
|
||||
st lr, .returnlr
|
||||
|
||||
#if 0
|
||||
! Wipe the bss. (I'm a little surprised that __m_a_i_n doesn't do this.)
|
||||
|
||||
|
@ -33,6 +37,12 @@ begtext:
|
|||
#endif
|
||||
b __m_a_i_n
|
||||
|
||||
.define __exit
|
||||
__exit:
|
||||
ld sp, .returnsp
|
||||
ld lr, .returnlr
|
||||
b lr
|
||||
|
||||
! Define symbols at the beginning of our various segments, so that we can find
|
||||
! them. (Except .text, which has already been done.)
|
||||
|
||||
|
@ -47,3 +57,9 @@ begtext:
|
|||
.comm .trppc, 4
|
||||
.comm .ignmask, 4
|
||||
.comm _errno, 4
|
||||
|
||||
! We store the stack pointer and return address on entry so that we can
|
||||
! cleanly exit.
|
||||
|
||||
.comm .returnsp, 4
|
||||
.comm .returnlr, 4
|
||||
|
|
|
@ -8,6 +8,7 @@ static char rcsid[] = "$Id$";
|
|||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
#include "out.h"
|
||||
#include "const.h"
|
||||
|
@ -44,6 +45,65 @@ static long read4(char* addr, int type)
|
|||
return ((long)word1 << (2 * WIDTH)) + word0;
|
||||
}
|
||||
|
||||
/* VideoCore 4 fixups are complex as we need to patch the instruction in
|
||||
* one of several different ways (depending on what the instruction is).
|
||||
*/
|
||||
|
||||
static long get_vc4_valu(char* addr)
|
||||
{
|
||||
uint16_t opcode = read2(addr, 0);
|
||||
|
||||
if ((opcode & 0xff00) == 0xe700)
|
||||
{
|
||||
/* ld<w> rd, $+o: [1110 0111 ww 0 d:5] [11111 o:27]
|
||||
* st<w> rd, $+o: [1110 0111 ww 1 d:5] [11111 o:27]
|
||||
*/
|
||||
|
||||
int32_t value = read4(addr+2, 0);
|
||||
value &= 0x07ffffff;
|
||||
value = value<<5>>5;
|
||||
return value;
|
||||
}
|
||||
|
||||
if ((opcode & 0xf080) == 0x9000)
|
||||
{
|
||||
/* b<cc> $+o*2: [1001 cccc 0ooo oooo] [oooo oooo oooo oooo]
|
||||
* Yes, big-endian (the first 16 bits is the MSB).
|
||||
*/
|
||||
|
||||
uint32_t value = read4(addr, RELWR);
|
||||
value &= 0x007fffff;
|
||||
value = value<<9>>9;
|
||||
value *= 2;
|
||||
return value;
|
||||
}
|
||||
|
||||
if ((opcode & 0xf080) == 0x9080)
|
||||
{
|
||||
/* bl $+o*2: [1001 oooo 1ooo oooo] [oooo oooo oooo oooo]
|
||||
* Yes, big-endian (the first 16 bits is the MSB).
|
||||
* (Note that o is split.)
|
||||
*/
|
||||
|
||||
int32_t value = read4(addr, RELWR);
|
||||
int32_t lov = value & 0x007fffff;
|
||||
int32_t hiv = value & 0x0f000000;
|
||||
value = lov | (hiv>>1);
|
||||
value = value<<5>>5;
|
||||
value *= 2;
|
||||
return value;
|
||||
}
|
||||
|
||||
if ((opcode & 0xffe0) == 0xe500)
|
||||
{
|
||||
/* lea: [1110 0101 000 d:5] [o:32] */
|
||||
|
||||
return read4(addr+2, 0);
|
||||
}
|
||||
|
||||
assert(0 && "unrecognised VC4 instruction");
|
||||
}
|
||||
|
||||
/*
|
||||
* The bits in type indicate how many bytes the value occupies and what
|
||||
* significance should be attributed to each byte.
|
||||
|
@ -65,21 +125,7 @@ getvalu(addr, type)
|
|||
case RELOH2:
|
||||
return read2(addr, type) << 16;
|
||||
case RELOVC4:
|
||||
{
|
||||
long i = read4(addr, type);
|
||||
if (i & 0x00800000)
|
||||
{
|
||||
/* Branch instruction. */
|
||||
return (i<<9)>>9;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Branch-link instruction. */
|
||||
long hi = (i<<4)>>28;
|
||||
long lo = (i & 0x007fffff);
|
||||
return lo | (hi<<23);
|
||||
}
|
||||
}
|
||||
return get_vc4_valu(addr);
|
||||
default:
|
||||
fatal("bad relocation size");
|
||||
}
|
||||
|
@ -123,6 +169,60 @@ static void write4(long valu, char* addr, int type)
|
|||
}
|
||||
}
|
||||
|
||||
/* VideoCore 4 fixups are complex as we need to patch the instruction in
|
||||
* one of several different ways (depending on what the instruction is).
|
||||
*/
|
||||
|
||||
static void put_vc4_valu(char* addr, long value)
|
||||
{
|
||||
uint16_t opcode = read2(addr, 0);
|
||||
|
||||
if ((opcode & 0xff00) == 0xe700)
|
||||
{
|
||||
/* ld<w> rd, o, (pc): [1110 0111 ww 0 d:5] [11111 o:27]
|
||||
* st<w> rd, o, (pc): [1110 0111 ww 1 d:5] [11111 o:27]
|
||||
*/
|
||||
|
||||
uint32_t v = read4(addr+2, 0);
|
||||
v &= 0xf8000000;
|
||||
v |= value & 0x07ffffff;
|
||||
write4(v, addr+2, 0);
|
||||
}
|
||||
else if ((opcode & 0xf080) == 0x9000)
|
||||
{
|
||||
/* b<cc> dest: [1001 cccc 0ooo oooo] [oooo oooo oooo oooo]
|
||||
* Yes, big-endian (the first 16 bits is the MSB).
|
||||
*/
|
||||
|
||||
uint32_t v = read4(addr, RELWR);
|
||||
v &= 0xff800000;
|
||||
v |= (value/2) & 0x007fffff;
|
||||
write4(v, addr, RELWR);
|
||||
}
|
||||
else if ((opcode & 0xf080) == 0x9080)
|
||||
{
|
||||
/* bl dest: [1001 oooo 1ooo oooo] [oooo oooo oooo oooo]
|
||||
* Yes, big-endian (the first 16 bits is the MSB).
|
||||
* (Note that o is split.)
|
||||
*/
|
||||
|
||||
uint32_t v = read4(addr, RELWR);
|
||||
uint32_t lovalue = (value/2) & 0x007fffff;
|
||||
uint32_t hivalue = (value/2) & 0x07800000;
|
||||
v &= 0xf0800000;
|
||||
v |= lovalue | (hivalue<<1);
|
||||
write4(v, addr, RELWR);
|
||||
}
|
||||
else if ((opcode & 0xffe0) == 0xe500)
|
||||
{
|
||||
/* lea: [1110 0101 000 d:5] [o:32] */
|
||||
|
||||
write4(value, addr+2, 0);
|
||||
}
|
||||
else
|
||||
assert(0 && "unrecognised VC4 instruction");
|
||||
}
|
||||
|
||||
/*
|
||||
* The bits in type indicate how many bytes the value occupies and what
|
||||
* significance should be attributed to each byte.
|
||||
|
@ -156,27 +256,8 @@ putvalu(valu, addr, type)
|
|||
write2(valu>>16, addr, type);
|
||||
break;
|
||||
case RELOVC4:
|
||||
{
|
||||
long i = read4(addr, type);
|
||||
if (i & 0x00800000)
|
||||
{
|
||||
/* Branch instruction. */
|
||||
unsigned v = (valu/2) & 0x007fffff;
|
||||
i &= ~0x007fffff;
|
||||
i |= v;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Branch-link instruction. */
|
||||
unsigned v = (valu/2) & 0x07ffffff;
|
||||
unsigned hiv = v >> 23;
|
||||
unsigned lov = v & 0x007fffff;
|
||||
i &= ~0x0f7fffff;
|
||||
i |= (lov>>16) | (hiv<<24);
|
||||
}
|
||||
write4(i, addr, type);
|
||||
put_vc4_valu(addr, valu);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
fatal("bad relocation size");
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue