Reworked VC4 relocations and some of the instruction encoding to be actually correct. Now generating what could be real code!
--HG-- branch : dtrg-videocore
This commit is contained in:
		
							parent
							
								
									1312fe298b
								
							
						
					
					
						commit
						5e9102955c
					
				
					 4 changed files with 187 additions and 53 deletions
				
			
		| 
						 | 
				
			
			@ -7,6 +7,11 @@
 | 
			
		|||
 | 
			
		||||
/* Keep only the low x bits of v. Both arguments are parenthesized so that
 * compound expressions (e.g. maskx(a|b, n)) expand with the intended
 * precedence. x must be smaller than the width of int. */
#define maskx(v, x) ((v) & ((1<<(x))-1))
 | 
			
		||||
 | 
			
		||||
/* Report, through serror(), that a displacement does not fit into the
 * offset field of the instruction being assembled. */
static void toobig(void)
{
	serror("offset too big to encode into instruction");
}
 | 
			
		||||
 | 
			
		||||
/* Assemble an ALU instruction where rb is a register. */
 | 
			
		||||
 | 
			
		||||
void alu_instr_reg(quad op, int cc, int rd, int ra, int rb)
 | 
			
		||||
| 
						 | 
				
			
			@ -90,6 +95,7 @@ void misc_instr_lit(quad op, int cc, int rd, int ra, quad value)
 | 
			
		|||
 | 
			
		||||
void branch_instr(int bl, int cc, struct expr_t* expr)
 | 
			
		||||
{
 | 
			
		||||
	quad pc = DOTVAL;
 | 
			
		||||
	quad type = expr->typ & S_TYP;
 | 
			
		||||
 | 
			
		||||
	/* Sanity checking. */
 | 
			
		||||
| 
						 | 
				
			
			@ -114,7 +120,7 @@ void branch_instr(int bl, int cc, struct expr_t* expr)
 | 
			
		|||
			/* The VC4 branch instructions express distance in 2-byte
 | 
			
		||||
			 * words. */
 | 
			
		||||
 | 
			
		||||
			int d = (expr->val - DOTVAL) / 2;
 | 
			
		||||
			int d = (expr->val - pc) / 2;
 | 
			
		||||
 | 
			
		||||
        	/* We now know the worst case for the instruction layout. At
 | 
			
		||||
        	 * this point we can emit the instructions, which may shrink
 | 
			
		||||
| 
						 | 
				
			
			@ -126,7 +132,7 @@ void branch_instr(int bl, int cc, struct expr_t* expr)
 | 
			
		|||
        	     * close enough to the program counter, we can use a short-
 | 
			
		||||
        	     * form instruction. */
 | 
			
		||||
 | 
			
		||||
        	    if ((d >= -128) && (d < 127))
 | 
			
		||||
        	    if (fitx(d, 7))
 | 
			
		||||
        	    {
 | 
			
		||||
					emit2(B16(00011000,00000000) | (cc<<7) | (d&0x7f));
 | 
			
		||||
					break;
 | 
			
		||||
| 
						 | 
				
			
			@ -140,15 +146,25 @@ void branch_instr(int bl, int cc, struct expr_t* expr)
 | 
			
		|||
 | 
			
		||||
			if (bl)
 | 
			
		||||
			{
 | 
			
		||||
				quad v = d & 0x07ffffff;
 | 
			
		||||
				quad hiv = v >> 23;
 | 
			
		||||
				quad lov = v & 0x007fffff;
 | 
			
		||||
				quad v, hiv, lov;
 | 
			
		||||
 | 
			
		||||
				if (!fitx(d, 27))
 | 
			
		||||
					toobig();
 | 
			
		||||
 | 
			
		||||
				v = maskx(d, 27);
 | 
			
		||||
				hiv = v >> 23;
 | 
			
		||||
				lov = v & 0x007fffff;
 | 
			
		||||
				emit2(B16(10010000,10000000) | (lov>>16) | (hiv<<8));
 | 
			
		||||
				emit2(B16(00000000,00000000) | (lov&0xffff));
 | 
			
		||||
			}
 | 
			
		||||
			else
 | 
			
		||||
			{
 | 
			
		||||
				quad v = d & 0x007fffff;
 | 
			
		||||
				quad v;
 | 
			
		||||
 | 
			
		||||
				if (!fitx(d, 23))
 | 
			
		||||
					toobig();
 | 
			
		||||
 | 
			
		||||
				v = maskx(d, 23);
 | 
			
		||||
				emit2(B16(10010000,00000000) | (cc<<8) | (v>>16));
 | 
			
		||||
				emit2(B16(00000000,00000000) | (v&0xffff));
 | 
			
		||||
			}
 | 
			
		||||
| 
						 | 
				
			
			@ -334,6 +350,8 @@ void mem_postincr_instr(quad opcode, int cc, int rd, int rs)
 | 
			
		|||
 | 
			
		||||
void mem_address_instr(quad opcode, int rd, struct expr_t* expr)
 | 
			
		||||
{
 | 
			
		||||
	static const char sizes[] = {4, 2, 1, 2};
 | 
			
		||||
	int size = sizes[opcode];
 | 
			
		||||
	quad type = expr->typ & S_TYP;
 | 
			
		||||
 | 
			
		||||
	/* Sanity checking. */
 | 
			
		||||
| 
						 | 
				
			
			@ -354,10 +372,7 @@ void mem_address_instr(quad opcode, int rd, struct expr_t* expr)
 | 
			
		|||
		case 1:
 | 
			
		||||
		case 2:
 | 
			
		||||
		{
 | 
			
		||||
			/* The VC4 branch instructions express distance in 2-byte
 | 
			
		||||
			 * words. */
 | 
			
		||||
 | 
			
		||||
			int d = (expr->val - DOTVAL) / 2;
 | 
			
		||||
			int d = expr->val - DOTVAL;
 | 
			
		||||
 | 
			
		||||
        	/* We now know the worst case for the instruction layout. At
 | 
			
		||||
        	 * this point we can emit the instructions, which may shrink
 | 
			
		||||
| 
						 | 
				
			
			@ -365,25 +380,31 @@ void mem_address_instr(quad opcode, int rd, struct expr_t* expr)
 | 
			
		|||
 | 
			
		||||
			if (type == DOTTYP)
 | 
			
		||||
			{
 | 
			
		||||
				int scaledd = d/size;
 | 
			
		||||
 | 
			
		||||
        	    /* This is a reference to an address within this section. If
 | 
			
		||||
        	     * it's close enough to the program counter, we can use a
 | 
			
		||||
        	     * shorter instruction. */
 | 
			
		||||
 | 
			
		||||
				if (fitx(d, 16))
 | 
			
		||||
				if (fitx(scaledd, 16))
 | 
			
		||||
				{
 | 
			
		||||
                    emit2(B16(10101010,00000000) | (opcode<<5) | (rd<<0));
 | 
			
		||||
                    emit2(d);
 | 
			
		||||
                    emit2(scaledd);
 | 
			
		||||
                    return;
 | 
			
		||||
                }
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			/* Otherwise we need the full 48 bits. */
 | 
			
		||||
 | 
			
		||||
            if (!fitx(d, 27))
 | 
			
		||||
                serror("offset too big to encode into instruction");
 | 
			
		||||
 | 
			
		||||
			newrelo(expr->typ, RELOVC4|RELPC);
 | 
			
		||||
 | 
			
		||||
			/* VC4 relocations store the PC-relative delta into the
 | 
			
		||||
			 * destination section in the instruction data. The linker will
 | 
			
		||||
			 * massage this, and scale it appropriately. */
 | 
			
		||||
 | 
			
		||||
            if (!fitx(d, 27))
 | 
			
		||||
				toobig();
 | 
			
		||||
 | 
			
		||||
            emit2(B16(11100111,00000000) | (opcode<<5) | (rd<<0));
 | 
			
		||||
            emit4((31<<27) | maskx(d, 27));
 | 
			
		||||
			break;
 | 
			
		||||
| 
						 | 
				
			
			@ -493,8 +514,19 @@ void lea_stack_instr(int rd, long va, int rs)
 | 
			
		|||
 | 
			
		||||
void lea_address_instr(int rd, struct expr_t* expr)
 | 
			
		||||
{
 | 
			
		||||
	newrelo(expr->typ, RELOVC4);
 | 
			
		||||
	quad pc = DOTVAL;
 | 
			
		||||
	quad type = expr->typ & S_TYP;
 | 
			
		||||
 | 
			
		||||
	if (type == S_ABS)
 | 
			
		||||
		serror("can't use absolute addresses here");
 | 
			
		||||
 | 
			
		||||
	newrelo(expr->typ, RELOVC4|RELPC);
 | 
			
		||||
 | 
			
		||||
	/* VC4 relocations store the PC-relative delta into the
 | 
			
		||||
	 * destination section in the instruction data. The linker will
 | 
			
		||||
	 * massage this, and scale it appropriately. */
 | 
			
		||||
 | 
			
		||||
	emit2(B16(11100101,00000000) | (rd<<0));
 | 
			
		||||
	emit4(expr->val);
 | 
			
		||||
	emit4(expr->val - pc);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -7,3 +7,8 @@
 | 
			
		|||
 */
 | 
			
		||||
 | 
			
		||||
#include "videocore.h"
 | 
			
		||||
 | 
			
		||||
.define	__dummy
 | 
			
		||||
.sect .data
 | 
			
		||||
__dummy:
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -16,6 +16,10 @@
 | 
			
		|||
.sect .text
 | 
			
		||||
 | 
			
		||||
begtext:
 | 
			
		||||
	lea r15, begtext
 | 
			
		||||
	st sp, .returnsp
 | 
			
		||||
	st lr, .returnlr
 | 
			
		||||
 | 
			
		||||
#if 0
 | 
			
		||||
	! Wipe the bss. (I'm a little surprised that __m_a_i_n doesn't do this.)
 | 
			
		||||
	
 | 
			
		||||
| 
						 | 
				
			
			@ -33,6 +37,12 @@ begtext:
 | 
			
		|||
#endif
 | 
			
		||||
	b __m_a_i_n
 | 
			
		||||
 | 
			
		||||
.define __exit
 | 
			
		||||
__exit:
 | 
			
		||||
	ld sp, .returnsp
 | 
			
		||||
	ld lr, .returnlr
 | 
			
		||||
	b lr
 | 
			
		||||
 | 
			
		||||
! Define symbols at the beginning of our various segments, so that we can find
 | 
			
		||||
! them. (Except .text, which has already been done.)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -47,3 +57,9 @@ begtext:
 | 
			
		|||
.comm .trppc, 4
 | 
			
		||||
.comm .ignmask, 4
 | 
			
		||||
.comm _errno, 4
 | 
			
		||||
 | 
			
		||||
! We store the stack pointer and return address on entry so that we can
 | 
			
		||||
! cleanly exit.
 | 
			
		||||
 | 
			
		||||
.comm .returnsp, 4
 | 
			
		||||
.comm .returnlr, 4
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -8,6 +8,7 @@ static char rcsid[] = "$Id$";
 | 
			
		|||
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdint.h>
 | 
			
		||||
#include <assert.h>
 | 
			
		||||
#include "out.h"
 | 
			
		||||
#include "const.h"
 | 
			
		||||
| 
						 | 
				
			
			@ -44,6 +45,65 @@ static long read4(char* addr, int type)
 | 
			
		|||
		return ((long)word1 << (2 * WIDTH)) + word0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* VideoCore 4 fixups are complex as we need to patch the instruction in
 | 
			
		||||
 * one of several different ways (depending on what the instruction is).
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
static long get_vc4_valu(char* addr)
 | 
			
		||||
{
 | 
			
		||||
	uint16_t opcode = read2(addr, 0);
 | 
			
		||||
 | 
			
		||||
	if ((opcode & 0xff00) == 0xe700)
 | 
			
		||||
	{
 | 
			
		||||
		/* ld<w> rd, $+o:  [1110 0111 ww 0 d:5] [11111 o:27]
 | 
			
		||||
		 * st<w> rd, $+o:  [1110 0111 ww 1 d:5] [11111 o:27]
 | 
			
		||||
		 */
 | 
			
		||||
 | 
			
		||||
		int32_t value = read4(addr+2, 0);
 | 
			
		||||
		value &= 0x07ffffff;
 | 
			
		||||
		value = value<<5>>5;
 | 
			
		||||
		return value;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if ((opcode & 0xf080) == 0x9000)
 | 
			
		||||
	{
 | 
			
		||||
		/* b<cc> $+o*2:  [1001 cccc 0ooo oooo] [oooo oooo oooo oooo]
 | 
			
		||||
		 * Yes, big-endian (the first 16 bits is the MSB).
 | 
			
		||||
		 */
 | 
			
		||||
 | 
			
		||||
		uint32_t value = read4(addr, RELWR);
 | 
			
		||||
		value &= 0x007fffff;
 | 
			
		||||
		value = value<<9>>9;
 | 
			
		||||
		value *= 2;
 | 
			
		||||
		return value;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if ((opcode & 0xf080) == 0x9080)
 | 
			
		||||
	{
 | 
			
		||||
		/* bl $+o*2:  [1001 oooo 1ooo oooo] [oooo oooo oooo oooo]
 | 
			
		||||
		 * Yes, big-endian (the first 16 bits is the MSB).
 | 
			
		||||
		 * (Note that o is split.)
 | 
			
		||||
		 */
 | 
			
		||||
 | 
			
		||||
		int32_t value = read4(addr, RELWR);
 | 
			
		||||
		int32_t lov = value & 0x007fffff;
 | 
			
		||||
		int32_t hiv = value & 0x0f000000;
 | 
			
		||||
		value = lov | (hiv>>1);
 | 
			
		||||
		value = value<<5>>5;
 | 
			
		||||
		value *= 2;
 | 
			
		||||
		return value;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if ((opcode & 0xffe0) == 0xe500)
 | 
			
		||||
	{
 | 
			
		||||
        /* lea: [1110 0101 000 d:5] [o:32] */
 | 
			
		||||
 | 
			
		||||
        return read4(addr+2, 0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
	assert(0 && "unrecognised VC4 instruction");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * The bits in type indicate how many bytes the value occupies and what
 | 
			
		||||
 * significance should be attributed to each byte.
 | 
			
		||||
| 
						 | 
				
			
			@ -65,21 +125,7 @@ getvalu(addr, type)
 | 
			
		|||
	case RELOH2:
 | 
			
		||||
		return read2(addr, type) << 16;
 | 
			
		||||
	case RELOVC4:
 | 
			
		||||
	{
 | 
			
		||||
		long i = read4(addr, type);
 | 
			
		||||
		if (i & 0x00800000)
 | 
			
		||||
		{
 | 
			
		||||
            /* Branch instruction. */
 | 
			
		||||
            return (i<<9)>>9;
 | 
			
		||||
		}
 | 
			
		||||
		else
 | 
			
		||||
		{
 | 
			
		||||
			/* Branch-link instruction. */
 | 
			
		||||
			long hi = (i<<4)>>28;
 | 
			
		||||
			long lo = (i & 0x007fffff);
 | 
			
		||||
			return lo | (hi<<23);
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
		return get_vc4_valu(addr);
 | 
			
		||||
	default:
 | 
			
		||||
		fatal("bad relocation size");
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -123,6 +169,60 @@ static void write4(long valu, char* addr, int type)
 | 
			
		|||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* VideoCore 4 fixups are complex as we need to patch the instruction in
 | 
			
		||||
 * one of several different ways (depending on what the instruction is).
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
static void put_vc4_valu(char* addr, long value)
 | 
			
		||||
{
 | 
			
		||||
	uint16_t opcode = read2(addr, 0);
 | 
			
		||||
 | 
			
		||||
	if ((opcode & 0xff00) == 0xe700)
 | 
			
		||||
	{
 | 
			
		||||
		/* ld<w> rd, o, (pc):  [1110 0111 ww 0 d:5] [11111 o:27]
 | 
			
		||||
		 * st<w> rd, o, (pc):  [1110 0111 ww 1 d:5] [11111 o:27]
 | 
			
		||||
		 */
 | 
			
		||||
 | 
			
		||||
		uint32_t v = read4(addr+2, 0);
 | 
			
		||||
		v &= 0xf8000000;
 | 
			
		||||
		v |= value & 0x07ffffff;
 | 
			
		||||
		write4(v, addr+2, 0);
 | 
			
		||||
	}
 | 
			
		||||
	else if ((opcode & 0xf080) == 0x9000)
 | 
			
		||||
	{
 | 
			
		||||
		/* b<cc> dest:  [1001 cccc 0ooo oooo] [oooo oooo oooo oooo]
 | 
			
		||||
		 * Yes, big-endian (the first 16 bits is the MSB).
 | 
			
		||||
		 */
 | 
			
		||||
 | 
			
		||||
		uint32_t v = read4(addr, RELWR);
 | 
			
		||||
		v &= 0xff800000;
 | 
			
		||||
		v |= (value/2) & 0x007fffff;
 | 
			
		||||
		write4(v, addr, RELWR);
 | 
			
		||||
	}
 | 
			
		||||
	else if ((opcode & 0xf080) == 0x9080)
 | 
			
		||||
	{
 | 
			
		||||
		/* bl dest:  [1001 oooo 1ooo oooo] [oooo oooo oooo oooo]
 | 
			
		||||
		 * Yes, big-endian (the first 16 bits is the MSB).
 | 
			
		||||
		 * (Note that o is split.)
 | 
			
		||||
		 */
 | 
			
		||||
 | 
			
		||||
		uint32_t v = read4(addr, RELWR);
 | 
			
		||||
		uint32_t lovalue = (value/2) & 0x007fffff;
 | 
			
		||||
		uint32_t hivalue = (value/2) & 0x07800000;
 | 
			
		||||
		v &= 0xf0800000;
 | 
			
		||||
		v |= lovalue | (hivalue<<1);
 | 
			
		||||
		write4(v, addr, RELWR);
 | 
			
		||||
	}
 | 
			
		||||
	else if ((opcode & 0xffe0) == 0xe500)
 | 
			
		||||
	{
 | 
			
		||||
        /* lea: [1110 0101 000 d:5] [o:32] */
 | 
			
		||||
 | 
			
		||||
		write4(value, addr+2, 0);
 | 
			
		||||
    }
 | 
			
		||||
    else
 | 
			
		||||
		assert(0 && "unrecognised VC4 instruction");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * The bits in type indicate how many bytes the value occupies and what
 | 
			
		||||
 * significance should be attributed to each byte.
 | 
			
		||||
| 
						 | 
				
			
			@ -156,27 +256,8 @@ putvalu(valu, addr, type)
 | 
			
		|||
		write2(valu>>16, addr, type);
 | 
			
		||||
		break;
 | 
			
		||||
	case RELOVC4:
 | 
			
		||||
	{
 | 
			
		||||
		long i = read4(addr, type);
 | 
			
		||||
		if (i & 0x00800000)
 | 
			
		||||
		{
 | 
			
		||||
			/* Branch instruction. */
 | 
			
		||||
			unsigned v = (valu/2) & 0x007fffff;
 | 
			
		||||
			i &= ~0x007fffff;
 | 
			
		||||
			i |= v;
 | 
			
		||||
		}
 | 
			
		||||
		else
 | 
			
		||||
		{
 | 
			
		||||
			/* Branch-link instruction. */
 | 
			
		||||
	        unsigned v = (valu/2) & 0x07ffffff;
 | 
			
		||||
	        unsigned hiv = v >> 23;
 | 
			
		||||
	        unsigned lov = v & 0x007fffff;
 | 
			
		||||
			i &= ~0x0f7fffff;
 | 
			
		||||
			i |= (lov>>16) | (hiv<<24);
 | 
			
		||||
		}
 | 
			
		||||
		write4(i, addr, type);
 | 
			
		||||
		put_vc4_valu(addr, valu);
 | 
			
		||||
		break;
 | 
			
		||||
	}
 | 
			
		||||
	default:
 | 
			
		||||
		fatal("bad relocation size");
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue