Reworked VC4 relocations and some of the instruction encoding to be actually correct. Now generating what could be real code!
--HG-- branch : dtrg-videocore
This commit is contained in:
		
							parent
							
								
									1312fe298b
								
							
						
					
					
						commit
						5e9102955c
					
				
					 4 changed files with 187 additions and 53 deletions
				
			
		|  | @ -7,6 +7,11 @@ | |||
| 
 | ||||
| #define maskx(v, x) (v & ((1<<(x))-1)) | ||||
| 
 | ||||
/* Shared error path for the encoders: reports through serror() that an
 * offset cannot be represented in the instruction being assembled. */
static void toobig(void) {
	serror("offset too big to encode into instruction");
}
| 
 | ||||
| /* Assemble an ALU instruction where rb is a register. */ | ||||
| 
 | ||||
| void alu_instr_reg(quad op, int cc, int rd, int ra, int rb) | ||||
|  | @ -90,6 +95,7 @@ void misc_instr_lit(quad op, int cc, int rd, int ra, quad value) | |||
| 
 | ||||
| void branch_instr(int bl, int cc, struct expr_t* expr) | ||||
| { | ||||
| 	quad pc = DOTVAL; | ||||
| 	quad type = expr->typ & S_TYP; | ||||
| 
 | ||||
| 	/* Sanity checking. */ | ||||
|  | @ -114,7 +120,7 @@ void branch_instr(int bl, int cc, struct expr_t* expr) | |||
| 			/* The VC4 branch instructions express distance in 2-byte
 | ||||
| 			 * words. */ | ||||
| 
 | ||||
| 			int d = (expr->val - DOTVAL) / 2; | ||||
| 			int d = (expr->val - pc) / 2; | ||||
| 
 | ||||
|         	/* We now know the worst case for the instruction layout. At
 | ||||
|         	 * this point we can emit the instructions, which may shrink | ||||
|  | @ -126,7 +132,7 @@ void branch_instr(int bl, int cc, struct expr_t* expr) | |||
|         	     * close enough to the program counter, we can use a short- | ||||
|         	     * form instruction. */ | ||||
| 
 | ||||
|         	    if ((d >= -128) && (d < 127)) | ||||
|         	    if (fitx(d, 7)) | ||||
|         	    { | ||||
| 					emit2(B16(00011000,00000000) | (cc<<7) | (d&0x7f)); | ||||
| 					break; | ||||
|  | @ -140,15 +146,25 @@ void branch_instr(int bl, int cc, struct expr_t* expr) | |||
| 
 | ||||
| 			if (bl) | ||||
| 			{ | ||||
| 				quad v = d & 0x07ffffff; | ||||
| 				quad hiv = v >> 23; | ||||
| 				quad lov = v & 0x007fffff; | ||||
| 				quad v, hiv, lov; | ||||
| 
 | ||||
| 				if (!fitx(d, 27)) | ||||
| 					toobig(); | ||||
| 
 | ||||
| 				v = maskx(d, 27); | ||||
| 				hiv = v >> 23; | ||||
| 				lov = v & 0x007fffff; | ||||
| 				emit2(B16(10010000,10000000) | (lov>>16) | (hiv<<8)); | ||||
| 				emit2(B16(00000000,00000000) | (lov&0xffff)); | ||||
| 			} | ||||
| 			else | ||||
| 			{ | ||||
| 				quad v = d & 0x007fffff; | ||||
| 				quad v; | ||||
| 
 | ||||
| 				if (!fitx(d, 23)) | ||||
| 					toobig(); | ||||
| 
 | ||||
| 				v = maskx(d, 23); | ||||
| 				emit2(B16(10010000,00000000) | (cc<<8) | (v>>16)); | ||||
| 				emit2(B16(00000000,00000000) | (v&0xffff)); | ||||
| 			} | ||||
|  | @ -334,6 +350,8 @@ void mem_postincr_instr(quad opcode, int cc, int rd, int rs) | |||
| 
 | ||||
| void mem_address_instr(quad opcode, int rd, struct expr_t* expr) | ||||
| { | ||||
| 	static const char sizes[] = {4, 2, 1, 2}; | ||||
| 	int size = sizes[opcode]; | ||||
| 	quad type = expr->typ & S_TYP; | ||||
| 
 | ||||
| 	/* Sanity checking. */ | ||||
|  | @ -354,10 +372,7 @@ void mem_address_instr(quad opcode, int rd, struct expr_t* expr) | |||
| 		case 1: | ||||
| 		case 2: | ||||
| 		{ | ||||
| 			/* The VC4 branch instructions express distance in 2-byte
 | ||||
| 			 * words. */ | ||||
| 
 | ||||
| 			int d = (expr->val - DOTVAL) / 2; | ||||
| 			int d = expr->val - DOTVAL; | ||||
| 
 | ||||
|         	/* We now know the worst case for the instruction layout. At
 | ||||
|         	 * this point we can emit the instructions, which may shrink | ||||
|  | @ -365,25 +380,31 @@ void mem_address_instr(quad opcode, int rd, struct expr_t* expr) | |||
| 
 | ||||
| 			if (type == DOTTYP) | ||||
| 			{ | ||||
| 				int scaledd = d/size; | ||||
| 
 | ||||
|         	    /* This is a reference to an address within this section. If
 | ||||
|         	     * it's close enough to the program counter, we can use a | ||||
|         	     * shorter instruction. */ | ||||
| 
 | ||||
| 				if (fitx(d, 16)) | ||||
| 				if (fitx(scaledd, 16)) | ||||
| 				{ | ||||
|                     emit2(B16(10101010,00000000) | (opcode<<5) | (rd<<0)); | ||||
|                     emit2(d); | ||||
|                     emit2(scaledd); | ||||
|                     return; | ||||
|                 } | ||||
| 			} | ||||
| 
 | ||||
| 			/* Otherwise we need the full 48 bits. */ | ||||
| 
 | ||||
|             if (!fitx(d, 27)) | ||||
|                 serror("offset too big to encode into instruction"); | ||||
| 
 | ||||
| 			newrelo(expr->typ, RELOVC4|RELPC); | ||||
| 
 | ||||
| 			/* VC4 relocations store the PC-relative delta into the
 | ||||
| 			 * destination section in the instruction data. The linker will | ||||
| 			 * massage this, and scale it appropriately. */ | ||||
| 
 | ||||
|             if (!fitx(d, 27)) | ||||
| 				toobig(); | ||||
| 
 | ||||
|             emit2(B16(11100111,00000000) | (opcode<<5) | (rd<<0)); | ||||
|             emit4((31<<27) | maskx(d, 27)); | ||||
| 			break; | ||||
|  | @ -493,8 +514,19 @@ void lea_stack_instr(int rd, long va, int rs) | |||
| 
 | ||||
| void lea_address_instr(int rd, struct expr_t* expr) | ||||
| { | ||||
| 	newrelo(expr->typ, RELOVC4); | ||||
| 	quad pc = DOTVAL; | ||||
| 	quad type = expr->typ & S_TYP; | ||||
| 
 | ||||
| 	if (type == S_ABS) | ||||
| 		serror("can't use absolute addresses here"); | ||||
| 
 | ||||
| 	newrelo(expr->typ, RELOVC4|RELPC); | ||||
| 
 | ||||
| 	/* VC4 relocations store the PC-relative delta into the
 | ||||
| 	 * destination section in the instruction data. The linker will | ||||
| 	 * massage this, and scale it appropriately. */ | ||||
| 
 | ||||
| 	emit2(B16(11100101,00000000) | (rd<<0)); | ||||
| 	emit4(expr->val); | ||||
| 	emit4(expr->val - pc); | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -7,3 +7,8 @@ | |||
|  */ | ||||
| 
 | ||||
| #include "videocore.h" | ||||
| 
 | ||||
| .define	__dummy
 | ||||
| .sect .data | ||||
| __dummy: | ||||
| 
 | ||||
|  |  | |||
|  | @ -16,6 +16,10 @@ | |||
| .sect .text | ||||
| 
 | ||||
| begtext: | ||||
| 	lea r15, begtext | ||||
| 	st sp, .returnsp | ||||
| 	st lr, .returnlr | ||||
| 
 | ||||
| #if 0 | ||||
| 	! Wipe the bss. (I'm a little surprised that __m_a_i_n doesn't do this.) | ||||
| 	 | ||||
|  | @ -33,6 +37,12 @@ begtext: | |||
| #endif | ||||
| 	b __m_a_i_n | ||||
| 
 | ||||
| .define __exit
 | ||||
| __exit: | ||||
| 	ld sp, .returnsp | ||||
| 	ld lr, .returnlr | ||||
| 	b lr | ||||
| 
 | ||||
| ! Define symbols at the beginning of our various segments, so that we can find | ||||
| ! them. (Except .text, which has already been done.) | ||||
| 
 | ||||
|  | @ -47,3 +57,9 @@ begtext: | |||
| .comm .trppc, 4 | ||||
| .comm .ignmask, 4 | ||||
| .comm _errno, 4 | ||||
| 
 | ||||
| ! We store the stack pointer and return address on entry so that we can | ||||
| ! cleanly exit. | ||||
| 
 | ||||
| .comm .returnsp, 4 | ||||
| .comm .returnlr, 4 | ||||
|  |  | |||
|  | @ -8,6 +8,7 @@ static char rcsid[] = "$Id$"; | |||
| 
 | ||||
| #include <stdlib.h> | ||||
| #include <stdio.h> | ||||
| #include <stdint.h> | ||||
| #include <assert.h> | ||||
| #include "out.h" | ||||
| #include "const.h" | ||||
|  | @ -44,6 +45,65 @@ static long read4(char* addr, int type) | |||
| 		return ((long)word1 << (2 * WIDTH)) + word0; | ||||
| } | ||||
| 
 | ||||
| /* VideoCore 4 fixups are complex as we need to patch the instruction in
 | ||||
|  * one of several different ways (depending on what the instruction is). | ||||
|  */ | ||||
| 
 | ||||
| static long get_vc4_valu(char* addr) | ||||
| { | ||||
| 	uint16_t opcode = read2(addr, 0); | ||||
| 
 | ||||
| 	if ((opcode & 0xff00) == 0xe700) | ||||
| 	{ | ||||
| 		/* ld<w> rd, $+o:  [1110 0111 ww 0 d:5] [11111 o:27]
 | ||||
| 		 * st<w> rd, $+o:  [1110 0111 ww 1 d:5] [11111 o:27] | ||||
| 		 */ | ||||
| 
 | ||||
| 		int32_t value = read4(addr+2, 0); | ||||
| 		value &= 0x07ffffff; | ||||
| 		value = value<<5>>5; | ||||
| 		return value; | ||||
| 	} | ||||
| 
 | ||||
| 	if ((opcode & 0xf080) == 0x9000) | ||||
| 	{ | ||||
| 		/* b<cc> $+o*2:  [1001 cccc 0ooo oooo] [oooo oooo oooo oooo]
 | ||||
| 		 * Yes, big-endian (the first 16 bits is the MSB). | ||||
| 		 */ | ||||
| 
 | ||||
| 		uint32_t value = read4(addr, RELWR); | ||||
| 		value &= 0x007fffff; | ||||
| 		value = value<<9>>9; | ||||
| 		value *= 2; | ||||
| 		return value; | ||||
| 	} | ||||
| 
 | ||||
| 	if ((opcode & 0xf080) == 0x9080) | ||||
| 	{ | ||||
| 		/* bl $+o*2:  [1001 oooo 1ooo oooo] [oooo oooo oooo oooo]
 | ||||
| 		 * Yes, big-endian (the first 16 bits is the MSB). | ||||
| 		 * (Note that o is split.) | ||||
| 		 */ | ||||
| 
 | ||||
| 		int32_t value = read4(addr, RELWR); | ||||
| 		int32_t lov = value & 0x007fffff; | ||||
| 		int32_t hiv = value & 0x0f000000; | ||||
| 		value = lov | (hiv>>1); | ||||
| 		value = value<<5>>5; | ||||
| 		value *= 2; | ||||
| 		return value; | ||||
| 	} | ||||
| 
 | ||||
| 	if ((opcode & 0xffe0) == 0xe500) | ||||
| 	{ | ||||
|         /* lea: [1110 0101 000 d:5] [o:32] */ | ||||
| 
 | ||||
|         return read4(addr+2, 0); | ||||
|     } | ||||
| 
 | ||||
| 	assert(0 && "unrecognised VC4 instruction"); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * The bits in type indicate how many bytes the value occupies and what | ||||
|  * significance should be attributed to each byte. | ||||
|  | @ -65,21 +125,7 @@ getvalu(addr, type) | |||
| 	case RELOH2: | ||||
| 		return read2(addr, type) << 16; | ||||
| 	case RELOVC4: | ||||
| 	{ | ||||
| 		long i = read4(addr, type); | ||||
| 		if (i & 0x00800000) | ||||
| 		{ | ||||
|             /* Branch instruction. */ | ||||
|             return (i<<9)>>9; | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
| 			/* Branch-link instruction. */ | ||||
| 			long hi = (i<<4)>>28; | ||||
| 			long lo = (i & 0x007fffff); | ||||
| 			return lo | (hi<<23); | ||||
| 		} | ||||
| 	} | ||||
| 		return get_vc4_valu(addr); | ||||
| 	default: | ||||
| 		fatal("bad relocation size"); | ||||
| 	} | ||||
|  | @ -123,6 +169,60 @@ static void write4(long valu, char* addr, int type) | |||
| 	} | ||||
| } | ||||
| 
 | ||||
| /* VideoCore 4 fixups are complex as we need to patch the instruction in
 | ||||
|  * one of several different ways (depending on what the instruction is). | ||||
|  */ | ||||
| 
 | ||||
| static void put_vc4_valu(char* addr, long value) | ||||
| { | ||||
| 	uint16_t opcode = read2(addr, 0); | ||||
| 
 | ||||
| 	if ((opcode & 0xff00) == 0xe700) | ||||
| 	{ | ||||
| 		/* ld<w> rd, o, (pc):  [1110 0111 ww 0 d:5] [11111 o:27]
 | ||||
| 		 * st<w> rd, o, (pc):  [1110 0111 ww 1 d:5] [11111 o:27] | ||||
| 		 */ | ||||
| 
 | ||||
| 		uint32_t v = read4(addr+2, 0); | ||||
| 		v &= 0xf8000000; | ||||
| 		v |= value & 0x07ffffff; | ||||
| 		write4(v, addr+2, 0); | ||||
| 	} | ||||
| 	else if ((opcode & 0xf080) == 0x9000) | ||||
| 	{ | ||||
| 		/* b<cc> dest:  [1001 cccc 0ooo oooo] [oooo oooo oooo oooo]
 | ||||
| 		 * Yes, big-endian (the first 16 bits is the MSB). | ||||
| 		 */ | ||||
| 
 | ||||
| 		uint32_t v = read4(addr, RELWR); | ||||
| 		v &= 0xff800000; | ||||
| 		v |= (value/2) & 0x007fffff; | ||||
| 		write4(v, addr, RELWR); | ||||
| 	} | ||||
| 	else if ((opcode & 0xf080) == 0x9080) | ||||
| 	{ | ||||
| 		/* bl dest:  [1001 oooo 1ooo oooo] [oooo oooo oooo oooo]
 | ||||
| 		 * Yes, big-endian (the first 16 bits is the MSB). | ||||
| 		 * (Note that o is split.) | ||||
| 		 */ | ||||
| 
 | ||||
| 		uint32_t v = read4(addr, RELWR); | ||||
| 		uint32_t lovalue = (value/2) & 0x007fffff; | ||||
| 		uint32_t hivalue = (value/2) & 0x07800000; | ||||
| 		v &= 0xf0800000; | ||||
| 		v |= lovalue | (hivalue<<1); | ||||
| 		write4(v, addr, RELWR); | ||||
| 	} | ||||
| 	else if ((opcode & 0xffe0) == 0xe500) | ||||
| 	{ | ||||
|         /* lea: [1110 0101 000 d:5] [o:32] */ | ||||
| 
 | ||||
| 		write4(value, addr+2, 0); | ||||
|     } | ||||
|     else | ||||
| 		assert(0 && "unrecognised VC4 instruction"); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * The bits in type indicate how many bytes the value occupies and what | ||||
|  * significance should be attributed to each byte. | ||||
|  | @ -156,27 +256,8 @@ putvalu(valu, addr, type) | |||
| 		write2(valu>>16, addr, type); | ||||
| 		break; | ||||
| 	case RELOVC4: | ||||
| 	{ | ||||
| 		long i = read4(addr, type); | ||||
| 		if (i & 0x00800000) | ||||
| 		{ | ||||
| 			/* Branch instruction. */ | ||||
| 			unsigned v = (valu/2) & 0x007fffff; | ||||
| 			i &= ~0x007fffff; | ||||
| 			i |= v; | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
| 			/* Branch-link instruction. */ | ||||
| 	        unsigned v = (valu/2) & 0x07ffffff; | ||||
| 	        unsigned hiv = v >> 23; | ||||
| 	        unsigned lov = v & 0x007fffff; | ||||
| 			i &= ~0x0f7fffff; | ||||
| 			i |= (lov>>16) | (hiv<<24); | ||||
| 		} | ||||
| 		write4(i, addr, type); | ||||
| 		put_vc4_valu(addr, valu); | ||||
| 		break; | ||||
| 	} | ||||
| 	default: | ||||
| 		fatal("bad relocation size"); | ||||
| 	} | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue