Reworked VC4 relocations and some of the instruction encoding to be actually correct. Now generating what could be real code!
--HG-- branch : dtrg-videocore
This commit is contained in:
		
							parent
							
								
									1312fe298b
								
							
						
					
					
						commit
						5e9102955c
					
				
					 4 changed files with 187 additions and 53 deletions
				
			
		|  | @ -7,6 +7,11 @@ | ||||||
| 
 | 
 | ||||||
| #define maskx(v, x) (v & ((1<<(x))-1)) | #define maskx(v, x) (v & ((1<<(x))-1)) | ||||||
| 
 | 
 | ||||||
/* Report, through the assembler's serror() diagnostic, that an offset does
 * not fit into the field of the instruction being assembled. */
static void toobig(void)
{
	serror("offset too big to encode into instruction");
}
|  | 
 | ||||||
| /* Assemble an ALU instruction where rb is a register. */ | /* Assemble an ALU instruction where rb is a register. */ | ||||||
| 
 | 
 | ||||||
| void alu_instr_reg(quad op, int cc, int rd, int ra, int rb) | void alu_instr_reg(quad op, int cc, int rd, int ra, int rb) | ||||||
|  | @ -90,6 +95,7 @@ void misc_instr_lit(quad op, int cc, int rd, int ra, quad value) | ||||||
| 
 | 
 | ||||||
| void branch_instr(int bl, int cc, struct expr_t* expr) | void branch_instr(int bl, int cc, struct expr_t* expr) | ||||||
| { | { | ||||||
|  | 	quad pc = DOTVAL; | ||||||
| 	quad type = expr->typ & S_TYP; | 	quad type = expr->typ & S_TYP; | ||||||
| 
 | 
 | ||||||
| 	/* Sanity checking. */ | 	/* Sanity checking. */ | ||||||
|  | @ -114,7 +120,7 @@ void branch_instr(int bl, int cc, struct expr_t* expr) | ||||||
| 			/* The VC4 branch instructions express distance in 2-byte
 | 			/* The VC4 branch instructions express distance in 2-byte
 | ||||||
| 			 * words. */ | 			 * words. */ | ||||||
| 
 | 
 | ||||||
| 			int d = (expr->val - DOTVAL) / 2; | 			int d = (expr->val - pc) / 2; | ||||||
| 
 | 
 | ||||||
|         	/* We now know the worst case for the instruction layout. At
 |         	/* We now know the worst case for the instruction layout. At
 | ||||||
|         	 * this point we can emit the instructions, which may shrink |         	 * this point we can emit the instructions, which may shrink | ||||||
|  | @ -126,7 +132,7 @@ void branch_instr(int bl, int cc, struct expr_t* expr) | ||||||
|         	     * close enough to the program counter, we can use a short- |         	     * close enough to the program counter, we can use a short- | ||||||
|         	     * form instruction. */ |         	     * form instruction. */ | ||||||
| 
 | 
 | ||||||
|         	    if ((d >= -128) && (d < 127)) |         	    if (fitx(d, 7)) | ||||||
|         	    { |         	    { | ||||||
| 					emit2(B16(00011000,00000000) | (cc<<7) | (d&0x7f)); | 					emit2(B16(00011000,00000000) | (cc<<7) | (d&0x7f)); | ||||||
| 					break; | 					break; | ||||||
|  | @ -140,15 +146,25 @@ void branch_instr(int bl, int cc, struct expr_t* expr) | ||||||
| 
 | 
 | ||||||
| 			if (bl) | 			if (bl) | ||||||
| 			{ | 			{ | ||||||
| 				quad v = d & 0x07ffffff; | 				quad v, hiv, lov; | ||||||
| 				quad hiv = v >> 23; | 
 | ||||||
| 				quad lov = v & 0x007fffff; | 				if (!fitx(d, 27)) | ||||||
|  | 					toobig(); | ||||||
|  | 
 | ||||||
|  | 				v = maskx(d, 27); | ||||||
|  | 				hiv = v >> 23; | ||||||
|  | 				lov = v & 0x007fffff; | ||||||
| 				emit2(B16(10010000,10000000) | (lov>>16) | (hiv<<8)); | 				emit2(B16(10010000,10000000) | (lov>>16) | (hiv<<8)); | ||||||
| 				emit2(B16(00000000,00000000) | (lov&0xffff)); | 				emit2(B16(00000000,00000000) | (lov&0xffff)); | ||||||
| 			} | 			} | ||||||
| 			else | 			else | ||||||
| 			{ | 			{ | ||||||
| 				quad v = d & 0x007fffff; | 				quad v; | ||||||
|  | 
 | ||||||
|  | 				if (!fitx(d, 23)) | ||||||
|  | 					toobig(); | ||||||
|  | 
 | ||||||
|  | 				v = maskx(d, 23); | ||||||
| 				emit2(B16(10010000,00000000) | (cc<<8) | (v>>16)); | 				emit2(B16(10010000,00000000) | (cc<<8) | (v>>16)); | ||||||
| 				emit2(B16(00000000,00000000) | (v&0xffff)); | 				emit2(B16(00000000,00000000) | (v&0xffff)); | ||||||
| 			} | 			} | ||||||
|  | @ -334,6 +350,8 @@ void mem_postincr_instr(quad opcode, int cc, int rd, int rs) | ||||||
| 
 | 
 | ||||||
| void mem_address_instr(quad opcode, int rd, struct expr_t* expr) | void mem_address_instr(quad opcode, int rd, struct expr_t* expr) | ||||||
| { | { | ||||||
|  | 	static const char sizes[] = {4, 2, 1, 2}; | ||||||
|  | 	int size = sizes[opcode]; | ||||||
| 	quad type = expr->typ & S_TYP; | 	quad type = expr->typ & S_TYP; | ||||||
| 
 | 
 | ||||||
| 	/* Sanity checking. */ | 	/* Sanity checking. */ | ||||||
|  | @ -354,10 +372,7 @@ void mem_address_instr(quad opcode, int rd, struct expr_t* expr) | ||||||
| 		case 1: | 		case 1: | ||||||
| 		case 2: | 		case 2: | ||||||
| 		{ | 		{ | ||||||
| 			/* The VC4 branch instructions express distance in 2-byte
 | 			int d = expr->val - DOTVAL; | ||||||
| 			 * words. */ |  | ||||||
| 
 |  | ||||||
| 			int d = (expr->val - DOTVAL) / 2; |  | ||||||
| 
 | 
 | ||||||
|         	/* We now know the worst case for the instruction layout. At
 |         	/* We now know the worst case for the instruction layout. At
 | ||||||
|         	 * this point we can emit the instructions, which may shrink |         	 * this point we can emit the instructions, which may shrink | ||||||
|  | @ -365,25 +380,31 @@ void mem_address_instr(quad opcode, int rd, struct expr_t* expr) | ||||||
| 
 | 
 | ||||||
| 			if (type == DOTTYP) | 			if (type == DOTTYP) | ||||||
| 			{ | 			{ | ||||||
|  | 				int scaledd = d/size; | ||||||
|  | 
 | ||||||
|         	    /* This is a reference to an address within this section. If
 |         	    /* This is a reference to an address within this section. If
 | ||||||
|         	     * it's close enough to the program counter, we can use a |         	     * it's close enough to the program counter, we can use a | ||||||
|         	     * shorter instruction. */ |         	     * shorter instruction. */ | ||||||
| 
 | 
 | ||||||
| 				if (fitx(d, 16)) | 				if (fitx(scaledd, 16)) | ||||||
| 				{ | 				{ | ||||||
|                     emit2(B16(10101010,00000000) | (opcode<<5) | (rd<<0)); |                     emit2(B16(10101010,00000000) | (opcode<<5) | (rd<<0)); | ||||||
|                     emit2(d); |                     emit2(scaledd); | ||||||
|                     return; |                     return; | ||||||
|                 } |                 } | ||||||
| 			} | 			} | ||||||
| 
 | 
 | ||||||
| 			/* Otherwise we need the full 48 bits. */ | 			/* Otherwise we need the full 48 bits. */ | ||||||
| 
 | 
 | ||||||
|             if (!fitx(d, 27)) |  | ||||||
|                 serror("offset too big to encode into instruction"); |  | ||||||
| 
 |  | ||||||
| 			newrelo(expr->typ, RELOVC4|RELPC); | 			newrelo(expr->typ, RELOVC4|RELPC); | ||||||
| 
 | 
 | ||||||
|  | 			/* VC4 relocations store the PC-relative delta into the
 | ||||||
|  | 			 * destination section in the instruction data. The linker will | ||||||
|  | 			 * massage this, and scale it appropriately. */ | ||||||
|  | 
 | ||||||
|  |             if (!fitx(d, 27)) | ||||||
|  | 				toobig(); | ||||||
|  | 
 | ||||||
|             emit2(B16(11100111,00000000) | (opcode<<5) | (rd<<0)); |             emit2(B16(11100111,00000000) | (opcode<<5) | (rd<<0)); | ||||||
|             emit4((31<<27) | maskx(d, 27)); |             emit4((31<<27) | maskx(d, 27)); | ||||||
| 			break; | 			break; | ||||||
|  | @ -493,8 +514,19 @@ void lea_stack_instr(int rd, long va, int rs) | ||||||
| 
 | 
 | ||||||
| void lea_address_instr(int rd, struct expr_t* expr) | void lea_address_instr(int rd, struct expr_t* expr) | ||||||
| { | { | ||||||
| 	newrelo(expr->typ, RELOVC4); | 	quad pc = DOTVAL; | ||||||
|  | 	quad type = expr->typ & S_TYP; | ||||||
|  | 
 | ||||||
|  | 	if (type == S_ABS) | ||||||
|  | 		serror("can't use absolute addresses here"); | ||||||
|  | 
 | ||||||
|  | 	newrelo(expr->typ, RELOVC4|RELPC); | ||||||
|  | 
 | ||||||
|  | 	/* VC4 relocations store the PC-relative delta into the
 | ||||||
|  | 	 * destination section in the instruction data. The linker will | ||||||
|  | 	 * massage this, and scale it appropriately. */ | ||||||
|  | 
 | ||||||
| 	emit2(B16(11100101,00000000) | (rd<<0)); | 	emit2(B16(11100101,00000000) | (rd<<0)); | ||||||
| 	emit4(expr->val); | 	emit4(expr->val - pc); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -7,3 +7,8 @@ | ||||||
|  */ |  */ | ||||||
| 
 | 
 | ||||||
| #include "videocore.h" | #include "videocore.h" | ||||||
|  | 
 | ||||||
|  | .define	__dummy
 | ||||||
|  | .sect .data | ||||||
|  | __dummy: | ||||||
|  | 
 | ||||||
|  |  | ||||||
|  | @ -16,6 +16,10 @@ | ||||||
| .sect .text | .sect .text | ||||||
| 
 | 
 | ||||||
| begtext: | begtext: | ||||||
|  | 	lea r15, begtext | ||||||
|  | 	st sp, .returnsp | ||||||
|  | 	st lr, .returnlr | ||||||
|  | 
 | ||||||
| #if 0 | #if 0 | ||||||
| 	! Wipe the bss. (I'm a little surprised that __m_a_i_n doesn't do this.) | 	! Wipe the bss. (I'm a little surprised that __m_a_i_n doesn't do this.) | ||||||
| 	 | 	 | ||||||
|  | @ -33,6 +37,12 @@ begtext: | ||||||
| #endif | #endif | ||||||
| 	b __m_a_i_n | 	b __m_a_i_n | ||||||
| 
 | 
 | ||||||
|  | .define __exit
 | ||||||
|  | __exit: | ||||||
|  | 	ld sp, .returnsp | ||||||
|  | 	ld lr, .returnlr | ||||||
|  | 	b lr | ||||||
|  | 
 | ||||||
| ! Define symbols at the beginning of our various segments, so that we can find | ! Define symbols at the beginning of our various segments, so that we can find | ||||||
| ! them. (Except .text, which has already been done.) | ! them. (Except .text, which has already been done.) | ||||||
| 
 | 
 | ||||||
|  | @ -47,3 +57,9 @@ begtext: | ||||||
| .comm .trppc, 4 | .comm .trppc, 4 | ||||||
| .comm .ignmask, 4 | .comm .ignmask, 4 | ||||||
| .comm _errno, 4 | .comm _errno, 4 | ||||||
|  | 
 | ||||||
|  | ! We store the stack pointer and return address on entry so that we can | ||||||
|  | ! cleanly exit. | ||||||
|  | 
 | ||||||
|  | .comm .returnsp, 4 | ||||||
|  | .comm .returnlr, 4 | ||||||
|  |  | ||||||
|  | @ -8,6 +8,7 @@ static char rcsid[] = "$Id$"; | ||||||
| 
 | 
 | ||||||
| #include <stdlib.h> | #include <stdlib.h> | ||||||
| #include <stdio.h> | #include <stdio.h> | ||||||
|  | #include <stdint.h> | ||||||
| #include <assert.h> | #include <assert.h> | ||||||
| #include "out.h" | #include "out.h" | ||||||
| #include "const.h" | #include "const.h" | ||||||
|  | @ -44,6 +45,65 @@ static long read4(char* addr, int type) | ||||||
| 		return ((long)word1 << (2 * WIDTH)) + word0; | 		return ((long)word1 << (2 * WIDTH)) + word0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /* VideoCore 4 fixups are complex as we need to patch the instruction in
 | ||||||
|  |  * one of several different ways (depending on what the instruction is). | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | static long get_vc4_valu(char* addr) | ||||||
|  | { | ||||||
|  | 	uint16_t opcode = read2(addr, 0); | ||||||
|  | 
 | ||||||
|  | 	if ((opcode & 0xff00) == 0xe700) | ||||||
|  | 	{ | ||||||
|  | 		/* ld<w> rd, $+o:  [1110 0111 ww 0 d:5] [11111 o:27]
 | ||||||
|  | 		 * st<w> rd, $+o:  [1110 0111 ww 1 d:5] [11111 o:27] | ||||||
|  | 		 */ | ||||||
|  | 
 | ||||||
|  | 		int32_t value = read4(addr+2, 0); | ||||||
|  | 		value &= 0x07ffffff; | ||||||
|  | 		value = value<<5>>5; | ||||||
|  | 		return value; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if ((opcode & 0xf080) == 0x9000) | ||||||
|  | 	{ | ||||||
|  | 		/* b<cc> $+o*2:  [1001 cccc 0ooo oooo] [oooo oooo oooo oooo]
 | ||||||
|  | 		 * Yes, big-endian (the first 16 bits is the MSB). | ||||||
|  | 		 */ | ||||||
|  | 
 | ||||||
|  | 		uint32_t value = read4(addr, RELWR); | ||||||
|  | 		value &= 0x007fffff; | ||||||
|  | 		value = value<<9>>9; | ||||||
|  | 		value *= 2; | ||||||
|  | 		return value; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if ((opcode & 0xf080) == 0x9080) | ||||||
|  | 	{ | ||||||
|  | 		/* bl $+o*2:  [1001 oooo 1ooo oooo] [oooo oooo oooo oooo]
 | ||||||
|  | 		 * Yes, big-endian (the first 16 bits is the MSB). | ||||||
|  | 		 * (Note that o is split.) | ||||||
|  | 		 */ | ||||||
|  | 
 | ||||||
|  | 		int32_t value = read4(addr, RELWR); | ||||||
|  | 		int32_t lov = value & 0x007fffff; | ||||||
|  | 		int32_t hiv = value & 0x0f000000; | ||||||
|  | 		value = lov | (hiv>>1); | ||||||
|  | 		value = value<<5>>5; | ||||||
|  | 		value *= 2; | ||||||
|  | 		return value; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if ((opcode & 0xffe0) == 0xe500) | ||||||
|  | 	{ | ||||||
|  |         /* lea: [1110 0101 000 d:5] [o:32] */ | ||||||
|  | 
 | ||||||
|  |         return read4(addr+2, 0); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | 	assert(0 && "unrecognised VC4 instruction"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * The bits in type indicate how many bytes the value occupies and what |  * The bits in type indicate how many bytes the value occupies and what | ||||||
|  * significance should be attributed to each byte. |  * significance should be attributed to each byte. | ||||||
|  | @ -65,21 +125,7 @@ getvalu(addr, type) | ||||||
| 	case RELOH2: | 	case RELOH2: | ||||||
| 		return read2(addr, type) << 16; | 		return read2(addr, type) << 16; | ||||||
| 	case RELOVC4: | 	case RELOVC4: | ||||||
| 	{ | 		return get_vc4_valu(addr); | ||||||
| 		long i = read4(addr, type); |  | ||||||
| 		if (i & 0x00800000) |  | ||||||
| 		{ |  | ||||||
|             /* Branch instruction. */ |  | ||||||
|             return (i<<9)>>9; |  | ||||||
| 		} |  | ||||||
| 		else |  | ||||||
| 		{ |  | ||||||
| 			/* Branch-link instruction. */ |  | ||||||
| 			long hi = (i<<4)>>28; |  | ||||||
| 			long lo = (i & 0x007fffff); |  | ||||||
| 			return lo | (hi<<23); |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	default: | 	default: | ||||||
| 		fatal("bad relocation size"); | 		fatal("bad relocation size"); | ||||||
| 	} | 	} | ||||||
|  | @ -123,6 +169,60 @@ static void write4(long valu, char* addr, int type) | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /* VideoCore 4 fixups are complex as we need to patch the instruction in
 | ||||||
|  |  * one of several different ways (depending on what the instruction is). | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | static void put_vc4_valu(char* addr, long value) | ||||||
|  | { | ||||||
|  | 	uint16_t opcode = read2(addr, 0); | ||||||
|  | 
 | ||||||
|  | 	if ((opcode & 0xff00) == 0xe700) | ||||||
|  | 	{ | ||||||
|  | 		/* ld<w> rd, o, (pc):  [1110 0111 ww 0 d:5] [11111 o:27]
 | ||||||
|  | 		 * st<w> rd, o, (pc):  [1110 0111 ww 1 d:5] [11111 o:27] | ||||||
|  | 		 */ | ||||||
|  | 
 | ||||||
|  | 		uint32_t v = read4(addr+2, 0); | ||||||
|  | 		v &= 0xf8000000; | ||||||
|  | 		v |= value & 0x07ffffff; | ||||||
|  | 		write4(v, addr+2, 0); | ||||||
|  | 	} | ||||||
|  | 	else if ((opcode & 0xf080) == 0x9000) | ||||||
|  | 	{ | ||||||
|  | 		/* b<cc> dest:  [1001 cccc 0ooo oooo] [oooo oooo oooo oooo]
 | ||||||
|  | 		 * Yes, big-endian (the first 16 bits is the MSB). | ||||||
|  | 		 */ | ||||||
|  | 
 | ||||||
|  | 		uint32_t v = read4(addr, RELWR); | ||||||
|  | 		v &= 0xff800000; | ||||||
|  | 		v |= (value/2) & 0x007fffff; | ||||||
|  | 		write4(v, addr, RELWR); | ||||||
|  | 	} | ||||||
|  | 	else if ((opcode & 0xf080) == 0x9080) | ||||||
|  | 	{ | ||||||
|  | 		/* bl dest:  [1001 oooo 1ooo oooo] [oooo oooo oooo oooo]
 | ||||||
|  | 		 * Yes, big-endian (the first 16 bits is the MSB). | ||||||
|  | 		 * (Note that o is split.) | ||||||
|  | 		 */ | ||||||
|  | 
 | ||||||
|  | 		uint32_t v = read4(addr, RELWR); | ||||||
|  | 		uint32_t lovalue = (value/2) & 0x007fffff; | ||||||
|  | 		uint32_t hivalue = (value/2) & 0x07800000; | ||||||
|  | 		v &= 0xf0800000; | ||||||
|  | 		v |= lovalue | (hivalue<<1); | ||||||
|  | 		write4(v, addr, RELWR); | ||||||
|  | 	} | ||||||
|  | 	else if ((opcode & 0xffe0) == 0xe500) | ||||||
|  | 	{ | ||||||
|  |         /* lea: [1110 0101 000 d:5] [o:32] */ | ||||||
|  | 
 | ||||||
|  | 		write4(value, addr+2, 0); | ||||||
|  |     } | ||||||
|  |     else | ||||||
|  | 		assert(0 && "unrecognised VC4 instruction"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * The bits in type indicate how many bytes the value occupies and what |  * The bits in type indicate how many bytes the value occupies and what | ||||||
|  * significance should be attributed to each byte. |  * significance should be attributed to each byte. | ||||||
|  | @ -156,27 +256,8 @@ putvalu(valu, addr, type) | ||||||
| 		write2(valu>>16, addr, type); | 		write2(valu>>16, addr, type); | ||||||
| 		break; | 		break; | ||||||
| 	case RELOVC4: | 	case RELOVC4: | ||||||
| 	{ | 		put_vc4_valu(addr, valu); | ||||||
| 		long i = read4(addr, type); |  | ||||||
| 		if (i & 0x00800000) |  | ||||||
| 		{ |  | ||||||
| 			/* Branch instruction. */ |  | ||||||
| 			unsigned v = (valu/2) & 0x007fffff; |  | ||||||
| 			i &= ~0x007fffff; |  | ||||||
| 			i |= v; |  | ||||||
| 		} |  | ||||||
| 		else |  | ||||||
| 		{ |  | ||||||
| 			/* Branch-link instruction. */ |  | ||||||
| 	        unsigned v = (valu/2) & 0x07ffffff; |  | ||||||
| 	        unsigned hiv = v >> 23; |  | ||||||
| 	        unsigned lov = v & 0x007fffff; |  | ||||||
| 			i &= ~0x0f7fffff; |  | ||||||
| 			i |= (lov>>16) | (hiv<<24); |  | ||||||
| 		} |  | ||||||
| 		write4(i, addr, type); |  | ||||||
| 		break; | 		break; | ||||||
| 	} |  | ||||||
| 	default: | 	default: | ||||||
| 		fatal("bad relocation size"); | 		fatal("bad relocation size"); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue