501 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			501 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * VideoCore IV assembler for the ACK
 | |
|  * © 2013 David Given
 | |
|  * This file is redistributable under the terms of the 3-clause BSD license.
 | |
|  * See the file 'Copying' in the root of the distribution for the full text.
 | |
|  */
 | |
| 
 | |
| #include <stdint.h>
 | |
| 
 | |
/* Keep only the low x bits of v. Both arguments are parenthesised so that a
 * compound expression such as maskx(a|b, n) is masked as a whole. */
#define maskx(v, x) ((v) & ((1<<(x))-1))
 | |
| 
 | |
/* Report, via serror(), that an offset does not fit the chosen encoding. */
static void toobig(void)
{
	serror("offset too big to encode into instruction");
}
 | |
| 
 | |
| /* Assemble an ALU instruction where rb is a register. */
 | |
| 
 | |
| void alu_instr_reg(quad op, int cc, int rd, int ra, int rb)
 | |
| {
 | |
| 	/* Can we use short form? */
 | |
| 
 | |
| 	if ((cc == ALWAYS) && (ra == rd) && (ra < 0x10) && (rb < 0x10))
 | |
| 	{
 | |
| 		emit2(B16(01000000,00000000) | (op<<8) | (rb<<4) | (rd<<0));
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
|     /* Long form, then. */
 | |
| 
 | |
|     emit2(B16(11000000,00000000) | (op<<5) | (rd<<0));
 | |
|     emit2(B16(00000000,00000000) | (ra<<11) | (cc<<7) | (rb<<0));
 | |
| }
 | |
| 
 | |
| /* Assemble an ALU instruction where rb is a literal. */
 | |
| 
 | |
| void alu_instr_lit(quad op, int cc, int rd, int ra, long value)
 | |
| {
 | |
| 	/* 16 bit short form? */
 | |
| 
 | |
| 	if ((cc == ALWAYS) && !(op & 1) && (value >= 0) && (value <= 0x1f) &&
 | |
| 	    (ra == rd) && (ra < 0x10))
 | |
| 	{
 | |
| 		emit2(B16(01100000,00000000) | (op<<8) | (value<<4) | (rd<<0));
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	/* 32 bit medium form? */
 | |
| 
 | |
|     if ((value >= 0) && (value <= 0x1f))
 | |
|     {
 | |
|         emit2(B16(11000000,00000000) | (op<<5) | (rd<<0));
 | |
|         emit2(B16(00000000,01000000) | (ra<<11) | (cc<<7) | (value<<0));
 | |
|         return;
 | |
|     }
 | |
| 
 | |
| 	/* Long form, then. */
 | |
| 
 | |
| 	if (cc != ALWAYS)
 | |
| 		serror("cannot use condition codes with ALU literals this big");
 | |
| 
 | |
| 	/* add is special. */
 | |
| 
 | |
| 	if (op == B8(00000010))
 | |
| 		emit2(B16(11101100,00000000) | (ra<<5) | (rd<<0));
 | |
| 	else
 | |
| 	{
 | |
| 		if (ra != rd)
 | |
| 			serror("can only use 2op form of ALU instructions with literals this big");
 | |
| 		emit2(B16(11101000,00000000) | (op<<5) | (rd<<0));
 | |
| 	}
 | |
| 
 | |
| 	emit4(value);
 | |
| }
 | |
| 
 | |
| /* Miscellaneous instructions with three registers and a cc. */
 | |
| 
 | |
| void misc_instr_reg(quad op, int cc, int rd, int ra, int rb)
 | |
| {
 | |
| 	emit2(op | (rd<<0));
 | |
| 	emit2(B16(00000000,00000000) | (ra<<11) | (cc<<7) | (rb<<0));
 | |
| }
 | |
| 
 | |
| /* Miscellaneous instructions with two registers, a literal, and a cc. */
 | |
| 
 | |
| void misc_instr_lit(quad op, int cc, int rd, int ra, quad value)
 | |
| {
 | |
|     if (value < 0x1f)
 | |
|         serror("only constants from 0..31 can be used here");
 | |
| 
 | |
| 	emit2(op | (rd<<0));
 | |
| 	emit2(B16(00000000,01000000) | (ra<<11) | (cc<<7) | (value<<0));
 | |
| }
 | |
| 
 | |
| /* Assemble a branch instruction. This may be a near branch into this
 | |
|  * object file, or a far branch which requires a fixup. */
 | |
| 
 | |
| void branch_instr(int bl, int cc, struct expr_t* expr)
 | |
| {
 | |
| 	quad pc = DOTVAL;
 | |
| 	quad type = expr->typ & S_TYP;
 | |
| 	int d;
 | |
| 
 | |
| 	/* Sanity checking. */
 | |
| 
 | |
| 	if (bl && (cc != ALWAYS))
 | |
| 		serror("can't use condition codes with bl");
 | |
| 	if (type == S_ABS)
 | |
| 		serror("can't use absolute addresses here");
 | |
| 
 | |
| 	/* The VC4 branch instructions express distance in 2-byte
 | |
| 	 * words. */
 | |
| 
 | |
| 	d = (int32_t)expr->val - (int32_t)pc;
 | |
| 	if ((pass == 2) && (d > 0) && !(expr->typ & S_DOT))
 | |
|         d -= DOTGAIN;
 | |
| 	d /= 2;
 | |
| 
 | |
|     /* If this is a reference to code within this section, and it's
 | |
|      * close enough to the program counter, we can use a short-
 | |
|      * form instruction. */
 | |
| 
 | |
|     if (small(!bl && (type == DOTTYP) && fitx(d, 7), 2))
 | |
|     {
 | |
| 		emit2(B16(00011000,00000000) | (cc<<7) | (d&0x7f));
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	/* Absolute addresses and references to other sections
 | |
| 	 * need the full 32 bits. */
 | |
| 
 | |
| 	newrelo(expr->typ, RELOVC4|RELPC);
 | |
| 
 | |
| 	if (bl)
 | |
| 	{
 | |
| 		quad v, hiv, lov;
 | |
| 
 | |
| 		if (!fitx(d, 27))
 | |
| 			toobig();
 | |
| 
 | |
| 		v = maskx(d, 27);
 | |
| 		hiv = v >> 23;
 | |
| 		lov = v & 0x007fffff;
 | |
| 		emit2(B16(10010000,10000000) | (lov>>16) | (hiv<<8));
 | |
| 		emit2(B16(00000000,00000000) | (lov&0xffff));
 | |
| 	}
 | |
| 	else
 | |
| 	{
 | |
| 		quad v;
 | |
| 
 | |
| 		if (!fitx(d, 23))
 | |
| 			toobig();
 | |
| 
 | |
| 		v = maskx(d, 23);
 | |
| 		emit2(B16(10010000,00000000) | (cc<<8) | (v>>16));
 | |
| 		emit2(B16(00000000,00000000) | (v&0xffff));
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /* Push/pop. */
 | |
| 
 | |
| void stack_instr(quad opcode, int loreg, int hireg, int extrareg)
 | |
| {
 | |
|     int b;
 | |
|     int m;
 | |
| 
 | |
|     switch (loreg)
 | |
|     {
 | |
|         case 0: b = 0; break;
 | |
|         case 6: b = 1; break;
 | |
|         case 16: b = 2; break;
 | |
|         case 24: b = 3; break;
 | |
| 
 | |
|         case 26: /* lr */
 | |
|             extrareg = 26;
 | |
|             hireg = loreg = -1;
 | |
|             break;
 | |
| 
 | |
| 		case 31: /* pc */
 | |
| 			extrareg = 31;
 | |
| 			hireg = loreg = -1;
 | |
| 			break;
 | |
| 
 | |
| 		default:
 | |
| 			serror("base register for push or pop may be only r0, r6, r16, r24, lr or pc");
 | |
| 	}
 | |
| 
 | |
| 	if (opcode & 0x0080)
 | |
| 	{
 | |
| 		/* Push */
 | |
| 		if (extrareg == 31)
 | |
| 			serror("cannot push pc");
 | |
| 	}
 | |
| 	else
 | |
| 	{
 | |
| 		/* Pop */
 | |
| 		if (extrareg == 26)
 | |
| 			serror("cannot pop lr");
 | |
| 	}
 | |
| 
 | |
| 	if (hireg < loreg)
 | |
| 		serror("invalid register range");
 | |
| 
 | |
| 	if (hireg == -1)
 | |
| 	{
 | |
| 		b = 3;
 | |
| 		m = 15;
 | |
| 	}
 | |
| 	else
 | |
| 		m = hireg - loreg;
 | |
| 
 | |
| 	emit2(opcode | (b<<5) | (m<<0) | ((extrareg != -1) ? 0x0100 : 0));
 | |
| }
 | |
| 
 | |
| /* Memory operations where the offset is a fixed value (including zero). */
 | |
| 
 | |
| void mem_instr(quad opcode, int cc, int rd, long offset, int rs)
 | |
| {
 | |
| 	quad uoffset = (quad) offset;
 | |
| 	int multiple4 = !(offset & 3);
 | |
| 	int intonly = ((opcode & B8(00000110)) == 0);
 | |
| 
 | |
| 	/* If no CC, there are some special forms we can use. */
 | |
| 
 | |
| 	if (cc == ALWAYS)
 | |
| 	{
 | |
| 		/* Very short form, special for stack offsets. */
 | |
| 
 | |
| 		if (intonly && (rs == 25) && multiple4 && fitx(offset, 7) && (rd < 0x10))
 | |
| 		{
 | |
| 			quad o = maskx(offset, 7) / 4;
 | |
| 			emit2(B16(00000100,00000000) | (opcode<<9) | (o<<4) | (rd<<0));
 | |
| 			return;
 | |
| 		}
 | |
| 
 | |
| 		/* Slightly longer form for directly dereferencing via a register. */
 | |
| 
 | |
| 		if ((rs < 0x10) && (rd < 0x10) && (offset == 0))
 | |
| 		{
 | |
| 			emit2(B16(00001000,00000000) | (opcode<<8) | (rs<<4) | (rd<<0));
 | |
| 			return;
 | |
| 		}
 | |
| 
 | |
| 	    /* Integer only, but a limited offset. */
 | |
| 
 | |
| 	    if (intonly && (uoffset <= 0x3f) && (rs < 0x10) && (rd < 0x10))
 | |
| 		{
 | |
| 			quad o = uoffset / 4;
 | |
| 			emit2(B16(00100000,00000000) | (opcode<<12) | (o<<8) |
 | |
| 				(rs<<4) | (rd<<0));
 | |
| 			return;
 | |
| 		}
 | |
| 
 | |
| 		/* Certain registers support 16-bit offsets. */
 | |
| 
 | |
| 		if (fitx(offset, 16))
 | |
| 		{
 | |
| 			switch (rs)
 | |
| 			{
 | |
|                 case 0: opcode = B16(10101011,00000000) | (opcode<<5); goto specialreg;
 | |
|                 case 24: opcode = B16(10101000,00000000) | (opcode<<5); goto specialreg;
 | |
|                 case 25: opcode = B16(10101001,00000000) | (opcode<<5); goto specialreg;
 | |
|                 case 31: opcode = B16(10101010,00000000) | (opcode<<5); goto specialreg;
 | |
|                 default: break;
 | |
| 
 | |
|                 specialreg:
 | |
|                 {
 | |
|                     quad o = maskx(offset, 16);
 | |
|                     emit2(opcode | (rd<<0));
 | |
|                     emit2(o);
 | |
|                     return;
 | |
|                 }
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
|         /* 12-bit displacements. */
 | |
| 
 | |
|         if (fitx(offset, 12))
 | |
|         {
 | |
|         	quad looffset = maskx(offset, 11);
 | |
|         	quad hioffset = (offset >> 11) & 1;
 | |
| 
 | |
|         	emit2(B16(10100010,00000000) | (opcode<<5) | (rd<<0) | (hioffset<<8));
 | |
|         	emit2(B16(00000000,00000000) | (rs<<11) | (looffset<<0));
 | |
|         	return;
 | |
|         }
 | |
| 
 | |
|         /* Everything else uses Very Long Form. */
 | |
| 
 | |
| 		if (!fitx(offset, 27))
 | |
| 			serror("offset will not fit into load/store instruction");
 | |
| 
 | |
| 		if (rs == 31)
 | |
| 			opcode = B16(11100111,00000000) | (opcode<<5);
 | |
| 		else
 | |
| 			opcode = B16(11100110,00000000) | (opcode<<5);
 | |
| 
 | |
| 		emit2(opcode | (rd<<0));
 | |
| 		emit4((rs<<27) | maskx(offset, 27));
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	/* Now we're on to load/store instructions with ccs. */
 | |
| 
 | |
| 	if (uoffset <= 0x1f)
 | |
| 	{
 | |
| 		emit2(B16(10100000,00000000) | (opcode<<5) | (rd<<0));
 | |
| 		emit2(B16(00000000,01000000) | (rs<<11) | (cc<<7) | (uoffset<<0));
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	/* No encoding for this instruction. */
 | |
| 
 | |
| 	serror("invalid load/store instruction");
 | |
| }
 | |
| 
 | |
| /* Memory operations where the destination address is a sum of two
 | |
|  * registers. */
 | |
| 
 | |
| void mem_offset_instr(quad opcode, int cc, int rd, int ra, int rb)
 | |
| {
 | |
|     emit2(B16(10100000,00000000) | (opcode<<5) | (rd<<0));
 | |
|     emit2(B16(00000000,00000000) | (ra<<11) | (cc<<7) | (rb<<0));
 | |
| }
 | |
| 
 | |
| /* Memory operations with postincrement. */
 | |
| 
 | |
| void mem_postincr_instr(quad opcode, int cc, int rd, int rs)
 | |
| {
 | |
|     emit2(B16(10100101,00000000) | (opcode<<5) | (rd<<0));
 | |
|     emit2(B16(00000000,00000000) | (rs<<11) | (cc<<7));
 | |
| }
 | |
| 
 | |
| /* Memory operations where the destination is an address literal. */
 | |
| 
 | |
| void mem_address_instr(quad opcode, int rd, struct expr_t* expr)
 | |
| {
 | |
| 	static const char sizes[] = {4, 4, 2, 2, 1, 1, 2, 2};
 | |
| 	int size = sizes[opcode];
 | |
| 	quad type = expr->typ & S_TYP;
 | |
| 	int d, scaledd;
 | |
| 
 | |
| 	/* Sanity checking. */
 | |
| 
 | |
| 	if (type == S_ABS)
 | |
| 		serror("can't use absolute addresses here");
 | |
| 
 | |
| 	d = expr->val - DOTVAL;
 | |
| 	if ((pass == 2) && (d > 0) && !(expr->typ & S_DOT))
 | |
|         d -= DOTGAIN;
 | |
|     scaledd = d/size;
 | |
| 
 | |
|     /* If this is a reference to an address within this section, and
 | |
|      * it's close enough to the program counter, we can use a
 | |
|      * shorter instruction. */
 | |
| 
 | |
| 	if (small((type==DOTTYP) && fitx(scaledd, 16), 2))
 | |
| 	{
 | |
|         emit2(B16(10101010,00000000) | (opcode<<5) | (rd<<0));
 | |
|         emit2(scaledd);
 | |
|         return;
 | |
|     }
 | |
| 
 | |
| 	/* Otherwise we need the full 48 bits. */
 | |
| 
 | |
| 	newrelo(expr->typ, RELOVC4|RELPC);
 | |
| 
 | |
| 	/* VC4 relocations store the PC-relative delta into the
 | |
| 	 * destination section in the instruction data. The linker will
 | |
| 	 * massage this, and scale it appropriately. */
 | |
| 
 | |
|     if (!fitx(d, 27))
 | |
| 		toobig();
 | |
| 
 | |
|     emit2(B16(11100111,00000000) | (opcode<<5) | (rd<<0));
 | |
|     emit4((31<<27) | maskx(d, 27));
 | |
| }
 | |
| 
 | |
| /* Common code for handling addcmp: merge in as much of expr as will fit to
 | |
|  * the second pair of the addcmp opcode. */
 | |
| 
 | |
| static void branch_addcmp_common(quad opcode, int bits, struct expr_t* expr)
 | |
| {
 | |
| 	quad type = expr->typ & S_TYP;
 | |
| 	int d;
 | |
| 
 | |
| 	if ((pass>0) && (type != DOTTYP))
 | |
| 		serror("can't use this type of branch to jump outside the section");
 | |
| 
 | |
| 	/* The VC4 branch instructions express distance in 2-byte
 | |
| 	 * words. */
 | |
| 
 | |
| 	d = (expr->val - DOTVAL-2 + 4);
 | |
| 	if ((pass == 2) && (d > 0) && !(expr->typ & S_DOT))
 | |
|         d -= DOTGAIN;
 | |
|     d /= 2;
 | |
| 
 | |
| 	if ((pass == 2) && !fitx(d, bits))
 | |
| 		serror("target of branch is too far away");
 | |
| 
 | |
| 	emit2(opcode | maskx(d, bits));
 | |
| }
 | |
| 
 | |
/*
 * addcmp-and-branch where both the addend (ra) and the comparand (rs) are
 * registers. All three registers must be below 0x10. The branch target
 * gets a 10-bit displacement field.
 */
void branch_addcmp_reg_reg_instr(int cc, int rd, int ra, int rs, struct expr_t* expr)
{
	int lowregs = (rd < 0x10) && (ra < 0x10) && (rs < 0x10);

	if (!lowregs)
		serror("can only use r0-r15 in this instruction");

	emit2(B16(10000000,00000000) | (cc<<8) | (ra<<4) | (rd<<0));
	branch_addcmp_common(B16(00000000,00000000) | (rs<<10), 10, expr);
}
 | |
| 
 | |
/*
 * addcmp-and-branch where the addend is a literal (va, must fit 4 bits) and
 * the comparand is a register (rs). rd and rs must be below 0x10. The
 * branch target gets a 10-bit displacement field.
 */
void branch_addcmp_lit_reg_instr(int cc, int rd, long va, int rs, struct expr_t* expr)
{
	long addend;
	int lowregs = (rd < 0x10) && (rs < 0x10);

	if (!lowregs)
		serror("can only use r0-r15 in this instruction");

	if (!fitx(va, 4))
		serror("value too big to encode into instruction");
	addend = maskx(va, 4);

	emit2(B16(10000000,00000000) | (cc<<8) | (addend<<4) | (rd<<0));
	branch_addcmp_common(B16(01000000,00000000) | (rs<<10), 10, expr);
}
 | |
| 
 | |
/*
 * addcmp-and-branch where the addend is a register (ra) and the comparand
 * is a literal (vs, must fit 6 bits). rd and ra must be below 0x10. The
 * branch target gets an 8-bit displacement field.
 */
void branch_addcmp_reg_lit_instr(int cc, int rd, int ra, long vs, struct expr_t* expr)
{
	long comparand;
	int lowregs = (rd < 0x10) && (ra < 0x10);

	if (!lowregs)
		serror("can only use r0-r15 in this instruction");

	if (!fitx(vs, 6))
		serror("value too big to encode into instruction");
	comparand = maskx(vs, 6);

	emit2(B16(10000000,00000000) | (cc<<8) | (ra<<4) | (rd<<0));
	branch_addcmp_common(B16(10000000,00000000) | (comparand<<8), 8, expr);
}
 | |
| 
 | |
/*
 * addcmp-and-branch where both the addend (va, must fit 4 bits) and the
 * comparand (vs, must fit 6 bits) are literals. rd must be below 0x10. The
 * branch target gets an 8-bit displacement field.
 */
void branch_addcmp_lit_lit_instr(int cc, int rd, long va, long vs, struct expr_t* expr)
{
	long addend;
	long comparand;

	if (!(rd < 0x10))
		serror("can only use r0-r15 in this instruction");

	if (!(fitx(va, 4) && fitx(vs, 6)))
		serror("value too big to encode into instruction");
	addend = maskx(va, 4);
	comparand = maskx(vs, 6);

	emit2(B16(10000000,00000000) | (cc<<8) | (addend<<4) | (rd<<0));
	branch_addcmp_common(B16(11000000,00000000) | (comparand<<8), 8, expr);
}
 | |
| 
 | |
/*
 * lea, where the source is relative to the stack.
 *
 *   rd - destination register
 *   va - byte offset; it is divided by four and the quotient must fit
 *        6 bits
 *   rs - source register; anything but r25 (sp) is reported as an error
 */

void lea_stack_instr(int rd, long va, int rs)
{
	long words;

	if (rs != 25)
		serror("source register must be sp");

	words = va / 4;
	if (!fitx(words, 6))
		serror("offset too big to encode in instruction");

	emit2(B16(00010000,00000000) | (rd<<0) | (maskx(words, 6)<<5));
}
 | |
| 
 | |
| /* lea, where the source is an address. */
 | |
| 
 | |
| void lea_address_instr(int rd, struct expr_t* expr)
 | |
| {
 | |
| 	quad pc = DOTVAL;
 | |
| 	quad type = expr->typ & S_TYP;
 | |
| 	int d = expr->val - pc;
 | |
| 
 | |
| 	if ((pass == 2) && (d > 0) && !(expr->typ & S_DOT))
 | |
| 	    d -= DOTGAIN;
 | |
| 
 | |
| 	if (type == S_ABS)
 | |
| 		serror("can't use absolute addresses here");
 | |
| 
 | |
| 	newrelo(expr->typ, RELOVC4|RELPC);
 | |
| 
 | |
| 	/* VC4 relocations store the PC-relative delta into the
 | |
| 	 * destination section in the instruction data. The linker will
 | |
| 	 * massage this, and scale it appropriately. */
 | |
| 
 | |
| 	emit2(B16(11100101,00000000) | (rd<<0));
 | |
| 	emit4(expr->val - pc);
 | |
| }
 | |
| 
 | |
| /* Floating point conversion opcodes (ftrunc, floor, flts, fltu). */
 | |
| 
 | |
| void fltcnv_instr(quad opcode, int cc, int rd, int ra, quad shift)
 | |
| {
 | |
| 	fitx(shift, 6);
 | |
| 
 | |
| 	emit2(opcode | (rd<<0));
 | |
| 	emit2(B16(00000000,01000000) | (ra<<11) | (cc<<7) | shift);
 | |
| }
 | |
| 
 |