510 lines
12 KiB
C
510 lines
12 KiB
C
/* $Id: mach5.c, v3.3 25-Apr-89 AJM */
|
|
|
|
branch(brtyp, link, val)
|
|
word_t brtyp;
|
|
word_t link;
|
|
valu_t val;
|
|
{
|
|
valu_t offset;
|
|
|
|
offset = val - DOTVAL - 8; /* Allow for pipeline */
|
|
if ((offset & 0xFC000000) != 0 && (offset & 0xFC000000) != 0xFC000000){
|
|
serror("offset out of range");
|
|
}
|
|
offset = offset>>2 & 0xFFFFFF;
|
|
emit4(brtyp|link|offset);
|
|
return;
|
|
}
|
|
|
|
data(opc, ins, val, typ)
|
|
long opc, ins;
|
|
valu_t val;
|
|
short typ;
|
|
{
|
|
valu_t tmpval;
|
|
int adrflag = 0;
|
|
|
|
if (typ == S_REG){ /* The argument is a register */
|
|
emit4(opc|ins|val);
|
|
return;
|
|
}
|
|
|
|
/* Do a bit of optimisation here, since the backend might produce instructions
|
|
of the type MOV R0, R0, #0. We can ignore these. */
|
|
|
|
if (((opc == ADD) || (opc == SUB)) && (val == 0)){ /* ADD or SUB 0 ? */
|
|
if ((ins & 0x000F0000) == ((ins & 0x0000F000) << 4)) /* Same reg ? */
|
|
return; /* Don't emit anything */
|
|
}
|
|
|
|
/* No optimisation, so carry on ... */
|
|
|
|
ins |= 0x02000000; /* The argument is an immediate value */
|
|
tmpval = val;
|
|
if (opc == 0xff){ /* This is an ADR */
|
|
adrflag = 1;
|
|
opc = MOV;
|
|
}
|
|
|
|
if (typ == S_ABS){ /* An absolute value */
|
|
if (calcimm(&opc, &tmpval, typ)){
|
|
emit4(opc|ins|tmpval);
|
|
return;
|
|
}
|
|
}
|
|
|
|
tmpval = val;
|
|
if (!adrflag){ /* Don't do this for ADRs */
|
|
if (oursmall(calcimm(&opc, &tmpval, typ), 12)){
|
|
emit4(opc|ins|tmpval);
|
|
return;
|
|
}
|
|
}
|
|
|
|
if (opc == MOV || opc == MVN || opc == ADD || opc == SUB){
|
|
if (!bflag && pass == PASS_3){ /* Debugging info */
|
|
/* warning("MOV/ADD extension"); */
|
|
/* if (dflag)
|
|
printf("value: %lx\n", val);*/
|
|
}
|
|
if (oursmall((val & 0xFFFF0000) == 0, 8)){
|
|
putaddr(opc, ins, val, 2);
|
|
return;
|
|
}
|
|
if (oursmall((val & 0xFF000000) == 0, 4)){
|
|
putaddr(opc, ins, val, 3);
|
|
return;
|
|
}
|
|
putaddr(opc, ins, val, 4);
|
|
return;
|
|
}
|
|
|
|
if (pass == PASS_1)
|
|
DOTVAL += 16; /* Worst case we can emit */
|
|
else
|
|
serror("immediate value out of range");
|
|
return;
|
|
}
|
|
|
|
|
|
/* Calculate an immediate value. This is not as easy as it sounds, because
|
|
the ARM uses an 8-bit value and 4-bit shift to encode the value into a
|
|
12-bit field. Unfortunately this means that some numbers may not fit at
|
|
all. */
|
|
|
|
calcimm(opc,val,typ)
|
|
word_t *opc;
|
|
valu_t *val;
|
|
short typ;
|
|
{
|
|
int i = 0;
|
|
|
|
if (typ == S_UND)
|
|
return(0); /* Can't do anything with an undefined label */
|
|
|
|
if ((*val & 0xFFFFFF00) == 0) /* Value is positive, but < 256, */
|
|
return(1); /* so doesn't need a shift */
|
|
|
|
if ((~*val & 0xFFFFFF00) == 0){ /* Value is negative, but < 256, */
|
|
if (*opc == AND) /* so no shift required, only */
|
|
{ /* inversion */
|
|
*val = ~*val;
|
|
*opc = BIC;
|
|
return(1);
|
|
}
|
|
if (*opc == MOV)
|
|
{
|
|
*val = ~*val;
|
|
*opc = MVN;
|
|
return(1);
|
|
}
|
|
if (*opc == ADC)
|
|
{
|
|
*val = ~*val;
|
|
*opc = SBC;
|
|
return(1);
|
|
}
|
|
|
|
}
|
|
if ((-1**val & 0xFFFFFF00) == 0){ /* Same idea ... */
|
|
if (*opc == ADD)
|
|
{
|
|
*val *= -1;
|
|
*opc = SUB;
|
|
return(1);
|
|
}
|
|
if (*opc == CMP)
|
|
{
|
|
*val *= -1;
|
|
*opc = CMN;
|
|
return(1);
|
|
}
|
|
}
|
|
|
|
do{ /* Now we need to shift */
|
|
rotateleft2(&*val); /* Rotate left by two bits */
|
|
i++;
|
|
if((*val & 0xFFFFFF00) == 0){ /* Got a value < 256 */
|
|
*val = *val|i<<8; /* OR in the shift */
|
|
return(1);
|
|
}
|
|
if ((~*val & 0xFFFFFF00) == 0){ /* If negative, carry out */
|
|
if (*opc == AND) /* inversion as before */
|
|
{
|
|
*val = ~*val|i<<8;
|
|
*opc = BIC;
|
|
return(1);
|
|
}
|
|
if (*opc == MOV)
|
|
{
|
|
*val = ~*val|i<<8;
|
|
*opc = MVN;
|
|
return(1);
|
|
}
|
|
if (*opc == ADC)
|
|
{
|
|
*val = ~*val|i<<8;
|
|
*opc = SBC;
|
|
return(1);
|
|
}
|
|
}
|
|
}while(i<15);
|
|
|
|
return(0); /* Failed if can't encode it after 16 rotates */
|
|
}
|
|
|
|
|
|
/* Calculate an offset in an address */
|
|
|
|
word_t
|
|
calcoffset(val)
|
|
valu_t val;
|
|
{
|
|
if((val & 0xFFFFF000) == 0)
|
|
return(val|0x00800000);
|
|
val *= -1;
|
|
if((val & 0xFFFFF000) == 0)
|
|
return(val);
|
|
serror("offset out of range");
|
|
return(0);
|
|
}
|
|
|
|
|
|
/* This routine deals with STR and LDR instructions */
|
|
|
|
strldr(opc, ins, val)
|
|
long opc, ins;
|
|
valu_t val;
|
|
{
|
|
|
|
long reg, reg2; /* The registers we are using */
|
|
long tmpval;
|
|
|
|
/* If the expression was a register, then just output it and save 24
|
|
bytes */
|
|
|
|
if (success){
|
|
emit4(opc|ins|val);
|
|
return;
|
|
}
|
|
|
|
reg = ins & 0x0000F000; /* Extract register from instruction */
|
|
|
|
if (opc == LDR){
|
|
|
|
tmpval = val - DOTVAL - 8;
|
|
if (oursmall((tmpval & 0xFFFFF000) == 0, 16)){ /* If it's +ve */
|
|
emit4(opc|ins|tmpval|0x018F0000); /* PC rel, up bit */
|
|
return;
|
|
}
|
|
|
|
tmpval *= -1;
|
|
if (oursmall((tmpval & 0xFFFFF000) == 0, 16)){ /* If it's -ve */
|
|
emit4(opc|ins|tmpval|0x010F0000); /* PC rel, no up bit */
|
|
return;
|
|
}
|
|
|
|
if (!bflag && pass == PASS_3){ /* Debugging info */
|
|
/* warning("LDR address extension"); */
|
|
if (dflag)
|
|
printf("value: %lx\n", val);
|
|
}
|
|
|
|
opc = 0x03A00000; /* Set opc for putaddr */
|
|
|
|
if (oursmall((val & 0xFFFF0000) == 0, 8)){
|
|
putaddr(opc, ins & 0xFFBFFFFF, val, 2);
|
|
emit4(0x05100000|ins|reg<<4);
|
|
return;
|
|
}
|
|
if (oursmall((val & 0xFF000000) == 0, 4)){
|
|
putaddr(opc, ins & 0xFFBFFFFF, val, 3);
|
|
emit4(0x05100000|ins|reg<<4);
|
|
return;
|
|
}
|
|
putaddr(opc, ins & 0xFFBFFFFF, val, 4);
|
|
emit4(0x05100000|ins|reg<<4);
|
|
return;
|
|
}
|
|
|
|
/* If the failure was an STR instruction, things are a bit more complicated as
|
|
we can't overwrite the register before we store its value. We therefore
|
|
need to use another register as well, which must be saved and restored.
|
|
This register is saved on a stack pointed to by R12. Apart from this
|
|
complication, the scheme is similar to the LDR above. */
|
|
|
|
if (opc == STR){
|
|
reg2 = reg >> 12; /* Use R6 as the second register, */
|
|
reg2 = (reg2 == 6 ? 0 : 6); /* or R0 if we can't */
|
|
|
|
tmpval = val - DOTVAL - 8;
|
|
if (oursmall((tmpval & 0xFFFFF000) == 0, 24)){ /* If it's +ve */
|
|
emit4(opc|ins|tmpval|0x018F0000); /* PC rel, up bit */
|
|
return;
|
|
}
|
|
|
|
tmpval *= -1;
|
|
if (oursmall((tmpval & 0xFFFFF000) == 0, 24)){ /* If it's -ve */
|
|
emit4(opc|ins|tmpval|0x010F0000); /* PC rel, no up bit */
|
|
return;
|
|
}
|
|
|
|
if (!bflag && pass == PASS_3){ /* Debugging info */
|
|
/* warning("STR address extension"); */
|
|
if (dflag)
|
|
printf("value: %lx\n", val);
|
|
}
|
|
|
|
opc = 0x03A00000; /* Set opc for putaddr */
|
|
|
|
if (oursmall((val & 0xFFFF0000) == 0, 8)){
|
|
emit4(0xE92C0000|1<<reg2);
|
|
putaddr(opc, (ins & 0xFFBF0FFF)|reg2<<12, val, 2);
|
|
emit4(0x05000000|ins|reg2<<16);
|
|
emit4(0xE8BC0000|1<<reg2);
|
|
return;
|
|
}
|
|
if (oursmall((val & 0xFF000000) == 0, 4)){
|
|
emit4(0xE92C0000|1<<reg2);
|
|
putaddr(opc, (ins & 0xFFBF0FFF)|reg2<<12, val, 3);
|
|
emit4(0x05000000|ins|reg2<<16);
|
|
emit4(0xE8BC0000|1<<reg2);
|
|
return;
|
|
}
|
|
emit4(0xE92C0000|1<<reg2);
|
|
putaddr(opc, (ins & 0xFFBF0FFF)|reg2<<12, val, 4);
|
|
emit4(0x05000000|ins|reg2<<16);
|
|
emit4(0xE8BC0000|1<<reg2);
|
|
return;
|
|
}
|
|
|
|
}
|
|
|
|
|
|
/* This routine deals with ADR instructions. The ARM does not have a
|
|
'calculate effective address' instruction, so we use ADD, SUB, MOV or
|
|
MVN instead. ADR is not a genuine instruction, but is provided to make
|
|
life easier. At present these are all calculated by using a MOV and
|
|
successive ADDs. Even if the address will fit into a single MOV, we
|
|
still use two instructions; the second is a no-op. This is to cure the
|
|
optimisation problem with mobile addresses ! */
|
|
|
|
|
|
calcadr(ins, reg, val, typ)
|
|
word_t ins, reg;
|
|
valu_t val;
|
|
short typ;
|
|
{
|
|
valu_t tmpval = val;
|
|
word_t opc = 0xff; /* Dummy opc used as a flag for data() */
|
|
|
|
/* First check that the address is in range */
|
|
|
|
if (val < 0)
|
|
tmpval = ~tmpval; /* Invert negative addresses for check */
|
|
|
|
if ((tmpval & 0xFC000000) && (typ != S_UND)){
|
|
serror("adr address out of range");
|
|
return;
|
|
}
|
|
|
|
/* Can't do it PC relative, so use an absolute MOV instead */
|
|
|
|
data (opc, ins|reg<<12, val, typ);
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
word_t
|
|
calcshft(val, typ, styp)
|
|
valu_t val;
|
|
short typ;
|
|
word_t styp;
|
|
{
|
|
if (typ == S_UND)
|
|
return(0);
|
|
|
|
if (val & 0xFFFFFFE0)
|
|
serror("shiftcount out of range");
|
|
|
|
if (styp && !val)
|
|
warning("shiftcount 0");
|
|
|
|
return((val & 0x1F)<<7);
|
|
}
|
|
|
|
/*
 * rotateleft2 - rotate the low 32 bits of *x left by two bits.
 *
 * The rotation is done in unsigned arithmetic and masked to 32 bits, so
 * it does not depend on shifting a signed long (undefined when the value
 * is negative or overflows) nor on long being exactly 32 bits wide.  The
 * result is stored as a sign-extended 32-bit quantity: it has the value a
 * 32-bit long would hold, whatever the width of long on the host.
 *
 * Always returns 0; callers ignore the result.
 */
int
rotateleft2(x)
long *x;
{
	unsigned long word;

	word = (unsigned long)*x & 0xFFFFFFFFUL;
	word = ((word << 2) | (word >> 30)) & 0xFFFFFFFFUL;

	/* Put the low 31 bits back, then apply the weight of bit 31 */
	*x = (long)(word & 0x7FFFFFFFUL);
	if (word & 0x80000000UL)
		*x += -0x7FFFFFFFL - 1;
	return 0;
}
|
|
|
|
|
|
/*
|
|
This routine overcomes the 12-bit encoding problem by outputting a number
|
|
a byte at a time. For a MOV, it first uses a MOV, then successive ADDs.
|
|
It will not use any more ADDs than needed to completely output the number.
|
|
A similar approach is used for ADDs and SUBs.
|
|
There is a problem here with optimisation in the third pass; if the
|
|
instruction needed two ADDs in the second pass, but only one in the third
|
|
pass, then the second ADD is replaced with a no-op. We cannot emit one
|
|
less instruction, because that will upset other addresses.
|
|
*/
|
|
|
|
putaddr(opc, ins, val, count)
|
|
long opc, ins, val;
|
|
int count;
|
|
{
|
|
long tmpval = val;
|
|
long reg = ins & 0x0000F000;
|
|
|
|
emit4(opc|ins|(val & 0x000000FF));
|
|
|
|
tmpval = (val & 0x0000FF00) >> 8 | 0x00000C00;
|
|
|
|
/* Decide what to use for the additional instructions */
|
|
|
|
if (opc == 0x03a00000) /* This one is for strldr */
|
|
opc = 0x02800000;
|
|
|
|
if (opc == MOV)
|
|
opc = ADD;
|
|
|
|
if (opc == MVN)
|
|
opc = SUB;
|
|
|
|
if ((tmpval & 0x000000FF) != 0)
|
|
emit4(opc|ins|reg<<4|tmpval);
|
|
else
|
|
emit4(0xF0000000); /* No-op if a zero argument */
|
|
|
|
if (count == 3 || count == 4){ /* Must use three or more instructions */
|
|
if ((val & 0xFFFF0000) != 0){
|
|
tmpval = (val & 0x00FF0000) >> 16 | 0x00000800;
|
|
emit4(opc|ins|reg<<4|tmpval);
|
|
}
|
|
else
|
|
emit4(0xF0000000); /* No-op */
|
|
}
|
|
|
|
if (count == 4){ /* Must use four instructions */
|
|
if ((val & 0xFF000000) != 0){
|
|
tmpval = (val & 0xFF000000) >> 24 | 0x00000400;
|
|
emit4(opc|ins|reg<<4|tmpval);
|
|
}
|
|
else
|
|
emit4(0xF0000000); /* No-op */
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
/* The following piece of code is stolen from comm7.c; it needs some minor
|
|
fixes for the ARM, so it is included here rather than altering the existing
|
|
code. It maintains a bit table to say whether or not an optimisation is
|
|
possible. The original had some problems:
|
|
(a). It assumed that the memory returned by malloc() was cleared to zero.
|
|
This is true on a Sun, but not under Minix; small() should really
|
|
use calloc() instead.
|
|
(b). It assumed that if an optimisation was possible in pass 2, it must
|
|
also be possible in pass 3, and produced an assertion error if it
|
|
wasn't. This is OK for optimising things like long or short branch
|
|
instructions on a 68000, but not for ADRs on the ARM. A previous
|
|
optimisation may place an address out of 12-bit encoding range on
|
|
pass 3, when it was in range on pass 2. However we have to be
|
|
careful here .....
|
|
*/
|
|
|
|
#define PBITTABSZ 128
|
|
static char *pbittab[PBITTABSZ];
|
|
|
|
oursmall(fitsmall, gain)
|
|
{
|
|
register bit;
|
|
register char *p;
|
|
|
|
if (DOTSCT == NULL)
|
|
nosect();
|
|
if (bflag)
|
|
return(0);
|
|
if (nbits == BITCHUNK) {
|
|
bitindex++;
|
|
nbits = 0;
|
|
if (bitindex == PBITTABSZ) {
|
|
static int w_given;
|
|
if (pass == PASS_1 && ! w_given) {
|
|
w_given = 1;
|
|
warning("bit table overflow");
|
|
}
|
|
return(0);
|
|
}
|
|
if (pbittab[bitindex] == 0 && pass == PASS_1) {
|
|
if ((pbittab[bitindex] = malloc(MEMINCR)) == 0) {
|
|
static int w2_given;
|
|
|
|
if (!w2_given) {
|
|
w2_given = 1;
|
|
warning("out of space for bit table");
|
|
}
|
|
}
|
|
}
|
|
if (pbittab[bitindex] == 0)
|
|
return (0);
|
|
}
|
|
bit = 1 << (nbits&7);
|
|
p = pbittab[bitindex]+(nbits>>3);
|
|
nbits++;
|
|
|
|
switch (pass) {
|
|
case PASS_1:
|
|
*p = 0;
|
|
return(0);
|
|
case PASS_2:
|
|
if (fitsmall) {
|
|
DOTGAIN += gain;
|
|
*p |= bit;
|
|
}
|
|
return(fitsmall);
|
|
case PASS_3:
|
|
if (!(fitsmall || (*p & bit) == 0)){
|
|
printf("line: %ld - small failed\n", lineno);
|
|
printf("fitsmall: %d bit: %d\n", fitsmall, (*p & bit));
|
|
if (fitsmall)
|
|
return(0);
|
|
else
|
|
serror("This one is fatal!");
|
|
}
|
|
return(*p & bit);
|
|
}
|
|
/*NOTREACHED*/
|
|
}
|