From 9ddbb66c8b0c53738d3d9607aa584a91412acd81 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 30 Jan 2017 15:45:46 -0500 Subject: [PATCH 01/12] Turn off comments again. I turned them on by accident in c416889. --- mach/powerpc/ncg/table | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 7cc4bbbca..e76cadb33 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -10,7 +10,7 @@ INT64 = 8 FP_OFFSET = 0 /* Offset of saved FP relative to our FP */ PC_OFFSET = 4 /* Offset of saved PC relative to our FP */ -#define COMMENT(n) comment {LABEL, n} +#define COMMENT(n) /* comment {LABEL, n} */ #define nicesize(x) ((x)==INT8 || (x)==INT16 || (x)==INT32 || (x)==INT64) From 48e3aab72858159e9537de9655bab8d6a0a9d97c Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 30 Jan 2017 15:47:09 -0500 Subject: [PATCH 02/12] Swap RA and RS when assembling "and", "or", and such instructions. They must use OP_RA_RS_RB_C instead of OP_RS_RA_RB_C. The code generator often sets RS and RA to the same register, so swapping them causes no change in many programs. I also rename OP_RS_RA_UI_CC to OP_RA_RS_UI_CC, and OP_RS_RA_C to OP_RA_RS_C, because they already swap RA and RS. 
--- mach/powerpc/as/mach2.c | 6 +++--- mach/powerpc/as/mach3.c | 38 +++++++++++++++++++------------------- mach/powerpc/as/mach4.c | 6 +++--- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/mach/powerpc/as/mach2.c b/mach/powerpc/as/mach2.c index 555b92c38..4065334e6 100644 --- a/mach/powerpc/as/mach2.c +++ b/mach/powerpc/as/mach2.c @@ -51,6 +51,7 @@ %token OP_LIA %token OP_LIL %token OP_LI32 +%token OP_RA_RS_C %token OP_RA_RS_RB_C %token OP_RA_RS_RB_MB5_ME5_C %token OP_RA_RS_RB_MB6_C @@ -58,17 +59,16 @@ %token OP_RA_RS_SH5_MB5_ME5_C %token OP_RA_RS_SH6_C %token OP_RA_RS_SH6_MB6_C +%token OP_RA_RS_UI +%token OP_RA_RS_UI_CC %token OP_RS_FXM %token OP_RS_RA -%token OP_RS_RA_C %token OP_RS_RA_D %token OP_RS_RA_DS %token OP_RS_RA_NB %token OP_RS_RA_RB %token OP_RS_RA_RB_C %token OP_RS_RA_RA_C -%token OP_RS_RA_UI -%token OP_RS_RA_UI_CC %token OP_RS_RB %token OP_RS_SPR %token OP_RS_SR diff --git a/mach/powerpc/as/mach3.c b/mach/powerpc/as/mach3.c index 16c1e6ae0..91b088a6a 100644 --- a/mach/powerpc/as/mach3.c +++ b/mach/powerpc/as/mach3.c @@ -498,25 +498,25 @@ 0, OP_TOX_RA_SI, 3<<26 | 31<<21, "twui", /* page 62 */ -0, OP_RS_RA_UI_CC, 28<<26, "andi", /* C compulsory */ -0, OP_RS_RA_UI_CC, 29<<26, "andis", /* C compulsory */ -0, OP_RS_RA_UI, 24<<26, "ori", -0, OP_RS_RA_UI, 25<<26, "oris", -0, OP_RS_RA_UI, 26<<26, "xori", -0, OP_RS_RA_UI, 27<<26, "xoris", -0, OP_RS_RA_RB_C, 31<<26 | 28<<1, "and", -0, OP_RS_RA_RB_C, 31<<26 | 444<<1, "or", -0, OP_RS_RA_RB_C, 31<<26 | 316<<1, "xor", -0, OP_RS_RA_RB_C, 31<<26 | 476<<1, "nand", -0, OP_RS_RA_RB_C, 31<<26 | 124<<1, "nor", -0, OP_RS_RA_RB_C, 31<<26 | 284<<1, "eqv", -0, OP_RS_RA_RB_C, 31<<26 | 60<<1, "andc", -0, OP_RS_RA_RB_C, 31<<26 | 412<<1, "orc", -0, OP_RS_RA_C, 31<<26 | 954<<1, "extsb", -0, OP_RS_RA_C, 31<<26 | 922<<1, "extsh", -0, OP_RS_RA_C, 31<<26 | 986<<1, "extsw", -0, OP_RS_RA_C, 31<<26 | 58<<1, "cntlzd", -0, OP_RS_RA_C, 31<<26 | 26<<1, "cntlzw", +0, OP_RA_RS_UI_CC, 28<<26, "andi", /* C compulsory */ +0, 
OP_RA_RS_UI_CC, 29<<26, "andis", /* C compulsory */ +0, OP_RA_RS_UI, 24<<26, "ori", +0, OP_RA_RS_UI, 25<<26, "oris", +0, OP_RA_RS_UI, 26<<26, "xori", +0, OP_RA_RS_UI, 27<<26, "xoris", +0, OP_RA_RS_RB_C, 31<<26 | 28<<1, "and", +0, OP_RA_RS_RB_C, 31<<26 | 444<<1, "or", +0, OP_RA_RS_RB_C, 31<<26 | 316<<1, "xor", +0, OP_RA_RS_RB_C, 31<<26 | 476<<1, "nand", +0, OP_RA_RS_RB_C, 31<<26 | 124<<1, "nor", +0, OP_RA_RS_RB_C, 31<<26 | 284<<1, "eqv", +0, OP_RA_RS_RB_C, 31<<26 | 60<<1, "andc", +0, OP_RA_RS_RB_C, 31<<26 | 412<<1, "orc", +0, OP_RA_RS_C, 31<<26 | 954<<1, "extsb", +0, OP_RA_RS_C, 31<<26 | 922<<1, "extsh", +0, OP_RA_RS_C, 31<<26 | 986<<1, "extsw", +0, OP_RA_RS_C, 31<<26 | 58<<1, "cntlzd", +0, OP_RA_RS_C, 31<<26 | 26<<1, "cntlzw", /* extended m using logic */ 0, OP_RS_RA_RA_C, 31<<26 | 444<<1, "mr", diff --git a/mach/powerpc/as/mach4.c b/mach/powerpc/as/mach4.c index 99f7f4537..7fca36e42 100644 --- a/mach/powerpc/as/mach4.c +++ b/mach/powerpc/as/mach4.c @@ -42,6 +42,7 @@ operation | OP_FRT_RA_D FPR ',' e16 '(' GPR ')' { emit4($1 | ($2<<21) | ($6<<16) | $4); } | OP_FRT_RA_RB FPR ',' GPR ',' GPR { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } | OP_FRT_C c FPR { emit4($1 | $2 | ($3<<21)); } + | OP_RA_RS_C c GPR ',' GPR { emit4($1 | $2 | ($5<<21) | ($3<<16)); } | OP_RA_RS_RB_C c GPR ',' GPR ',' GPR { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11)); } | OP_RA_RS_RB_MB5_ME5_C c GPR ',' GPR ',' GPR ',' u5 ',' u5 @@ -58,6 +59,8 @@ operation { emit4($1 | $2 | ($5<<21) | ($3<<16) | SH6($7)); } | OP_RA_RS_SH6_MB6_C c GPR ',' GPR ',' u6 ',' u6 { emit4($1 | $2 | ($5<<21) | ($3<<16) | SH6($7) | MB6($9)); } + | OP_RA_RS_UI GPR ',' GPR ',' e16 { emit4($1 | ($4<<21) | ($2<<16) | $6); } + | OP_RA_RS_UI_CC C GPR ',' GPR ',' e16 { emit4($1 | ($5<<21) | ($3<<16) | $7); } | OP_RT GPR { emit4($1 | ($2<<21)); } | OP_RT_RA_C c GPR ',' GPR { emit4($1 | $2 | ($3<<21) | ($5<<16)); } | OP_RT_RA_D GPR ',' e16 '(' GPR ')' { emit4($1 | ($2<<21) | ($6<<16) | $4); } @@ -73,12 +76,9 @@ operation | 
OP_RT_SI GPR ',' e16 { emit4($1 | ($2<<21) | $4); } | OP_RT_SPR GPR ',' spr_num { emit4($1 | ($2<<21) | ($4<<11)); } | OP_RS_FXM u7 ',' GPR { emit4($1 | ($4<<21) | ($2<<12)); } - | OP_RS_RA_C c GPR ',' GPR { emit4($1 | $2 | ($5<<21) | ($3<<16)); } | OP_RS_RA_D GPR ',' e16 '(' GPR ')' { emit4($1 | ($2<<21) | ($6<<16) | $4); } | OP_RS_RA_DS GPR ',' ds '(' GPR ')' { emit4($1 | ($2<<21) | ($6<<16) | $4); } | OP_RS_RA_NB GPR ',' GPR ',' nb { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } - | OP_RS_RA_UI GPR ',' GPR ',' e16 { emit4($1 | ($4<<21) | ($2<<16) | $6); } - | OP_RS_RA_UI_CC C GPR ',' GPR ',' e16 { emit4($1 | ($5<<21) | ($3<<16) | $7); } | OP_RS_RA_RB GPR ',' GPR ',' GPR { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } | OP_RS_RA_RB_C c GPR ',' GPR ',' GPR { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11)); } | OP_RS_RA_RA_C c GPR ',' GPR { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($5<<11)); } From 3c1d2d79f0817602ac5aa8e8ee7069960d90cc6d Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 30 Jan 2017 16:15:02 -0500 Subject: [PATCH 03/12] Remove type quad, use type word_t in PowerPC as. Type word_t is for encoding the machine instructions. It only needs 32 bits for PowerPC. It was long (which can have 32 or 64 bits), and there was a second type quad (which was uint32_t). Switch word_t to uint32_t and replace quad with word_t. Also change valu_t and ADDR_T away from long. 
--- mach/powerpc/as/mach0.c | 8 +++---- mach/powerpc/as/mach1.c | 4 ++-- mach/powerpc/as/mach4.c | 46 ++++++++++++++++++++--------------------- mach/powerpc/as/mach5.c | 12 +++++------ 4 files changed, 34 insertions(+), 36 deletions(-) diff --git a/mach/powerpc/as/mach0.c b/mach/powerpc/as/mach0.c index 3246828fc..1c2051753 100644 --- a/mach/powerpc/as/mach0.c +++ b/mach/powerpc/as/mach0.c @@ -11,15 +11,13 @@ #define DEBUG 0 #undef valu_t -#define valu_t long +#define valu_t int32_t #undef ADDR_T -#define ADDR_T long +#define ADDR_T uint32_t #undef word_t -#define word_t long - -typedef uint32_t quad; +#define word_t uint32_t #undef ALIGNWORD #define ALIGNWORD 4 diff --git a/mach/powerpc/as/mach1.c b/mach/powerpc/as/mach1.c index c1651fcfe..a1977f466 100644 --- a/mach/powerpc/as/mach1.c +++ b/mach/powerpc/as/mach1.c @@ -5,5 +5,5 @@ #include -extern quad emit_hi(struct expr_t* expr, bool is_signed); -extern quad emit_lo(struct expr_t* expr); +extern word_t emit_hi(struct expr_t* expr, bool is_signed); +extern word_t emit_lo(struct expr_t* expr); diff --git a/mach/powerpc/as/mach4.c b/mach/powerpc/as/mach4.c index 7fca36e42..7464dcb6a 100644 --- a/mach/powerpc/as/mach4.c +++ b/mach/powerpc/as/mach4.c @@ -94,7 +94,7 @@ operation | OP_LI32 li32 /* emitted in subrule */ | OP_clrlsldi c GPR ',' GPR ',' u6 ',' u6 { - quad mb = ($7 - $9) & 0x3f; + word_t mb = ($7 - $9) & 0x3f; fit($9 <= $7); emit4($1 | $2 | ($5<<21) | ($3<<16) | SH6($9) | MB6(mb)); } @@ -104,41 +104,41 @@ operation } | OP_clrrdi c GPR ',' GPR ',' u6 { - quad me = 63 - $7; + word_t me = 63 - $7; emit4($1 | $2 | ($5<<21) | ($3<<16) | SH6(0) | MB6(me)); } | OP_extldi c GPR ',' GPR ',' u6 ',' u6 { - quad me = ($7 - 1) & 0x3f; + word_t me = ($7 - 1) & 0x3f; fit($7 > 0); emit4($1 | $2 | ($5<<21) | ($3<<16) | SH6($9) | MB6(me)); } | OP_extrdi c GPR ',' GPR ',' u6 ',' u6 { - quad sh = ($9 + $7) & 0x3f; - quad mb = (64 - $7) & 0x3f; + word_t sh = ($9 + $7) & 0x3f; + word_t mb = (64 - $7) & 0x3f; fit($7 > 0); 
emit4($1 | $2 | ($5<<21) | ($3<<16) | SH6(sh) | MB6(mb)); } | OP_rotrdi c GPR ',' GPR ',' u6 { - quad sh = (64 - $7) & 0x3f; + word_t sh = (64 - $7) & 0x3f; emit4($1 | $2 | ($5<<21) | ($3<<16) | SH6(sh) | MB6(0)); } | OP_sldi c GPR ',' GPR ',' u6 { - quad me = 63 - $7; + word_t me = 63 - $7; emit4($1 | $2 | ($5<<21) | ($3<<16) | SH6($7) | MB6(me)); } | OP_srdi c GPR ',' GPR ',' u6 { - quad sh = (64 - $7) & 0x3f; + word_t sh = (64 - $7) & 0x3f; emit4($1 | $2 | ($5<<21) | ($3<<16) | SH6(sh) | MB6($7)); } | OP_clrlslwi c GPR ',' GPR ',' u5 ',' u5 { - quad mb = ($7 - $9) & 0x1f; - quad me = 31 - $9; + word_t mb = ($7 - $9) & 0x1f; + word_t me = 31 - $9; fit($9 <= $7); emit4($1 | $2 | ($5<<21) | ($3<<16) | ($9<<11) | (mb<<6) | (me<<1)); @@ -150,56 +150,56 @@ operation } | OP_clrrwi c GPR ',' GPR ',' u5 { - quad me = 31 - $7; + word_t me = 31 - $7; emit4($1 | $2 | ($5<<21) | ($3<<16) | (0<<11) | (0<<6) | (me<<1)); } | OP_extlwi c GPR ',' GPR ',' u5 ',' u5 { - quad me = ($7 - 1) & 0x1f; + word_t me = ($7 - 1) & 0x1f; fit($7 > 0); emit4($1 | $2 | ($5<<21) | ($3<<16) | ($9<<11) | (0<<6) | (me<<1)); } | OP_extrwi c GPR ',' GPR ',' u5 ',' u5 { - quad sh = ($9 + $7) & 0x1f; - quad mb = (32 - $7) & 0x1f; + word_t sh = ($9 + $7) & 0x1f; + word_t mb = (32 - $7) & 0x1f; fit($7 > 0); emit4($1 | $2 | ($5<<21) | ($3<<16) | (sh<<11) | (mb<<6) | (31<<1)); } | OP_inslwi c GPR ',' GPR ',' u5 ',' u5 { - quad sh = (32 - $9) & 0x1f; - quad me = ($9 + $7 - 1) & 0x1f; + word_t sh = (32 - $9) & 0x1f; + word_t me = ($9 + $7 - 1) & 0x1f; fit($7 > 0); emit4($1 | $2 | ($5<<21) | ($3<<16) | (sh<<11) | ($9<<6) | (me<<1)); } | OP_insrwi c GPR ',' GPR ',' u5 ',' u5 { - quad sh = (32 - $9 - $7) & 0x1f; - quad me = ($9 + $7 - 1) & 0x1f; + word_t sh = (32 - $9 - $7) & 0x1f; + word_t me = ($9 + $7 - 1) & 0x1f; fit($7 > 0); emit4($1 | $2 | ($5<<21) | ($3<<16) | (sh<<11) | ($9<<6) | (me<<1)); } | OP_rotrwi c GPR ',' GPR ',' u5 { - quad sh = (32 - $7) & 0x1f; + word_t sh = (32 - $7) & 0x1f; emit4($1 | $2 | 
($5<<21) | ($3<<16) | (sh<<11) | (0<<6) | (31<<1)); } | OP_slwi c GPR ',' GPR ',' u5 { - quad me = 31 - $7; + word_t me = 31 - $7; emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11) | (0<<6) | (me<<1)); } | OP_srwi c GPR ',' GPR ',' u5 { - quad sh = (32 - $7) & 0x1f; + word_t sh = (32 - $7) & 0x1f; emit4($1 | $2 | ($5<<21) | ($3<<16) | (sh<<11) | ($7<<6) | (31<<1)); } @@ -367,8 +367,8 @@ bda li32 : GPR ',' expr { - quad type = $3.typ & S_TYP; - quad val = $3.val; + word_t type = $3.typ & S_TYP; + word_t val = $3.val; if ((type == S_ABS) && (val <= 0xffff)) emit4((14<<26) | ($1<<21) | (0<<16) | val); /* addi */ else diff --git a/mach/powerpc/as/mach5.c b/mach/powerpc/as/mach5.c index e3e23f272..87d514053 100644 --- a/mach/powerpc/as/mach5.c +++ b/mach/powerpc/as/mach5.c @@ -1,10 +1,10 @@ -quad emit_hi(struct expr_t* expr, bool is_signed) +word_t emit_hi(struct expr_t* expr, bool is_signed) { /* If this is a symbol reference, discard the symbol and keep only the * offset part. */ - quad type = expr->typ & S_TYP; - quad val = expr->val; + word_t type = expr->typ & S_TYP; + word_t val = expr->val; uint16_t hi = val >> 16; uint16_t lo = val & 0xffff; @@ -23,10 +23,10 @@ quad emit_hi(struct expr_t* expr, bool is_signed) return hi; } -quad emit_lo(struct expr_t* expr) +word_t emit_lo(struct expr_t* expr) { - quad type = expr->typ & S_TYP; - quad val = expr->val; + word_t type = expr->typ & S_TYP; + word_t val = expr->val; /* If the assembler stored a symbol for relocation later, we need to * abandon it (because the relocation was generated by emit_ha). */ From f4cfbedd5c74e82494770afd62f9fe22e1cba8c7 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Mon, 30 Jan 2017 16:39:23 -0500 Subject: [PATCH 04/12] Remove #include <stdbool.h> from mach/powerpc/as/mach1.c We should not include a system header file here, because mach/proto/as/comm2.y goes through cpp twice. 
The include can cause problems like https://github.com/davidgiven/ack/issues/1 Remove this include # and leave a comment pointing to the includes in comm0.h. Change the few instances of bool, false, true, to int, 0, 1. --- mach/powerpc/as/mach1.c | 6 ++++-- mach/powerpc/as/mach4.c | 4 ++-- mach/powerpc/as/mach5.c | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/mach/powerpc/as/mach1.c b/mach/powerpc/as/mach1.c index a1977f466..44b415ff8 100644 --- a/mach/powerpc/as/mach1.c +++ b/mach/powerpc/as/mach1.c @@ -3,7 +3,9 @@ * $State$ */ -#include +/* + * Do not #include anything here. Do it in mach/proto/as/comm0.h + */ -extern word_t emit_hi(struct expr_t* expr, bool is_signed); +extern word_t emit_hi(struct expr_t* expr, int is_signed); extern word_t emit_lo(struct expr_t* expr); diff --git a/mach/powerpc/as/mach4.c b/mach/powerpc/as/mach4.c index 7464dcb6a..640573fa1 100644 --- a/mach/powerpc/as/mach4.c +++ b/mach/powerpc/as/mach4.c @@ -218,8 +218,8 @@ e16 serror("16-bit value out of range"); $$ = (uint16_t) $1; } - | OP_HI ASC_LPAR expr ASC_RPAR { $$ = emit_hi(&$3, false); } - | OP_HA ASC_LPAR expr ASC_RPAR { $$ = emit_hi(&$3, true); } + | OP_HI ASC_LPAR expr ASC_RPAR { $$ = emit_hi(&$3, 0); } + | OP_HA ASC_LPAR expr ASC_RPAR { $$ = emit_hi(&$3, 1); } | OP_LO ASC_LPAR expr ASC_RPAR { $$ = emit_lo(&$3); } ; diff --git a/mach/powerpc/as/mach5.c b/mach/powerpc/as/mach5.c index 87d514053..47460790c 100644 --- a/mach/powerpc/as/mach5.c +++ b/mach/powerpc/as/mach5.c @@ -1,5 +1,5 @@ -word_t emit_hi(struct expr_t* expr, bool is_signed) +word_t emit_hi(struct expr_t* expr, int is_signed) { /* If this is a symbol reference, discard the symbol and keep only the * offset part. */ From 1bf58cf51c9bff076485e249ef5b708483055b4a Mon Sep 17 00:00:00 2001 From: George Koehler Date: Wed, 8 Feb 2017 11:46:31 -0500 Subject: [PATCH 05/12] Add RELOLIS for PowerPC lis with ha16 or hi16. 
The new relocation type RELOLIS handles these instructions: lis RT, ha16[expr] == addis RT, r0, ha16[expr] lis RT, hi16[expr] == addis RT, r0, hi16[expr] RELOLIS stores a 32-bit value in the program text. In this value, the high bit is a ha16 flag, the next 5 bits are the target register RT, and the low bits are a signed 26-bit offset. The linker replaces this value with the lis instruction. The old RELOPPC relocated a ha16/lo16 or hi16/lo16 pair. The new RELOLIS relocates only a ha16 or hi16, so it is no longer necessary to have a matching lo16 in the next instruction. The disadvantage is that RELOLIS has only a signed 26-bit offset, not a 32-bit offset. Switch the assembler to use RELOLIS for ha16 or hi16 and RELO2 for lo16. The li32 instruction still uses the old RELOPPC relocation. This is not the same as my RELOPPC change from my recent mail to tack-devel (https://sourceforge.net/p/tack/mailman/message/35651528/). This commit is on a different branch. Here I am throwing away my RELOPPC change and instead trying RELOLIS. --- h/out.h | 6 +-- mach/powerpc/as/mach1.c | 5 ++- mach/powerpc/as/mach4.c | 41 ++++++++--------- mach/powerpc/as/mach5.c | 99 ++++++++++++++++++++++++++++------------- util/amisc/ashow.c | 3 ++ util/led/relocate.c | 48 ++++++++++++++++++-- 6 files changed, 142 insertions(+), 60 deletions(-) diff --git a/h/out.h b/h/out.h index 45289f313..6aeffc446 100644 --- a/h/out.h +++ b/h/out.h @@ -65,9 +65,9 @@ struct outname { #define RELO1 1 /* 1 byte */ #define RELO2 2 /* 2 bytes */ #define RELO4 3 /* 4 bytes */ -#define RELOPPC 4 /* PowerPC 26-bit address */ -/* relo 5 is unused */ -#define RELOVC4 6 /* VideoCore IV address in 32-bit instruction */ +#define RELOPPC 4 /* PowerPC 26-bit address */ +#define RELOLIS 5 /* PowerPC lis */ +#define RELOVC4 6 /* VideoCore IV address in 32-bit instruction */ #define RELPC 0x2000 /* pc relative */ #define RELBR 0x4000 /* High order byte lowest address. 
*/ diff --git a/mach/powerpc/as/mach1.c b/mach/powerpc/as/mach1.c index 44b415ff8..50f799684 100644 --- a/mach/powerpc/as/mach1.c +++ b/mach/powerpc/as/mach1.c @@ -7,5 +7,6 @@ * Do not #include anything here. Do it in mach/proto/as/comm0.h */ -extern word_t emit_hi(struct expr_t* expr, int is_signed); -extern word_t emit_lo(struct expr_t* expr); +void no_hl(void); +word_t eval_hl(struct expr_t* expr, int token); +void emit_hl(word_t in); diff --git a/mach/powerpc/as/mach4.c b/mach/powerpc/as/mach4.c index 640573fa1..8a0cca9de 100644 --- a/mach/powerpc/as/mach4.c +++ b/mach/powerpc/as/mach4.c @@ -10,11 +10,11 @@ operation | OP_BF_BFA CR ',' CR { emit4($1 | ($2<<23) | ($4<<18)); } | OP_BF_FRA_FRB CR ',' FPR ',' FPR { emit4($1 | ($2<<23) | ($4<<16) | ($6<<11)); } | OP_BF_L_RA_RB CR ',' u1 ',' GPR ',' GPR { emit4($1 | ($2<<23) | ($4<<21) | ($6<<16) | ($8<<11)); } - | OP_BF_L_RA_SI CR ',' u1 ',' GPR ',' e16 { emit4($1 | ($2<<23) | ($4<<21) | ($6<<16) | $8); } - | OP_BF_L_RA_UI CR ',' u1 ',' GPR ',' e16 { emit4($1 | ($2<<23) | ($4<<21) | ($6<<16) | $8); } + | OP_BF_L_RA_SI CR ',' u1 ',' GPR ',' e16 { emit_hl($1 | ($2<<23) | ($4<<21) | ($6<<16) | $8); } + | OP_BF_L_RA_UI CR ',' u1 ',' GPR ',' e16 { emit_hl($1 | ($2<<23) | ($4<<21) | ($6<<16) | $8); } | OP_BF_RA_RB cr_opt GPR ',' GPR { emit4($1 | ($2<<23) | ($3<<16) | ($5<<11)); } - | OP_BF_RA_SI cr_opt GPR ',' e16 { emit4($1 | ($2<<23) | ($3<<16) | $5); } - | OP_BF_RA_UI cr_opt GPR ',' e16 { emit4($1 | ($2<<23) | ($3<<16) | $5); } + | OP_BF_RA_SI cr_opt GPR ',' e16 { emit_hl($1 | ($2<<23) | ($3<<16) | $5); } + | OP_BF_RA_UI cr_opt GPR ',' e16 { emit_hl($1 | ($2<<23) | ($3<<16) | $5); } | OP_BF_U_C c CR ',' u4 { emit4($1 | $2 | ($3<<23) | ($5<<12)); } | OP_BH { emit4($1); } | OP_BH u2 { emit4($1 | ($2<<11)); } @@ -33,13 +33,13 @@ operation | OP_BT_BT_BT u5 { emit4($1 | ($2<<21) | ($2<<16) | ($2<<11)); } | OP_BT_C c u5 { emit4($1 | $2 | ($3<<21)); } | OP_FLM_FRB_C c u8 ',' FPR { emit4($1 | $2 | ($3<<17) | ($5<<11)); } - | 
OP_FRS_RA_D FPR ',' e16 '(' GPR ')' { emit4($1 | ($2<<21) | ($6<<16) | $4); } + | OP_FRS_RA_D FPR ',' e16 '(' GPR ')' { emit_hl($1 | ($2<<21) | ($6<<16) | $4); } | OP_FRS_RA_RB FPR ',' GPR ',' GPR { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } | OP_FRT_FRA_FRB_C c FPR ',' FPR ',' FPR { emit4($1 | $2 | ($3<<21) | ($5<<16) | ($7<<11)); } | OP_FRT_FRA_FRC_FRB_C c FPR ',' FPR ',' FPR ',' FPR { emit4($1 | $2 | ($3<<21) | ($5<<16) | ($9<<11) | ($7<<6)); } | OP_FRT_FRA_FRC_C c FPR ',' FPR ',' FPR { emit4($1 | $2 | ($3<<21) | ($5<<16) | ($7<<6)); } | OP_FRT_FRB_C c FPR ',' FPR { emit4($1 | $2 | ($3<<21) | ($5<<11)); } - | OP_FRT_RA_D FPR ',' e16 '(' GPR ')' { emit4($1 | ($2<<21) | ($6<<16) | $4); } + | OP_FRT_RA_D FPR ',' e16 '(' GPR ')' { emit_hl($1 | ($2<<21) | ($6<<16) | $4); } | OP_FRT_RA_RB FPR ',' GPR ',' GPR { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } | OP_FRT_C c FPR { emit4($1 | $2 | ($3<<21)); } | OP_RA_RS_C c GPR ',' GPR { emit4($1 | $2 | ($5<<21) | ($3<<16)); } @@ -59,34 +59,34 @@ operation { emit4($1 | $2 | ($5<<21) | ($3<<16) | SH6($7)); } | OP_RA_RS_SH6_MB6_C c GPR ',' GPR ',' u6 ',' u6 { emit4($1 | $2 | ($5<<21) | ($3<<16) | SH6($7) | MB6($9)); } - | OP_RA_RS_UI GPR ',' GPR ',' e16 { emit4($1 | ($4<<21) | ($2<<16) | $6); } - | OP_RA_RS_UI_CC C GPR ',' GPR ',' e16 { emit4($1 | ($5<<21) | ($3<<16) | $7); } + | OP_RA_RS_UI GPR ',' GPR ',' e16 { emit_hl($1 | ($4<<21) | ($2<<16) | $6); } + | OP_RA_RS_UI_CC C GPR ',' GPR ',' e16 { emit_hl($1 | ($5<<21) | ($3<<16) | $7); } | OP_RT GPR { emit4($1 | ($2<<21)); } | OP_RT_RA_C c GPR ',' GPR { emit4($1 | $2 | ($3<<21) | ($5<<16)); } - | OP_RT_RA_D GPR ',' e16 '(' GPR ')' { emit4($1 | ($2<<21) | ($6<<16) | $4); } - | OP_RT_RA_DS GPR ',' ds '(' GPR ')' { emit4($1 | ($2<<21) | ($6<<16) | $4); } + | OP_RT_RA_D GPR ',' e16 '(' GPR ')' { emit_hl($1 | ($2<<21) | ($6<<16) | $4); } + | OP_RT_RA_DS GPR ',' ds '(' GPR ')' { emit_hl($1 | ($2<<21) | ($6<<16) | $4); } | OP_RT_RA_NB GPR ',' GPR ',' nb { emit4($1 | ($2<<21) | 
($4<<16) | ($6<<11)); } | OP_RT_RA_RB GPR ',' GPR ',' GPR { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } | OP_RT_RA_RB_C c GPR ',' GPR ',' GPR { emit4($1 | $2 | ($3<<21) | ($5<<16) | ($7<<11)); } - | OP_RT_RA_SI GPR ',' GPR ',' e16 { emit4($1 | ($2<<21) | ($4<<16) | $6); } - | OP_RT_RA_SI_addic c GPR ',' GPR ',' e16 { emit4($1 | ($2<<26) | ($3<<21) | ($5<<16) | $7); } + | OP_RT_RA_SI GPR ',' GPR ',' e16 { emit_hl($1 | ($2<<21) | ($4<<16) | $6); } + | OP_RT_RA_SI_addic c GPR ',' GPR ',' e16 { emit_hl($1 | ($2<<26) | ($3<<21) | ($5<<16) | $7); } | OP_RT_RA_SI_subi GPR ',' GPR ',' negate16 { emit4($1 | ($2<<21) | ($4<<16) | $6); } | OP_RT_RA_SI_subic c GPR ',' GPR ',' negate16 { emit4($1 | ($2<<26) | ($3<<21) | ($5<<16) | $7); } | OP_RT_RB_RA_C c GPR ',' GPR ',' GPR { emit4($1 | $2 | ($3<<21) | ($7<<16) | ($5<<11)); } - | OP_RT_SI GPR ',' e16 { emit4($1 | ($2<<21) | $4); } + | OP_RT_SI GPR ',' e16 { emit_hl($1 | ($2<<21) | $4); } | OP_RT_SPR GPR ',' spr_num { emit4($1 | ($2<<21) | ($4<<11)); } | OP_RS_FXM u7 ',' GPR { emit4($1 | ($4<<21) | ($2<<12)); } - | OP_RS_RA_D GPR ',' e16 '(' GPR ')' { emit4($1 | ($2<<21) | ($6<<16) | $4); } - | OP_RS_RA_DS GPR ',' ds '(' GPR ')' { emit4($1 | ($2<<21) | ($6<<16) | $4); } + | OP_RS_RA_D GPR ',' e16 '(' GPR ')' { emit_hl($1 | ($2<<21) | ($6<<16) | $4); } + | OP_RS_RA_DS GPR ',' ds '(' GPR ')' { emit_hl($1 | ($2<<21) | ($6<<16) | $4); } | OP_RS_RA_NB GPR ',' GPR ',' nb { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } | OP_RS_RA_RB GPR ',' GPR ',' GPR { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } | OP_RS_RA_RB_C c GPR ',' GPR ',' GPR { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($7<<11)); } | OP_RS_RA_RA_C c GPR ',' GPR { emit4($1 | $2 | ($5<<21) | ($3<<16) | ($5<<11)); } | OP_RS_SPR spr_num ',' GPR { emit4($1 | ($4<<21) | ($2<<11)); } | OP_TO_RA_RB u5 ',' GPR ',' GPR { emit4($1 | ($2<<21) | ($4<<16) | ($6<<11)); } - | OP_TO_RA_SI u5 ',' GPR ',' e16 { emit4($1 | ($2<<21) | ($4<<16) | $6); } + | OP_TO_RA_SI u5 ',' GPR ',' e16 { 
emit_hl($1 | ($2<<21) | ($4<<16) | $6); } | OP_TOX_RA_RB GPR ',' GPR { emit4($1 | ($2<<16) | ($4<<11)); } - | OP_TOX_RA_SI GPR ',' e16 { emit4($1 | ($2<<16) | $4); } + | OP_TOX_RA_SI GPR ',' e16 { emit_hl($1 | ($2<<16) | $4); } | OP_LEV { emit4($1); } | OP_LEV u7 { emit4($1 | ($2<<5)); } | OP_LIA lia { emit4($1 | $2); } @@ -217,10 +217,11 @@ e16 if (($1 < -0x8000) || ($1 > 0xffff)) serror("16-bit value out of range"); $$ = (uint16_t) $1; + no_hl(); } - | OP_HI ASC_LPAR expr ASC_RPAR { $$ = emit_hi(&$3, 0); } - | OP_HA ASC_LPAR expr ASC_RPAR { $$ = emit_hi(&$3, 1); } - | OP_LO ASC_LPAR expr ASC_RPAR { $$ = emit_lo(&$3); } + | OP_HI ASC_LPAR expr ASC_RPAR { $$ = eval_hl(&$3, OP_HI); } + | OP_HA ASC_LPAR expr ASC_RPAR { $$ = eval_hl(&$3, OP_HA); } + | OP_LO ASC_LPAR expr ASC_RPAR { $$ = eval_hl(&$3, OP_LO); } ; negate16 diff --git a/mach/powerpc/as/mach5.c b/mach/powerpc/as/mach5.c index 47460790c..d72b8514a 100644 --- a/mach/powerpc/as/mach5.c +++ b/mach/powerpc/as/mach5.c @@ -1,38 +1,75 @@ +static int hl_token; +static expr_t hl_expr; -word_t emit_hi(struct expr_t* expr, int is_signed) -{ - /* If this is a symbol reference, discard the symbol and keep only the - * offset part. */ - word_t type = expr->typ & S_TYP; - word_t val = expr->val; - uint16_t hi = val >> 16; - uint16_t lo = val & 0xffff; - - if (type != S_ABS) - newrelo(expr->typ, RELOPPC | FIXUPFLAGS); - - /* If the low half of this relocation is going to be a memory operation, - * then it'll be treated as a signed value. That means that values greater - * than 0x7fff will cause the high word to have 1 subtracted from it; so - * we apply an adjustment here. 
- */ - - if (is_signed && (lo > 0x7fff)) - hi++; - - return hi; +void no_hl(void) { + hl_token = 0; } -word_t emit_lo(struct expr_t* expr) +word_t eval_hl(expr_t* expr, int token) { - word_t type = expr->typ & S_TYP; - word_t val = expr->val; + word_t val = expr->val; + uint16_t hi = val >> 16; + uint16_t lo = val & 0xffff; - /* If the assembler stored a symbol for relocation later, we need to - * abandon it (because the relocation was generated by emit_ha). */ + hl_token = token; + hl_expr = *expr; - if (type != S_ABS) - relonami = 0; - - return val & 0xffff; + switch (token) { + case OP_HI: /* hi16[expr] */ + return hi; + case OP_HA: /* ha16[expr]*/ + /* + * If the low half will be treated as a signed value, + * then values greater than 0x7fff will cause the high + * half to have 1 subtracted from it; so we apply an + * adjustment here. + */ + if (lo > 0x7fff) + hi++; + return hi; + case OP_LO: /* lo16[expr] */ + return lo; + } +} + +void emit_hl(word_t in) +{ + word_t reg; + int type; + + switch (hl_token) { + case OP_HI: /* hi16[expr] */ + case OP_HA: /* ha16[expr] */ + if (PASS_RELO && (hl_expr.typ & S_TYP) != S_ABS) { + /* + * RELOLIS only works with lis _, _ (same as + * addis _, r0, _). Check if instruction + * isn't addis or register RA isn't r0. 
+ */ + if ((in & 0xfc1f0000) != (0x3c000000)) + serror("relocation only works with lis"); + + /* + * High bit: ha16 flag + * Next 5 bits: register RT + * Low 26 bits: signed offset + */ + fit(fitx(hl_expr.val, 26)); + newrelo(hl_expr.typ, RELOLIS | FIXUPFLAGS); + reg = (in >> 21) & 0x1f; + in = (hl_token == OP_HA) << 31; + in |= reg << 26; + in |= hl_expr.val & 0x03ffffff; + } + break; + case OP_LO: /* lo16[expr] */ + if (PASS_RELO && (hl_expr.typ & S_TYP) != S_ABS) { + DOTVAL += 2; + newrelo(hl_expr.typ, RELO2 | FIXUPFLAGS); + DOTVAL -= 2; + } + break; + } + + emit4(in); } diff --git a/util/amisc/ashow.c b/util/amisc/ashow.c index 498ae3980..ec85de30d 100644 --- a/util/amisc/ashow.c +++ b/util/amisc/ashow.c @@ -140,6 +140,9 @@ showrelo() case RELOPPC: printf("\tPowerPC 26-bit address\n"); break; + case RELOLIS: + printf("\tPowerPC lis instruction\n"); + break; case RELOVC4: printf("\tVideoCore IV address in 32-bit instruction\n"); break; diff --git a/util/led/relocate.c b/util/led/relocate.c index 036b7dbb8..1b8960938 100644 --- a/util/led/relocate.c +++ b/util/led/relocate.c @@ -165,8 +165,17 @@ static uint32_t get_powerpc_valu(char* addr, uint16_t type) return ((hi << 16) | lo); } - fatal("Don't know how to read from PowerPC fixup on instructions 0x%08x+0x%08x", - opcode1, opcode2); + fatal("Don't know how to read from PowerPC fixup on instructions 0x%08lx+0x%08lx", + (unsigned long)opcode1, (unsigned long)opcode2); +} + +/* RELOLIS stores a signed 26-bit offset in the low bits. 
*/ +static uint32_t get_lis_valu(char *addr, uint16_t type) +{ + uint32_t valu = read4(addr, type) & 0x03ffffff; + if (valu & 0x02000000) + valu |= 0xfc000000; /* sign extension */ + return valu; } /* @@ -184,6 +193,8 @@ static uint32_t getvalu(char* addr, uint16_t type) return read4(addr, type); case RELOPPC: return get_powerpc_valu(addr, type); + case RELOLIS: + return get_lis_valu(addr, type); case RELOVC4: return get_vc4_valu(addr); default: @@ -327,8 +338,34 @@ static void put_powerpc_valu(char* addr, uint32_t value, uint16_t type) } else - fatal("Don't know how to write a PowerPC fixup to instructions 0x%08x+0x%08x", - opcode1, opcode2); + fatal("Don't know how to write a PowerPC fixup to instructions 0x%08lx+0x%08lx", + (unsigned long)opcode1, (unsigned long)opcode2); +} + +/* Writes a PowerPC lis instruction. */ +static void put_lis_valu(char* addr, uint32_t value, uint16_t type) +{ + uint32_t opcode, reg; + uint16_t hi, lo; + bool ha16; + + /* ha16 flag in high bit, register in next 5 bits */ + opcode = read4(addr, type); + ha16 = opcode >> 31; + reg = (opcode >> 26) & 0x1f; + + /* + * Apply the sign adjustment if the ha16 flag is set and the + * low half is a negative signed 16-bit integer. + */ + hi = value >> 16; + lo = value & 0xffff; + if (ha16 && lo > 0x7fff) + hi++; + + /* Assemble lis reg, hi == addis reg, r0, hi. */ + opcode = (15 << 26) | (reg << 21) | (0 << 16) | hi; + write4(opcode, addr, type); } /* @@ -352,6 +389,9 @@ static putvalu(uint32_t valu, char* addr, uint16_t type) case RELOPPC: put_powerpc_valu(addr, valu, type); break; + case RELOLIS: + put_lis_valu(addr, valu, type); + break; case RELOVC4: put_vc4_valu(addr, valu); break; From 7255ed403f0876321587dffff96a23a222c9e784 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Wed, 8 Feb 2017 12:12:28 -0500 Subject: [PATCH 06/12] Tweak some tokens in PowerPC ncg. Remove the GPRINDIRECT token, and use the IND_RC_* tokens as operands to instructions. 
We no longer need to unpack an IND_RC_* token and repack it as a GPRINDIRECT to use it in an instruction. Allow storing IND_ALL_B and IND_ALL_H in register variables. Create a set ANY_BHW for anything that we can store in a regvar. Push register variables on the stack without using GPRE, by changing stwu to accept LOCAL. Then ncg will replace the string ">>> BUG IN LOCAL" with the register name. (I copied ">>> BUG IN LOCAL" from mach/arm/ncg/table.) Fix the rule for "pat lil inreg($1)>0" to yield an IND_RC_W token, not a register. We might need to kill the token with "kills MEMORY". Rename CONST_ALL to CONST_STACK, because it only includes constants on the stack, and excludes CONST tokens. Instructions still don't allow CONST_STACK operands, so we still need to repack each CONST_STACK as a CONST to use it in an instruction. Rename LABEL_OFFSET_HI to just LABEL_HI, and same for LABEL_HA and LABEL_LO. --- mach/powerpc/ncg/table | 237 ++++++++++++++++++++--------------------- 1 file changed, 117 insertions(+), 120 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index e76cadb33..fb5e6b3ff 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -157,17 +157,16 @@ TOKENS /* Used only in instruction descriptions (to generate the correct syntax). */ - GPRINDIRECT = { GPR reg; INT off; } 4 off "(" reg ")". GPRINDIRECT_OFFSET_LO = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")". CONST = { INT val; } 4 val. /* Primitives */ LABEL = { ADDR adr; } 4 adr. - LABEL_OFFSET_HI = { ADDR adr; } 4 "hi16[" adr "]". - LABEL_OFFSET_HA = { ADDR adr; } 4 "ha16[" adr "]". - LABEL_OFFSET_LO = { ADDR adr; } 4 "lo16[" adr "]". - LOCAL = { INT off; } 4. + LABEL_HI = { ADDR adr; } 4 "hi16[" adr "]". + LABEL_HA = { ADDR adr; } 4 "ha16[" adr "]". + LABEL_LO = { ADDR adr; } 4 "lo16[" adr "]". + LOCAL = { INT off; } 4 ">>> BUG IN LOCAL". /* Allows us to use regvar() to refer to registers */ @@ -192,15 +191,15 @@ TOKENS SEX_B = { GPR reg; } 4. 
SEX_H = { GPR reg; } 4. - IND_RC_B = { GPR reg; INT off; } 4. + IND_RC_B = { GPR reg; INT off; } 4 off "(" reg ")". IND_RR_B = { GPR reg1; GPR reg2; } 4. - IND_RC_H = { GPR reg; INT off; } 4. + IND_RC_H = { GPR reg; INT off; } 4 off "(" reg ")". IND_RR_H = { GPR reg1; GPR reg2; } 4. - IND_RC_H_S = { GPR reg; INT off; } 4. + IND_RC_H_S = { GPR reg; INT off; } 4 off "(" reg ")". IND_RR_H_S = { GPR reg1; GPR reg2; } 4. - IND_RC_W = { GPR reg; INT off; } 4. + IND_RC_W = { GPR reg; INT off; } 4 off "(" reg ")". IND_RR_W = { GPR reg1; GPR reg2; } 4. - IND_RC_D = { GPR reg; INT off; } 8. + IND_RC_D = { GPR reg; INT off; } 8 off "(" reg ")". IND_RR_D = { GPR reg1; GPR reg2; } 8. NOT_R = { GPR reg; } 4. @@ -237,7 +236,7 @@ SETS /* unsigned 16-bit integer */ UCONST2 = CONST_0000_7FFF + CONST_8000 + CONST_8001_FFFF. /* any constant on stack */ - CONST_ALL = CONST_N8000 + CONST_N7FFF_N0001 + CONST_0000_7FFF + + CONST_STACK = CONST_N8000 + CONST_N7FFF_N0001 + CONST_0000_7FFF + CONST_8000 + CONST_8001_FFFF + CONST_HZ + CONST_HL. SUM_ALL = SUM_RC + SUM_RR. @@ -247,18 +246,19 @@ SETS LOGICAL_ALL = NOT_R + AND_RR + OR_RR + OR_RC + XOR_RR + XOR_RC. - /* indirect 4-byte value */ + /* indirect values */ + IND_ALL_B = IND_RC_B + IND_RR_B. + IND_ALL_H = IND_RC_H + IND_RR_H + IND_RC_H_S + IND_RR_H_S. IND_ALL_W = IND_RC_W + IND_RR_W. - /* indirect 8-byte value */ IND_ALL_D = IND_RC_D + IND_RR_D. - /* any indirect value that fits in a GPR */ - IND_ALL_BHW = IND_RC_B + IND_RR_B + IND_RC_H + IND_RR_H + - IND_RC_H_S + IND_RR_H_S + IND_ALL_W. + IND_ALL_BHW = IND_ALL_B + IND_ALL_H + IND_ALL_W. /* anything killed by sti (store indirect) */ MEMORY = IND_ALL_BHW + IND_ALL_D. - OP_ALL_W = SUM_ALL + SEX_ALL + LOGICAL_ALL + IND_ALL_W. + /* any stack token that we can easily move to GPR */ + ANY_BHW = REG + CONST_STACK + LABEL + SEX_ALL + + SUM_ALL + IND_ALL_BHW + LOGICAL_ALL. INSTRUCTIONS @@ -278,7 +278,9 @@ INSTRUCTIONS add GPR:wo, GPR:ro, GPR:ro. addX "add." GPR:wo, GPR:ro, GPR:ro. 
addi GPR:wo, GPR:ro, CONST:ro. - addis GPR:wo, GPR:ro, CONST+LABEL_OFFSET_HI+LABEL_OFFSET_HA:ro. + li GPR:wo, CONST:ro. + addis GPR:wo, GPR:ro, CONST+LABEL_HI+LABEL_HA:ro. + lis GPR:wo, CONST+LABEL_HI+LABEL_HA:ro. and GPR:wo, GPR:ro, GPR:ro. andc GPR:wo, GPR:ro, GPR:ro. andiX "andi." GPR:wo:cc, GPR:ro, CONST:ro. @@ -326,22 +328,22 @@ INSTRUCTIONS frsp FSREG:wo, FREG:ro cost(4, 5). fsub FREG:wo, FREG:ro, FREG:ro cost(4, 5). fsubs FSREG:wo, FSREG:ro, FSREG:ro cost(4, 5). - lbz GPR:wo, GPRINDIRECT:ro cost(4, 3). + lbz GPR:wo, IND_RC_B:ro cost(4, 3). lbzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). - lfd FPR:wo, GPRINDIRECT:ro cost(4, 5). - lfdu FPR:wo, GPRINDIRECT:ro cost(4, 5). + lfd FPR:wo, IND_RC_D:ro cost(4, 5). + lfdu FPR:wo, IND_RC_D:ro cost(4, 5). lfdx FPR:wo, GPR:ro, GPR:ro cost(4, 5). - lfs FSREG:wo, GPRINDIRECT:ro cost(4, 4). - lfsu FSREG:wo, GPRINDIRECT:rw cost(4, 4). + lfs FSREG:wo, IND_RC_W:ro cost(4, 4). + lfsu FSREG:wo, IND_RC_W:rw cost(4, 4). lfsx FSREG:wo, GPR:ro, GPR:ro cost(4, 4). - lha GPR:wo, GPRINDIRECT:ro cost(4, 3). + lha GPR:wo, IND_RC_H_S:ro cost(4, 3). lhax GPR:wo, GPR:ro, GPR:ro cost(4, 3). - lhz GPR:wo, GPRINDIRECT:ro cost(4, 3). + lhz GPR:wo, IND_RC_H:ro cost(4, 3). lhzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). li32 GPR:wo, CONST:ro cost(8, 2). - lwzu GPR:wo, GPRINDIRECT:ro cost(4, 3). + lwzu GPR:wo, IND_RC_W:ro cost(4, 3). lwzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). - lwz GPR:wo, GPRINDIRECT+GPRINDIRECT_OFFSET_LO:ro cost(4, 3). + lwz GPR:wo, IND_RC_W+GPRINDIRECT_OFFSET_LO:ro cost(4, 3). nand GPR:wo, GPR:ro, GPR:ro. neg GPR:wo, GPR:ro. nor GPR:wo, GPR:ro, GPR:ro. @@ -351,7 +353,7 @@ INSTRUCTIONS mtspr SPR:wo, GPR:ro cost(4, 2). or GPR:wo, GPR:ro, GPR:ro. orc GPR:wo, GPR:ro, GPR:ro. - ori GPR:wo, GPR:ro, CONST+LABEL_OFFSET_LO:ro. + ori GPR:wo, GPR:ro, CONST+LABEL_LO:ro. oris GPR:wo, GPR:ro, CONST:ro. orX "or." GPR:wo:cc, GPR:ro, GPR:ro. rlwinm GPR:wo, GPR:ro, CONST:ro, CONST:ro, CONST:ro. 
@@ -362,19 +364,19 @@ INSTRUCTIONS sraw GPR:wo, GPR:ro, GPR:ro cost(4, 2). srawi GPR:wo, GPR:ro, CONST:ro cost(4, 2). srw GPR:wo, GPR:ro, GPR:ro. - stb GPR:ro, GPRINDIRECT:rw cost(4, 3). + stb GPR:ro, IND_RC_B:rw cost(4, 3). stbx GPR:ro, GPR:ro, GPR:ro cost(4, 3). - stfd FPR:ro, GPRINDIRECT:rw cost(4, 4). - stfdu FPR:ro, GPRINDIRECT:rw cost(4, 4). + stfd FPR:ro, IND_RC_D:rw cost(4, 4). + stfdu FPR:ro, IND_RC_D:rw cost(4, 4). stfdx FPR:ro, GPR:ro, GPR:ro cost(4, 4). - stfs FSREG:ro, GPRINDIRECT:rw cost(4, 3). - stfsu FSREG:ro, GPRINDIRECT:rw cost(4, 3). + stfs FSREG:ro, IND_RC_W:rw cost(4, 3). + stfsu FSREG:ro, IND_RC_W:rw cost(4, 3). stfsx FSREG:ro, GPR:ro, GPR:ro cost(4, 3). - sth GPR:ro, GPRINDIRECT:rw cost(4, 3). + sth GPR:ro, IND_RC_H:rw cost(4, 3). sthx GPR:ro, GPR:ro, GPR:ro cost(4, 3). - stw GPR:ro, GPRINDIRECT:rw cost(4, 3). + stw GPR:ro, IND_RC_W:rw cost(4, 3). stwx GPR:ro, GPR:ro, GPR:ro cost(4, 3). - stwu GPR+GPRE:ro, GPRINDIRECT:rw cost(4, 3). + stwu GPR+LOCAL:ro, IND_RC_W:rw cost(4, 3). xor GPR:wo, GPR:ro, GPR:ro. xori GPR:wo, GPR:ro, CONST:ro. xoris GPR:wo, GPR:ro, CONST:ro. @@ -390,25 +392,22 @@ MOVES COMMENT("move GPR->GPR") or %2, %1, %1 -/* GPRE exists solely to allow us to use regvar() (which can only be used in - an expression) as a register constant. 
*/ - - from GPR to GPRE - gen - COMMENT("move GPR->GPRE") - or %2.reg, %1, %1 - /* Constants */ - from CONST_ALL + CONST smalls(%val) to GPR + from CONST + CONST_STACK smalls(%val) to GPR gen - COMMENT("move CONST_ALL->GPR smalls") - addi %2, R0, {CONST, %1.val} + COMMENT("move CONST->GPR smalls") + li %2, {CONST, %1.val} - from CONST_ALL + CONST to GPR + from CONST + CONST_STACK lo(%val)==0 to GPR gen - COMMENT("move CONST_ALL->GPR") - addis %2, R0, {CONST, hi(%1.val)} + COMMENT("move CONST->GPR shifted") + lis %2, {CONST, hi(%1.val)} + + from CONST + CONST_STACK to GPR + gen + COMMENT("move CONST->GPR") + lis %2, {CONST, hi(%1.val)} ori %2, %2, {CONST, lo(%1.val)} /* Can't use addi %2, %2, {CONST, los(%1.val)} * because %2 might be R0. */ @@ -416,8 +415,8 @@ MOVES from LABEL to GPR gen COMMENT("move LABEL->GPR") - addis %2, R0, {LABEL_OFFSET_HI, %1.adr} - ori %2, %2, {LABEL_OFFSET_LO, %1.adr} + lis %2, {LABEL_HI, %1.adr} + ori %2, %2, {LABEL_LO, %1.adr} /* Sign extension */ @@ -453,7 +452,7 @@ MOVES from IND_RC_B to GPR gen COMMENT("move IND_RC_B->GPR") - lbz %2, {GPRINDIRECT, %1.reg, %1.off} + lbz %2, %1 from IND_RR_B to GPR gen @@ -465,7 +464,7 @@ MOVES from GPR to IND_RC_B gen COMMENT("move GPR->IND_RC_B") - stb %1, {GPRINDIRECT, %2.reg, %2.off} + stb %1, %2 from GPR to IND_RR_B gen @@ -477,7 +476,7 @@ MOVES from IND_RC_H to GPR gen COMMENT("move IND_RC_H->GPR") - lhz %2, {GPRINDIRECT, %1.reg, %1.off} + lhz %2, %1 from IND_RR_H to GPR gen @@ -487,7 +486,7 @@ MOVES from IND_RC_H_S to GPR gen COMMENT("move IND_RC_H_S->GPR") - lha %2, {GPRINDIRECT, %1.reg, %1.off} + lha %2, %1 from IND_RR_H_S to GPR gen @@ -499,7 +498,7 @@ MOVES from GPR to IND_RC_H gen COMMENT("move GPR->IND_RC_H") - sth %1, {GPRINDIRECT, %2.reg, %2.off} + sth %1, %2 from GPR to IND_RR_H gen @@ -511,7 +510,7 @@ MOVES from IND_RC_W to GPR gen COMMENT("move IND_RC_W->GPR") - lwz %2, {GPRINDIRECT, %1.reg, %1.off} + lwz %2, %1 from IND_RR_W to GPR gen @@ -521,7 +520,7 @@ MOVES from IND_RC_W to FSREG 
gen COMMENT("move IND_RC_W->FSREG") - lfs %2, {GPRINDIRECT, %1.reg, %1.off} + lfs %2, %1 from IND_RR_W to FSREG gen @@ -533,7 +532,7 @@ MOVES from GPR to IND_RC_W gen COMMENT("move GPR->IND_RC_W") - stw %1, {GPRINDIRECT, %2.reg, %2.off} + stw %1, %2 from GPR to IND_RR_W gen @@ -543,7 +542,7 @@ MOVES from FSREG to IND_RC_W gen COMMENT("move FSREG->IND_RC_W") - stfs %1, {GPRINDIRECT, %2.reg, %2.off} + stfs %1, %2 from FSREG to IND_RR_W gen @@ -555,7 +554,7 @@ MOVES from IND_RC_D to FPR gen COMMENT("move IND_RC_D->FPR") - lfd %2, {GPRINDIRECT, %1.reg, %1.off} + lfd %2, {IND_RC_D, %1.reg, %1.off} from IND_RR_D to FPR gen @@ -567,7 +566,7 @@ MOVES from FPR to IND_RC_D gen COMMENT("move FPR->IND_RC_D") - stfd %1, {GPRINDIRECT, %2.reg, %2.off} + stfd %1, {IND_RC_D, %2.reg, %2.off} from FPR to IND_RR_D gen @@ -681,9 +680,10 @@ MOVES extrwi %2, %1.reg, {CONST, 1}, {CONST, 1} xori %2, %2, {CONST, 1} -/* Miscellaneous */ +/* GPRE exists solely to allow us to use regvar() (which can only be used in + an expression) as a register constant. 
*/ - from OP_ALL_W + LABEL + CONST_ALL to GPRE + from ANY_BHW to GPRE gen move %1, %2.reg @@ -701,64 +701,64 @@ STACKINGRULES from LOCAL to STACK gen COMMENT("stack LOCAL") - stwu {GPRE, regvar(%1.off)}, {GPRINDIRECT, SP, 0-4} + stwu %1, {IND_RC_W, SP, 0-4} from REG to STACK gen COMMENT("stack REG") - stwu %1, {GPRINDIRECT, SP, 0-4} + stwu %1, {IND_RC_W, SP, 0-4} from REG_PAIR to STACK gen COMMENT("stack REG_PAIR") - stwu %1.2, {GPRINDIRECT, SP, 0-4} - stwu %1.1, {GPRINDIRECT, SP, 0-4} + stwu %1.2, {IND_RC_W, SP, 0-4} + stwu %1.1, {IND_RC_W, SP, 0-4} - from CONST_ALL + LABEL to STACK + from CONST_STACK + LABEL to STACK gen - COMMENT("stack CONST_ALL + LABEL") + COMMENT("stack CONST_STACK + LABEL") move %1, RSCRATCH - stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} + stwu RSCRATCH, {IND_RC_W, SP, 0-4} from SEX_B to STACK gen COMMENT("stack SEX_B") extsb RSCRATCH, %1.reg - stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} + stwu RSCRATCH, {IND_RC_W, SP, 0-4} from SEX_H to STACK gen COMMENT("stack SEX_H") extsh RSCRATCH, %1.reg - stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} + stwu RSCRATCH, {IND_RC_W, SP, 0-4} from SUM_ALL + LOGICAL_ALL to STACK gen COMMENT("stack SUM_ALL + LOGICAL_ALL") move %1, RSCRATCH - stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} + stwu RSCRATCH, {IND_RC_W, SP, 0-4} from IND_ALL_BHW to STACK gen COMMENT("stack IND_ALL_BHW") move %1, RSCRATCH - stwu RSCRATCH, {GPRINDIRECT, SP, 0-4} + stwu RSCRATCH, {IND_RC_W, SP, 0-4} from IND_ALL_D to STACK gen COMMENT("stack IND_ALL_D") move %1, FSCRATCH - stfdu FSCRATCH, {GPRINDIRECT, SP, 0-8} + stfdu FSCRATCH, {IND_RC_D, SP, 0-8} from FREG to STACK gen COMMENT("stack FPR") - stfdu %1, {GPRINDIRECT, SP, 0-8} + stfdu %1, {IND_RC_D, SP, 0-8} from FSREG to STACK gen COMMENT("stack FSREG") - stfsu %1, {GPRINDIRECT, SP, 0-4} + stfsu %1, {IND_RC_W, SP, 0-4} @@ -771,10 +771,10 @@ COERCIONS move %1, %a yields %a - from CONST_ALL + from CONST + CONST_STACK uses REG gen - COMMENT("coerce CONST_ALL->REG") + COMMENT("coerce CONST->REG") move %1, %a yields %a 
@@ -789,7 +789,7 @@ COERCIONS uses REG gen COMMENT("coerce STACK->REG") - lwz %a, {GPRINDIRECT, SP, 0} + lwz %a, {IND_RC_W, SP, 0} addi SP, SP, {CONST, 4} yields %a @@ -797,8 +797,8 @@ COERCIONS uses REG_PAIR gen COMMENT("coerce STACK->REG_PAIR") - lwz %a.1, {GPRINDIRECT, SP, 0} - lwz %a.2, {GPRINDIRECT, SP, 4} + lwz %a.1, {IND_RC_W, SP, 0} + lwz %a.2, {IND_RC_W, SP, 4} addi SP, SP, {CONST, 8} yields %a @@ -838,7 +838,7 @@ COERCIONS uses FREG gen COMMENT("coerce STACK->FREG") - lfd %a, {GPRINDIRECT, SP, 0} + lfd %a, {IND_RC_D, SP, 0} addi SP, SP, {CONST, 8} yields %a @@ -846,7 +846,7 @@ COERCIONS uses FSREG gen COMMENT("coerce STACK->FSREG") - lfs %a, {GPRINDIRECT, SP, 0} + lfs %a, {IND_RC_W, SP, 0} addi SP, SP, {CONST, 4} yields %a @@ -1003,7 +1003,7 @@ PATTERNS loi INT32*2 pat stl inreg($1)>0 /* Store to local */ - with CONST_ALL + LABEL + GPR + OP_ALL_W + with ANY_BHW kills regvar($1), LOCAL %off==$1 gen move %1, {GPRE, regvar($1)} @@ -1019,10 +1019,7 @@ PATTERNS sti INT32*2 pat lil inreg($1)>0 /* Load from indirected local */ - uses REG - gen - lwz %a, {GPRINDIRECT, regvar($1), 0} - yields %a + yields {IND_RC_W, regvar($1), 0} pat lil /* Load from indirected local */ leaving @@ -1092,17 +1089,17 @@ PATTERNS kills MEMORY uses REG={LABEL, $1}, REG gen - lwz %b, {GPRINDIRECT, %a, 0} + lwz %b, {IND_RC_W, %a, 0} addi %b, %b, {CONST, 1} - stw %b, {GPRINDIRECT, %a, 0} + stw %b, {IND_RC_W, %a, 0} pat dee /* Decrement external */ kills MEMORY uses REG={LABEL, $1}, REG gen - lwz %b, {GPRINDIRECT, %a, 0} + lwz %b, {IND_RC_W, %a, 0} addi %b, %b, {CONST, 0-1} - stw %b, {GPRINDIRECT, %a, 0} + stw %b, {IND_RC_W, %a, 0} @@ -1161,7 +1158,7 @@ PATTERNS with LABEL uses REG gen - addis %a, R0, {LABEL_OFFSET_HA, %1.adr} + lis %a, {LABEL_HA, %1.adr} lwz %a, {GPRINDIRECT_OFFSET_LO, %a, %1.adr} yields %a with GPR @@ -1194,7 +1191,7 @@ PATTERNS with GPR GPR kills MEMORY gen - stb %2, {GPRINDIRECT, %1, 0} + stb %2, {IND_RC_B, %1, 0} with SUM_RR GPR kills MEMORY gen @@ -1206,7 +1203,7 @@ 
PATTERNS with GPR SEX_B kills MEMORY gen - stb %2.reg, {GPRINDIRECT, %1, 0} + stb %2.reg, {IND_RC_B, %1, 0} with SUM_RR SEX_B kills MEMORY gen @@ -1220,7 +1217,7 @@ PATTERNS with GPR GPR kills MEMORY gen - sth %2, {GPRINDIRECT, %1, 0} + sth %2, {IND_RC_H, %1, 0} with SUM_RR GPR kills MEMORY gen @@ -1232,7 +1229,7 @@ PATTERNS with GPR SEX_H kills MEMORY gen - sth %2.reg, {GPRINDIRECT, %1, 0} + sth %2.reg, {IND_RC_H, %1, 0} with SUM_RR SEX_H kills MEMORY gen @@ -1381,13 +1378,13 @@ PATTERNS with REG CONST_HZ uses reusing %1, REG={SUM_RIS, %1, his(%2.val)} yields %a - with CONST_ALL-CONST2-CONST_HZ REG + with CONST_STACK-CONST2-CONST_HZ REG uses reusing %2, REG={SUM_RIS, %2, his(%1.val)} yields {SUM_RC, %a, los(%1.val)} - with REG CONST_ALL-CONST2-CONST_HZ + with REG CONST_STACK-CONST2-CONST_HZ uses reusing %1, REG={SUM_RIS, %1, his(%2.val)} yields {SUM_RC, %a, los(%2.val)} - with CONST_ALL LABEL + with CONST_STACK LABEL yields {LABEL, %2.adr+%1.val} pat sbi $1==4 /* Subtract word (second - top) */ @@ -1401,10 +1398,10 @@ PATTERNS with CONST_HZ REG uses reusing %2, REG={SUM_RIS, %2, his(0-%1.val)} yields %a - with CONST_ALL-CONST2_WHEN_NEG-CONST_HZ REG + with CONST_STACK-CONST2_WHEN_NEG-CONST_HZ REG uses reusing %2, REG={SUM_RIS, %2, his(0-%1.val)} yields {SUM_RC, %a, los(0-%1.val)} - with CONST_ALL LABEL + with CONST_STACK LABEL yields {LABEL, %2.adr+(0-%1.val)} pat ngi $1==4 /* Negate word */ @@ -1519,10 +1516,10 @@ PATTERNS with CONST_HZ REG uses reusing %2, REG={OR_RIS, %2, hi(%1.val)} yields %a - with REG CONST_ALL-UCONST2-CONST_HZ + with REG CONST_STACK-UCONST2-CONST_HZ uses reusing %1, REG={OR_RIS, %1, hi(%2.val)} yields {OR_RC, %1, lo(%2.val)} - with CONST_ALL-UCONST2-CONST_HZ REG + with CONST_STACK-UCONST2-CONST_HZ REG uses reusing %2, REG={OR_RIS, %2, hi(%1.val)} yields {OR_RC, %2, lo(%1.val)} @@ -1549,10 +1546,10 @@ PATTERNS with CONST_HZ REG uses reusing %2, REG={XOR_RIS, %2, hi(%1.val)} yields %a - with REG CONST_ALL-UCONST2-CONST_HZ + with REG 
CONST_STACK-UCONST2-CONST_HZ uses reusing %1, REG={XOR_RIS, %1, hi(%2.val)} yields {XOR_RC, %1, lo(%2.val)} - with CONST_ALL-UCONST2-CONST_HZ REG + with CONST_STACK-UCONST2-CONST_HZ REG uses reusing %2, REG={XOR_RIS, %2, hi(%1.val)} yields {XOR_RC, %2, lo(%1.val)} @@ -1601,7 +1598,7 @@ PATTERNS cal ".zer" pat sli $1==4 /* Shift left (second << top) */ - with CONST_ALL GPR + with CONST_STACK GPR uses reusing %2, REG gen rlwinm %a, %2, {CONST, (%1.val & 0x1F)}, {CONST, 0}, {CONST, 31-(%1.val & 0x1F)} @@ -1613,7 +1610,7 @@ PATTERNS yields %a pat sri $1==4 /* Shift right signed (second >> top) */ - with CONST_ALL GPR + with CONST_STACK GPR uses reusing %2, REG gen srawi %a, %2, {CONST, %1.val & 0x1F} @@ -1625,7 +1622,7 @@ PATTERNS yields %a pat sru $1==4 /* Shift right unsigned (second >> top) */ - with CONST_ALL GPR + with CONST_STACK GPR uses reusing %2, REG gen rlwinm %a, %2, {CONST, 32-(%1.val & 0x1F)}, {CONST, (%1.val & 0x1F)}, {CONST, 31} @@ -2121,18 +2118,18 @@ PATTERNS uses REG gen move {CONST, $1}, %a - stwu %a, {GPRINDIRECT, SP, 0-4} - stwu %2, {GPRINDIRECT, SP, 0-4} - stwu %1, {GPRINDIRECT, SP, 0-4} + stwu %a, {IND_RC_W, SP, 0-4} + stwu %2, {IND_RC_W, SP, 0-4} + stwu %1, {IND_RC_W, SP, 0-4} bl {LABEL, "_memmove"} addi SP, SP, {CONST, 12} pat bls /* Block move variable length */ with GPR GPR GPR STACK gen - stwu %1, {GPRINDIRECT, SP, 0-4} - stwu %3, {GPRINDIRECT, SP, 0-4} - stwu %2, {GPRINDIRECT, SP, 0-4} + stwu %1, {IND_RC_W, SP, 0-4} + stwu %3, {IND_RC_W, SP, 0-4} + stwu %2, {IND_RC_W, SP, 0-4} bl {LABEL, "_memmove"} addi SP, SP, {CONST, 12} @@ -2198,7 +2195,7 @@ PATTERNS with GPR uses reusing %1, REG gen - lwz %a, {GPRINDIRECT, %1, FP_OFFSET} + lwz %a, {IND_RC_W, %1, FP_OFFSET} yields %a pat lpb /* Convert FP to argument address */ @@ -2256,7 +2253,7 @@ PATTERNS with CONST_HZ STACK gen move {SUM_RC, SP, his(%1.val)}, SP - with CONST_ALL-CONST2-CONST_HZ STACK + with CONST_STACK-CONST2-CONST_HZ STACK gen move {SUM_RC, SP, his(%1.val)}, SP move {SUM_RC, SP, 
los(%1.val)}, SP From 754e96ef16a9a145ad72926950a40479bf31c4e5 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Thu, 2 Feb 2017 10:48:25 -0500 Subject: [PATCH 07/12] Use ha16/lo16 to emit pairs of lis/stw, lis/lfs, lis/stfs. A 4-byte load from a label yields a token IND_RL_W. This token emits either lis/lwz or lis/lfs, if we want a general-purpose register or a floating-point register. --- mach/powerpc/ncg/table | 114 +++++++++++++++++++++++++++-------------- 1 file changed, 75 insertions(+), 39 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index fb5e6b3ff..2794292a3 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -155,17 +155,14 @@ REGISTERS TOKENS -/* Used only in instruction descriptions (to generate the correct syntax). */ - - GPRINDIRECT_OFFSET_LO = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")". - CONST = { INT val; } 4 val. - /* Primitives */ + CONST = { INT val; } 4 val. LABEL = { ADDR adr; } 4 adr. LABEL_HI = { ADDR adr; } 4 "hi16[" adr "]". LABEL_HA = { ADDR adr; } 4 "ha16[" adr "]". LABEL_LO = { ADDR adr; } 4 "lo16[" adr "]". + LABEL_STACK = { GPR reg; ADDR adr; } 4. LOCAL = { INT off; } 4 ">>> BUG IN LOCAL". /* Allows us to use regvar() to refer to registers */ @@ -191,16 +188,17 @@ TOKENS SEX_B = { GPR reg; } 4. SEX_H = { GPR reg; } 4. - IND_RC_B = { GPR reg; INT off; } 4 off "(" reg ")". - IND_RR_B = { GPR reg1; GPR reg2; } 4. - IND_RC_H = { GPR reg; INT off; } 4 off "(" reg ")". - IND_RR_H = { GPR reg1; GPR reg2; } 4. - IND_RC_H_S = { GPR reg; INT off; } 4 off "(" reg ")". - IND_RR_H_S = { GPR reg1; GPR reg2; } 4. - IND_RC_W = { GPR reg; INT off; } 4 off "(" reg ")". - IND_RR_W = { GPR reg1; GPR reg2; } 4. - IND_RC_D = { GPR reg; INT off; } 8 off "(" reg ")". - IND_RR_D = { GPR reg1; GPR reg2; } 8. + IND_RC_B = { GPR reg; INT off; } 4 off "(" reg ")". + IND_RR_B = { GPR reg1; GPR reg2; } 4. + IND_RC_H = { GPR reg; INT off; } 4 off "(" reg ")". + IND_RR_H = { GPR reg1; GPR reg2; } 4. 
+ IND_RC_H_S = { GPR reg; INT off; } 4 off "(" reg ")". + IND_RR_H_S = { GPR reg1; GPR reg2; } 4. + IND_RC_W = { GPR reg; INT off; } 4 off "(" reg ")". + IND_RL_W = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")". + IND_RR_W = { GPR reg1; GPR reg2; } 4. + IND_RC_D = { GPR reg; INT off; } 8 off "(" reg ")". + IND_RR_D = { GPR reg1; GPR reg2; } 8. NOT_R = { GPR reg; } 4. @@ -249,7 +247,7 @@ SETS /* indirect values */ IND_ALL_B = IND_RC_B + IND_RR_B. IND_ALL_H = IND_RC_H + IND_RR_H + IND_RC_H_S + IND_RR_H_S. - IND_ALL_W = IND_RC_W + IND_RR_W. + IND_ALL_W = IND_RC_W + IND_RL_W + IND_RR_W. IND_ALL_D = IND_RC_D + IND_RR_D. IND_ALL_BHW = IND_ALL_B + IND_ALL_H + IND_ALL_W. @@ -277,7 +275,7 @@ INSTRUCTIONS add GPR:wo, GPR:ro, GPR:ro. addX "add." GPR:wo, GPR:ro, GPR:ro. - addi GPR:wo, GPR:ro, CONST:ro. + addi GPR:wo, GPR:ro, CONST+LABEL_LO:ro. li GPR:wo, CONST:ro. addis GPR:wo, GPR:ro, CONST+LABEL_HI+LABEL_HA:ro. lis GPR:wo, CONST+LABEL_HI+LABEL_HA:ro. @@ -333,7 +331,7 @@ INSTRUCTIONS lfd FPR:wo, IND_RC_D:ro cost(4, 5). lfdu FPR:wo, IND_RC_D:ro cost(4, 5). lfdx FPR:wo, GPR:ro, GPR:ro cost(4, 5). - lfs FSREG:wo, IND_RC_W:ro cost(4, 4). + lfs FSREG:wo, IND_RC_W+IND_RL_W:ro cost(4, 4). lfsu FSREG:wo, IND_RC_W:rw cost(4, 4). lfsx FSREG:wo, GPR:ro, GPR:ro cost(4, 4). lha GPR:wo, IND_RC_H_S:ro cost(4, 3). @@ -343,7 +341,7 @@ INSTRUCTIONS li32 GPR:wo, CONST:ro cost(8, 2). lwzu GPR:wo, IND_RC_W:ro cost(4, 3). lwzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). - lwz GPR:wo, IND_RC_W+GPRINDIRECT_OFFSET_LO:ro cost(4, 3). + lwz GPR:wo, IND_RC_W+IND_RL_W:ro cost(4, 3). nand GPR:wo, GPR:ro, GPR:ro. neg GPR:wo, GPR:ro. nor GPR:wo, GPR:ro, GPR:ro. @@ -369,12 +367,12 @@ INSTRUCTIONS stfd FPR:ro, IND_RC_D:rw cost(4, 4). stfdu FPR:ro, IND_RC_D:rw cost(4, 4). stfdx FPR:ro, GPR:ro, GPR:ro cost(4, 4). - stfs FSREG:ro, IND_RC_W:rw cost(4, 3). + stfs FSREG:ro, IND_RC_W+IND_RL_W:rw cost(4, 3). stfsu FSREG:ro, IND_RC_W:rw cost(4, 3). stfsx FSREG:ro, GPR:ro, GPR:ro cost(4, 3). 
sth GPR:ro, IND_RC_H:rw cost(4, 3). sthx GPR:ro, GPR:ro, GPR:ro cost(4, 3). - stw GPR:ro, IND_RC_W:rw cost(4, 3). + stw GPR:ro, IND_RC_W+IND_RL_W:rw cost(4, 3). stwx GPR:ro, GPR:ro, GPR:ro cost(4, 3). stwu GPR+LOCAL:ro, IND_RC_W:rw cost(4, 3). xor GPR:wo, GPR:ro, GPR:ro. @@ -418,6 +416,16 @@ MOVES lis %2, {LABEL_HI, %1.adr} ori %2, %2, {LABEL_LO, %1.adr} + from LABEL_HA to GPR + gen + lis %2, %1 + + from LABEL_STACK to GPR + gen + move {LABEL_HA, %1.adr}, %1.reg + addi %2, %1.reg, {LABEL_LO, %1.adr} + + /* Sign extension */ from SEX_B to GPR @@ -512,6 +520,11 @@ MOVES COMMENT("move IND_RC_W->GPR") lwz %2, %1 + from IND_RL_W to GPR + gen + move {LABEL_HA, %1.adr}, %1.reg + lwz %2, %1 + from IND_RR_W to GPR gen COMMENT("move IND_RR_W->GPR") @@ -522,6 +535,11 @@ MOVES COMMENT("move IND_RC_W->FSREG") lfs %2, %1 + from IND_RL_W to FSREG + gen + move {LABEL_HA, %1.adr}, %1.reg + lfs %2, %1 + from IND_RR_W to FSREG gen COMMENT("move IND_RR_W->FSREG") @@ -714,9 +732,15 @@ STACKINGRULES stwu %1.2, {IND_RC_W, SP, 0-4} stwu %1.1, {IND_RC_W, SP, 0-4} - from CONST_STACK + LABEL to STACK + from CONST_STACK to STACK gen - COMMENT("stack CONST_STACK + LABEL") + COMMENT("stack CONST_STACK") + move %1, RSCRATCH + stwu RSCRATCH, {IND_RC_W, SP, 0-4} + + from LABEL_STACK to STACK + gen + COMMENT("stack LABEL_STACK") move %1, RSCRATCH stwu RSCRATCH, {IND_RC_W, SP, 0-4} @@ -1058,7 +1082,8 @@ PATTERNS lae $1 pat lae /* Load address of external */ - yields {LABEL, $1} + uses REG + yields {LABEL_STACK, %a, $1} pat loe /* Load word external */ leaving @@ -1155,17 +1180,13 @@ PATTERNS yields {IND_RC_H, %1.reg, %1.off} pat loi $1==INT32 /* Load word indirect */ - with LABEL - uses REG - gen - lis %a, {LABEL_HA, %1.adr} - lwz %a, {GPRINDIRECT_OFFSET_LO, %a, %1.adr} - yields %a with GPR yields {IND_RC_W, %1, 0} - with SUM_RC + with exact LABEL_STACK + yields {IND_RL_W, %1.reg, %1.adr} + with exact SUM_RC yields {IND_RC_W, %1.reg, %1.off} - with SUM_RR + with exact SUM_RR yields {IND_RR_W, 
%1.reg1, %1.reg2} pat loi $1==INT64 /* Load double-word indirect */ @@ -1240,15 +1261,25 @@ PATTERNS move %2.reg, {IND_RC_H, %1.reg, %1.off} pat sti $1==INT32 /* Store word indirect */ - with GPR GPR+FSREG + with REG REG+FSREG kills MEMORY gen move %2, {IND_RC_W, %1, 0} - with SUM_RR GPR+FSREG + with LABEL_STACK REG + kills MEMORY + gen + move {LABEL_HA, %1.adr}, %1.reg + stw %2, {IND_RL_W, %1.reg, %1.adr} + with LABEL_STACK FSREG + kills MEMORY + gen + move {LABEL_HA, %1.adr}, %1.reg + stfs %2, {IND_RL_W, %1.reg, %1.adr} + with SUM_RR REG+FSREG kills MEMORY gen move %2, {IND_RR_W, %1.reg1, %1.reg2} - with SUM_RC GPR+FSREG + with SUM_RC REG+FSREG kills MEMORY gen move %2, {IND_RC_W, %1.reg, %1.off} @@ -1384,8 +1415,12 @@ PATTERNS with REG CONST_STACK-CONST2-CONST_HZ uses reusing %1, REG={SUM_RIS, %1, his(%2.val)} yields {SUM_RC, %a, los(%2.val)} - with CONST_STACK LABEL - yields {LABEL, %2.adr+%1.val} + with exact CONST_STACK LABEL_STACK + uses reusing %2.reg, REG + yields {LABEL_STACK, %a, %2.adr+%1.val} + with exact LABEL_STACK CONST_STACK + uses reusing %1.reg, REG + yields {LABEL_STACK, %a, %1.adr+%2.val} pat sbi $1==4 /* Subtract word (second - top) */ with REG REG @@ -1401,8 +1436,9 @@ PATTERNS with CONST_STACK-CONST2_WHEN_NEG-CONST_HZ REG uses reusing %2, REG={SUM_RIS, %2, his(0-%1.val)} yields {SUM_RC, %a, los(0-%1.val)} - with CONST_STACK LABEL - yields {LABEL, %2.adr+(0-%1.val)} + with exact CONST_STACK LABEL_STACK + uses reusing %2.reg, REG + yields {LABEL_STACK, %a, %2.adr+(0-%1.val)} pat ngi $1==4 /* Negate word */ with REG From ed21a59a8217de116ee4b908cff2cafba825d337 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Wed, 8 Feb 2017 12:23:06 -0500 Subject: [PATCH 08/12] In PowerPC ncg, allocate register for ha16[label]. Use it to generate code like lis r12,ha16[__II0] lis r11,ha16[_f] lfs f1,lo16[_f](r11) lfs f2,lo16[__II0](r12) fadds f13,f2,f1 stfs f13,lo16[_f](r11) Here ncg has allocated r11 for ha16[_f]. We use r11 in lfs and again in stfs. 
Before this change, we needed an extra lis before stfs, because ncg did not remember that ha16[_f] was in a register. This example has a gap between ha16[__II0] and lo16[__II0], because the lo16 is not in the next instruction. This requires my previous commit 1bf58cf for RELOLIS. There is a gap because ncg emits the lis as soon as I allocate it. The "lfs f2,lo16[__II0](r12)" happens in a coercion from IND_RL_W to FSREG. The coercion allocates one FSREG but may not allocate any other registers. So I must allocate r12 earlier. I allocate r12 in pat lae, but this causes a gap. --- mach/powerpc/ncg/table | 88 ++++++++++++++++-------------------------- 1 file changed, 34 insertions(+), 54 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 2794292a3..6d2d785bb 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -162,7 +162,6 @@ TOKENS LABEL_HI = { ADDR adr; } 4 "hi16[" adr "]". LABEL_HA = { ADDR adr; } 4 "ha16[" adr "]". LABEL_LO = { ADDR adr; } 4 "lo16[" adr "]". - LABEL_STACK = { GPR reg; ADDR adr; } 4. LOCAL = { INT off; } 4 ">>> BUG IN LOCAL". /* Allows us to use regvar() to refer to registers */ @@ -181,9 +180,10 @@ TOKENS /* Expression partial results */ - SUM_RIS = { GPR reg; INT offhi; } 4. - SUM_RC = { GPR reg; INT off; } 4. - SUM_RR = { GPR reg1; GPR reg2; } 4. + SUM_RIS = { GPR reg; INT offhi; } 4. /* reg + (offhi << 16) */ + SUM_RC = { GPR reg; INT off; } 4. /* reg + off */ + SUM_RL = { GPR reg; ADDR adr; } 4. /* reg + lo16[adr] */ + SUM_RR = { GPR reg1; GPR reg2; } 4. /* reg1 + reg2 */ SEX_B = { GPR reg; } 4. SEX_H = { GPR reg; } 4. @@ -237,7 +237,7 @@ SETS CONST_STACK = CONST_N8000 + CONST_N7FFF_N0001 + CONST_0000_7FFF + CONST_8000 + CONST_8001_FFFF + CONST_HZ + CONST_HL. - SUM_ALL = SUM_RC + SUM_RR. + SUM_ALL = SUM_RC + SUM_RL + SUM_RR. SEX_ALL = SEX_B + SEX_H. 
@@ -420,12 +420,6 @@ MOVES gen lis %2, %1 - from LABEL_STACK to GPR - gen - move {LABEL_HA, %1.adr}, %1.reg - addi %2, %1.reg, {LABEL_LO, %1.adr} - - /* Sign extension */ from SEX_B to GPR @@ -450,6 +444,11 @@ MOVES COMMENT("move SUM_RC->GPR") addi %2, %1.reg, {CONST, %1.off} + from SUM_RL to GPR + gen + COMMENT("move SUM_RL->GPR") + addi %2, %1.reg, {LABEL_LO, %1.adr} + from SUM_RR to GPR gen COMMENT("move SUM_RR->GPR") @@ -522,7 +521,6 @@ MOVES from IND_RL_W to GPR gen - move {LABEL_HA, %1.adr}, %1.reg lwz %2, %1 from IND_RR_W to GPR @@ -537,7 +535,6 @@ MOVES from IND_RL_W to FSREG gen - move {LABEL_HA, %1.adr}, %1.reg lfs %2, %1 from IND_RR_W to FSREG @@ -552,6 +549,10 @@ MOVES COMMENT("move GPR->IND_RC_W") stw %1, %2 + from GPR to IND_RL_W + gen + stw %1, %2 + from GPR to IND_RR_W gen COMMENT("move GPR->IND_RR_W") @@ -562,6 +563,10 @@ MOVES COMMENT("move FSREG->IND_RC_W") stfs %1, %2 + from FSREG to IND_RL_W + gen + stfs %1, %2 + from FSREG to IND_RR_W gen COMMENT("move FSREG->IND_RR_W") @@ -738,12 +743,6 @@ STACKINGRULES move %1, RSCRATCH stwu RSCRATCH, {IND_RC_W, SP, 0-4} - from LABEL_STACK to STACK - gen - COMMENT("stack LABEL_STACK") - move %1, RSCRATCH - stwu RSCRATCH, {IND_RC_W, SP, 0-4} - from SEX_B to STACK gen COMMENT("stack SEX_B") @@ -1082,8 +1081,8 @@ PATTERNS lae $1 pat lae /* Load address of external */ - uses REG - yields {LABEL_STACK, %a, $1} + uses REG={LABEL_HA, $1} + yields {SUM_RL, %a, $1} pat loe /* Load word external */ leaving @@ -1111,20 +1110,16 @@ PATTERNS ste $1 pat ine /* Increment external */ - kills MEMORY - uses REG={LABEL, $1}, REG - gen - lwz %b, {IND_RC_W, %a, 0} - addi %b, %b, {CONST, 1} - stw %b, {IND_RC_W, %a, 0} + leaving + loe $1 + inc + ste $1 pat dee /* Decrement external */ - kills MEMORY - uses REG={LABEL, $1}, REG - gen - lwz %b, {IND_RC_W, %a, 0} - addi %b, %b, {CONST, 0-1} - stw %b, {IND_RC_W, %a, 0} + leaving + loe $1 + dec + ste $1 @@ -1182,10 +1177,10 @@ PATTERNS pat loi $1==INT32 /* Load word indirect */ with 
GPR yields {IND_RC_W, %1, 0} - with exact LABEL_STACK - yields {IND_RL_W, %1.reg, %1.adr} with exact SUM_RC yields {IND_RC_W, %1.reg, %1.off} + with exact SUM_RL + yields {IND_RL_W, %1.reg, %1.adr} with exact SUM_RR yields {IND_RR_W, %1.reg1, %1.reg2} @@ -1265,20 +1260,14 @@ PATTERNS kills MEMORY gen move %2, {IND_RC_W, %1, 0} - with LABEL_STACK REG - kills MEMORY - gen - move {LABEL_HA, %1.adr}, %1.reg - stw %2, {IND_RL_W, %1.reg, %1.adr} - with LABEL_STACK FSREG - kills MEMORY - gen - move {LABEL_HA, %1.adr}, %1.reg - stfs %2, {IND_RL_W, %1.reg, %1.adr} with SUM_RR REG+FSREG kills MEMORY gen move %2, {IND_RR_W, %1.reg1, %1.reg2} + with SUM_RL REG+FSREG + kills MEMORY + gen + move %2, {IND_RL_W, %1.reg, %1.adr} with SUM_RC REG+FSREG kills MEMORY gen @@ -1415,12 +1404,6 @@ PATTERNS with REG CONST_STACK-CONST2-CONST_HZ uses reusing %1, REG={SUM_RIS, %1, his(%2.val)} yields {SUM_RC, %a, los(%2.val)} - with exact CONST_STACK LABEL_STACK - uses reusing %2.reg, REG - yields {LABEL_STACK, %a, %2.adr+%1.val} - with exact LABEL_STACK CONST_STACK - uses reusing %1.reg, REG - yields {LABEL_STACK, %a, %1.adr+%2.val} pat sbi $1==4 /* Subtract word (second - top) */ with REG REG @@ -1436,9 +1419,6 @@ PATTERNS with CONST_STACK-CONST2_WHEN_NEG-CONST_HZ REG uses reusing %2, REG={SUM_RIS, %2, his(0-%1.val)} yields {SUM_RC, %a, los(0-%1.val)} - with exact CONST_STACK LABEL_STACK - uses reusing %2.reg, REG - yields {LABEL_STACK, %a, %2.adr+(0-%1.val)} pat ngi $1==4 /* Negate word */ with REG From 5e00e1fce2583e08447324bdd0c3c60c51302445 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Wed, 8 Feb 2017 12:27:16 -0500 Subject: [PATCH 09/12] Trimming mach/powerpc/ncg/table Remove coercion from LABEL to REG. The coercion never happens because I have stopped putting LABEL on the stack. Also remove LABEL from set ANY_BHW. Retain the move from LABEL to REG because pat gto uses it. Remove li32 instruction, unused after the switch to the hi16, ha16, lo16 syntax. Remove COMMENT(...) 
lines from most moves. In my opinion, they took too much space, both in the table and in the assembly output. The stacking rules and coercions keep their COMMENT(...) lines. In test GPR, don't write to RSCRATCH. Fold several coercions into a single coercion from ANY_BHW uses REG. Use REG instead of GPR in stack patterns. REG and GPR act the same, because every GPR on the stack is a REG, but I want to be clear that I expect a REG, not r0. In code rules, sort SUM_RC before SUM_RR, so I can add SUM_RL later. Remove rules to optimize loc loc cii loc loc cii. If $2==$4, the peephole optimizer can optimize it. If $2!=$4, then the EM program is missing a conversion from size $2 to size $4. Remove rules to store a SEX_B with sti 1 or a SEX_H with sti 2. These rules would never get used, unless the EM program is missing a conversion from size 4 to size 1 or 2. --- mach/powerpc/ncg/table | 418 ++++++++++++----------------------------- 1 file changed, 117 insertions(+), 301 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 6d2d785bb..c608b2fd6 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -28,7 +28,6 @@ PC_OFFSET = 4 /* Offset of saved PC relative to our FP */ #define his(n) ((hi(n) + (lo(n)>>15)) & 0xFFFF) - PROPERTIES GPR /* any GPR */ @@ -50,6 +49,7 @@ PROPERTIES FPR16(8) FPR17(8) FPR18(8) FPR19(8) FPR20(8) FPR21(8) FPR22(8) FPR23(8) FPR24(8) FPR25(8) FPR26(8) FPR27(8) FPR28(8) FPR29(8) FPR30(8) FPR31(8) + REGISTERS /* Reverse order to encourage ncg to allocate them from r31 down */ @@ -255,7 +255,7 @@ SETS MEMORY = IND_ALL_BHW + IND_ALL_D. /* any stack token that we can easily move to GPR */ - ANY_BHW = REG + CONST_STACK + LABEL + SEX_ALL + + ANY_BHW = REG + CONST_STACK + SEX_ALL + SUM_ALL + IND_ALL_BHW + LOGICAL_ALL. @@ -338,7 +338,6 @@ INSTRUCTIONS lhax GPR:wo, GPR:ro, GPR:ro cost(4, 3). lhz GPR:wo, IND_RC_H:ro cost(4, 3). lhzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). - li32 GPR:wo, CONST:ro cost(8, 2). 
lwzu GPR:wo, IND_RC_W:ro cost(4, 3). lwzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). lwz GPR:wo, IND_RC_W+IND_RL_W:ro cost(4, 3). @@ -354,6 +353,7 @@ INSTRUCTIONS ori GPR:wo, GPR:ro, CONST+LABEL_LO:ro. oris GPR:wo, GPR:ro, CONST:ro. orX "or." GPR:wo:cc, GPR:ro, GPR:ro. + orX_readonly "or." GPR:ro:cc, GPR:ro, GPR:ro. rlwinm GPR:wo, GPR:ro, CONST:ro, CONST:ro, CONST:ro. extlwi GPR:wo, GPR:ro, CONST:ro, CONST:ro. extrwi GPR:wo, GPR:ro, CONST:ro, CONST:ro. @@ -382,13 +382,10 @@ INSTRUCTIONS comment "!" LABEL:ro cost(0, 0). - MOVES from GPR to GPR - gen - COMMENT("move GPR->GPR") - or %2, %1, %1 + gen or %2, %1, %1 /* Constants */ @@ -417,226 +414,149 @@ MOVES ori %2, %2, {LABEL_LO, %1.adr} from LABEL_HA to GPR - gen - lis %2, %1 + gen lis %2, %1 /* Sign extension */ from SEX_B to GPR - gen - COMMENT("move SEX_B->GPR") - extsb %2, %1.reg + gen extsb %2, %1.reg from SEX_H to GPR - gen - COMMENT("move SEX_H->GPR") - extsh %2, %1.reg + gen extsh %2, %1.reg /* Register + something */ from SUM_RIS to GPR - gen - COMMENT("move SUM_RIS->GPR") - addis %2, %1.reg, {CONST, %1.offhi} + gen addis %2, %1.reg, {CONST, %1.offhi} from SUM_RC to GPR - gen - COMMENT("move SUM_RC->GPR") - addi %2, %1.reg, {CONST, %1.off} + gen addi %2, %1.reg, {CONST, %1.off} from SUM_RL to GPR - gen - COMMENT("move SUM_RL->GPR") - addi %2, %1.reg, {LABEL_LO, %1.adr} + gen addi %2, %1.reg, {LABEL_LO, %1.adr} from SUM_RR to GPR - gen - COMMENT("move SUM_RR->GPR") - add %2, %1.reg1, %1.reg2 + gen add %2, %1.reg1, %1.reg2 /* Read byte */ from IND_RC_B to GPR - gen - COMMENT("move IND_RC_B->GPR") - lbz %2, %1 + gen lbz %2, %1 from IND_RR_B to GPR - gen - COMMENT("move IND_RR_B->GPR") - lbzx %2, %1.reg1, %1.reg2 + gen lbzx %2, %1.reg1, %1.reg2 /* Write byte */ from GPR to IND_RC_B - gen - COMMENT("move GPR->IND_RC_B") - stb %1, %2 + gen stb %1, %2 from GPR to IND_RR_B - gen - COMMENT("move GPR->IND_RR_B") - stbx %1, %2.reg1, %2.reg2 + gen stbx %1, %2.reg1, %2.reg2 /* Read halfword (short) */ from IND_RC_H to GPR - gen 
- COMMENT("move IND_RC_H->GPR") - lhz %2, %1 + gen lhz %2, %1 from IND_RR_H to GPR - gen - COMMENT("move IND_RR_H->GPR") - lhzx %2, %1.reg1, %1.reg2 + gen lhzx %2, %1.reg1, %1.reg2 from IND_RC_H_S to GPR - gen - COMMENT("move IND_RC_H_S->GPR") - lha %2, %1 + gen lha %2, %1 from IND_RR_H_S to GPR - gen - COMMENT("move IND_RR_H_S->GPR") - lhax %2, %1.reg1, %1.reg2 + gen lhax %2, %1.reg1, %1.reg2 /* Write halfword */ from GPR to IND_RC_H - gen - COMMENT("move GPR->IND_RC_H") - sth %1, %2 + gen sth %1, %2 from GPR to IND_RR_H - gen - COMMENT("move GPR->IND_RR_H") - sthx %1, %2.reg1, %2.reg2 + gen sthx %1, %2.reg1, %2.reg2 /* Read word */ from IND_RC_W to GPR - gen - COMMENT("move IND_RC_W->GPR") - lwz %2, %1 + gen lwz %2, %1 from IND_RL_W to GPR - gen - lwz %2, %1 + gen lwz %2, %1 from IND_RR_W to GPR - gen - COMMENT("move IND_RR_W->GPR") - lwzx %2, %1.reg1, %1.reg2 + gen lwzx %2, %1.reg1, %1.reg2 from IND_RC_W to FSREG - gen - COMMENT("move IND_RC_W->FSREG") - lfs %2, %1 + gen lfs %2, %1 from IND_RL_W to FSREG - gen - lfs %2, %1 + gen lfs %2, %1 from IND_RR_W to FSREG - gen - COMMENT("move IND_RR_W->FSREG") - lfsx %2, %1.reg1, %1.reg2 + gen lfsx %2, %1.reg1, %1.reg2 /* Write word */ from GPR to IND_RC_W - gen - COMMENT("move GPR->IND_RC_W") - stw %1, %2 + gen stw %1, %2 from GPR to IND_RL_W - gen - stw %1, %2 + gen stw %1, %2 from GPR to IND_RR_W - gen - COMMENT("move GPR->IND_RR_W") - stwx %1, %2.reg1, %2.reg2 + gen stwx %1, %2.reg1, %2.reg2 from FSREG to IND_RC_W - gen - COMMENT("move FSREG->IND_RC_W") - stfs %1, %2 + gen stfs %1, %2 from FSREG to IND_RL_W - gen - stfs %1, %2 + gen stfs %1, %2 from FSREG to IND_RR_W - gen - COMMENT("move FSREG->IND_RR_W") - stfsx %1, %2.reg1, %2.reg2 + gen stfsx %1, %2.reg1, %2.reg2 /* Read double */ from IND_RC_D to FPR - gen - COMMENT("move IND_RC_D->FPR") - lfd %2, {IND_RC_D, %1.reg, %1.off} + gen lfd %2, {IND_RC_D, %1.reg, %1.off} from IND_RR_D to FPR - gen - COMMENT("move IND_RR_D->FPR") - lfdx %2, %1.reg1, %1.reg2 + gen lfdx 
%2, %1.reg1, %1.reg2 /* Write double */ from FPR to IND_RC_D - gen - COMMENT("move FPR->IND_RC_D") - stfd %1, {IND_RC_D, %2.reg, %2.off} + gen stfd %1, {IND_RC_D, %2.reg, %2.off} from FPR to IND_RR_D - gen - COMMENT("move FPR->IND_RR_W") - stfdx %1, %2.reg1, %2.reg2 + gen stfdx %1, %2.reg1, %2.reg2 /* Logicals */ from NOT_R to GPR - gen - COMMENT("move NOT_R->GPR") - nor %2, %1.reg, %1.reg + gen nor %2, %1.reg, %1.reg from AND_RR to GPR - gen - COMMENT("move AND_RR->GPR") - and %2, %1.reg1, %1.reg2 + gen and %2, %1.reg1, %1.reg2 from OR_RR to GPR - gen - COMMENT("move OR_RR->GPR") - or %2, %1.reg1, %1.reg2 + gen or %2, %1.reg1, %1.reg2 from OR_RIS to GPR - gen - COMMENT("move OR_RIS->GPR") - oris %2, %1.reg, {CONST, %1.valhi} + gen oris %2, %1.reg, {CONST, %1.valhi} from OR_RC to GPR - gen - COMMENT("move OR_RC->GPR") - ori %2, %1.reg, {CONST, %1.val} + gen ori %2, %1.reg, {CONST, %1.val} from XOR_RR to GPR - gen - COMMENT("move XOR_RR->GPR") - xor %2, %1.reg1, %1.reg2 + gen xor %2, %1.reg1, %1.reg2 from XOR_RIS to GPR - gen - COMMENT("move XOR_RIS->GPR") - xoris %2, %1.reg, {CONST, %1.valhi} + gen xoris %2, %1.reg, {CONST, %1.valhi} from XOR_RC to GPR - gen - COMMENT("move XOR_RC->GPR") - xori %2, %1.reg, {CONST, %1.val} + gen xori %2, %1.reg, {CONST, %1.val} /* Conditions */ @@ -707,16 +627,17 @@ MOVES an expression) as a register constant. */ from ANY_BHW to GPRE - gen - move %1, %2.reg + gen move %1, %2.reg TESTS + /* Given orX %1, %1, %1, ncgg says, "Instruction destroys %1, + * not allowed here". We use orX_readonly to trick ncgg. 
+ */ to test GPR gen - orX RSCRATCH, %1, %1 - + orX_readonly %1, %1, %1 STACKINGRULES @@ -787,24 +708,10 @@ STACKINGRULES COERCIONS - from REG + from ANY_BHW uses REG gen - COMMENT("coerce REG->REG") - move %1, %a - yields %a - - from CONST + CONST_STACK - uses REG - gen - COMMENT("coerce CONST->REG") - move %1, %a - yields %a - - from LABEL - uses REG - gen - COMMENT("coerce LABEL->REG") + COMMENT("coerce ANY_BHW->REG") move %1, %a yields %a @@ -825,26 +732,6 @@ COERCIONS addi SP, SP, {CONST, 8} yields %a - from SEX_B - uses REG - gen - COMMENT("coerce SEX_B->REG") - extsb %a, %1.reg - yields %a - - from SEX_H - uses REG - gen - COMMENT("coerce SEX_H->REG") - extsh %a, %1.reg - yields %a - - from SUM_ALL + LOGICAL_ALL - uses REG - gen - move %1, %a - yields %a - from FSREG uses FSREG gen @@ -873,12 +760,6 @@ COERCIONS addi SP, SP, {CONST, 4} yields %a - from IND_ALL_BHW - uses REG - gen - move %1, %a - yields %a - from IND_ALL_W uses FSREG gen @@ -958,18 +839,6 @@ PATTERNS /* Type conversions */ - pat loc loc cii loc loc cii $1==$4 && $2==$5 /* madness, generated by the C compiler */ - leaving - loc $1 - loc $2 - cii - - pat loc loc cii loc loc cii $2==INT32 && $5==INT32 && $4<$2 /* madness, generated by the C compiler */ - leaving - loc $4 - loc $5 - cii - pat loc loc ciu /* signed X -> unsigned X */ leaving loc $1 @@ -992,17 +861,14 @@ PATTERNS /* nop */ pat loc loc cii $1==INT8 && $2==INT32 /* signed char -> signed int */ - with GPR + with REG yields {SEX_B, %1} pat loc loc cii $1==2 && $2==4 /* signed char -> signed short */ - with GPR + with REG yields {SEX_H, %1} - - - /* Local variables */ pat lal smalls($1) /* Load address of local */ @@ -1122,7 +988,6 @@ PATTERNS ste $1 - /* Structures */ pat lof /* Load word offsetted */ @@ -1146,36 +1011,35 @@ PATTERNS sti INT64 - /* Loads and stores */ pat loi $1==INT8 /* Load byte indirect */ - with GPR + with REG yields {IND_RC_B, %1, 0} - with SUM_RR - yields {IND_RR_B, %1.reg1, %1.reg2} - with SUM_RC + with exact 
SUM_RC yields {IND_RC_B, %1.reg, %1.off} + with exact SUM_RR + yields {IND_RR_B, %1.reg1, %1.reg2} pat loi loc loc cii $1==INT16 && $2==INT16 && $3==INT32 /* Load half-word indirect and sign extend */ - with GPR + with REG yields {IND_RC_H_S, %1, 0} - with SUM_RR - yields {IND_RR_H_S, %1.reg1, %1.reg2} - with SUM_RC + with exact SUM_RC yields {IND_RC_H_S, %1.reg, %1.off} + with exact SUM_RR + yields {IND_RR_H_S, %1.reg1, %1.reg2} pat loi $1==INT16 /* Load half-word indirect */ - with GPR + with REG yields {IND_RC_H, %1, 0} - with SUM_RR - yields {IND_RR_H, %1.reg1, %1.reg2} - with SUM_RC + with exact SUM_RC yields {IND_RC_H, %1.reg, %1.off} + with exact SUM_RR + yields {IND_RR_H, %1.reg1, %1.reg2} pat loi $1==INT32 /* Load word indirect */ - with GPR + with REG yields {IND_RC_W, %1, 0} with exact SUM_RC yields {IND_RC_W, %1.reg, %1.off} @@ -1185,11 +1049,11 @@ PATTERNS yields {IND_RR_W, %1.reg1, %1.reg2} pat loi $1==INT64 /* Load double-word indirect */ - with GPR + with REG yields {IND_RC_D, %1, 0} - with SUM_RC + with exact SUM_RC yields {IND_RC_D, %1.reg, %1.off} - with SUM_RR + with exact SUM_RR yields {IND_RR_D, %1.reg1, %1.reg2} pat loi /* Load arbitrary size */ @@ -1204,88 +1068,51 @@ PATTERNS bl {LABEL, ".los"} pat sti $1==INT8 /* Store byte indirect */ - with GPR GPR + with REG REG kills MEMORY - gen - stb %2, {IND_RC_B, %1, 0} - with SUM_RR GPR + gen move %2, {IND_RC_B, %1, 0} + with SUM_RC REG kills MEMORY - gen - stbx %2, %1.reg1, %1.reg2 - with SUM_RC GPR + gen move %2, {IND_RC_B, %1.reg, %1.off} + with SUM_RR REG kills MEMORY - gen - move %2, {IND_RC_B, %1.reg, %1.off} - with GPR SEX_B - kills MEMORY - gen - stb %2.reg, {IND_RC_B, %1, 0} - with SUM_RR SEX_B - kills MEMORY - gen - stbx %2.reg, %1.reg1, %1.reg2 - with SUM_RC SEX_B - kills MEMORY - gen - move %2.reg, {IND_RC_B, %1.reg, %1.off} + gen move %2, {IND_RR_B, %1.reg1, %1.reg2} pat sti $1==INT16 /* Store half-word indirect */ - with GPR GPR + with REG REG kills MEMORY - gen - sth %2, {IND_RC_H, 
%1, 0} - with SUM_RR GPR + gen move %2, {IND_RC_H, %1, 0} + with SUM_RC REG kills MEMORY - gen - sthx %2, %1.reg1, %1.reg2 - with SUM_RC GPR + gen move %2, {IND_RC_H, %1.reg, %1.off} + with SUM_RR REG kills MEMORY - gen - move %2, {IND_RC_H, %1.reg, %1.off} - with GPR SEX_H - kills MEMORY - gen - sth %2.reg, {IND_RC_H, %1, 0} - with SUM_RR SEX_H - kills MEMORY - gen - sthx %2.reg, %1.reg1, %1.reg2 - with SUM_RC SEX_H - kills MEMORY - gen - move %2.reg, {IND_RC_H, %1.reg, %1.off} + gen move %2, {IND_RR_H, %1.reg1, %1.reg2} pat sti $1==INT32 /* Store word indirect */ with REG REG+FSREG kills MEMORY - gen - move %2, {IND_RC_W, %1, 0} - with SUM_RR REG+FSREG - kills MEMORY - gen - move %2, {IND_RR_W, %1.reg1, %1.reg2} - with SUM_RL REG+FSREG - kills MEMORY - gen - move %2, {IND_RL_W, %1.reg, %1.adr} + gen move %2, {IND_RC_W, %1, 0} with SUM_RC REG+FSREG kills MEMORY - gen - move %2, {IND_RC_W, %1.reg, %1.off} + gen move %2, {IND_RC_W, %1.reg, %1.off} + with SUM_RL REG+FSREG + kills MEMORY + gen move %2, {IND_RL_W, %1.reg, %1.adr} + with SUM_RR REG+FSREG + kills MEMORY + gen move %2, {IND_RR_W, %1.reg1, %1.reg2} pat sti $1==INT64 /* Store double-word indirect */ with REG FREG kills MEMORY - gen - move %2, {IND_RC_D, %1, 0} - with SUM_RR FREG - kills MEMORY - gen - move %2, {IND_RR_D, %1.reg1, %1.reg2} + gen move %2, {IND_RC_D, %1, 0} with SUM_RC FREG kills MEMORY - gen - move %2, {IND_RC_D, %1.reg, %1.off} + gen move %2, {IND_RC_D, %1.reg, %1.off} + with SUM_RR FREG + kills MEMORY + gen move %2, {IND_RR_D, %1.reg1, %1.reg2} /* * This pattern would be too slow: * with REG REG REG @@ -1331,7 +1158,6 @@ PATTERNS bl {LABEL, ".sts"} - /* Arithmetic wrappers */ pat ads $1==4 /* Add var to pointer */ @@ -1382,7 +1208,6 @@ PATTERNS sli $1 - /* Word arithmetic */ pat adi $1==4 /* Add word (second + top) */ @@ -1467,34 +1292,34 @@ PATTERNS yields %a pat and $1==4 /* AND word */ - with GPR NOT_R + with REG NOT_R uses reusing %1, REG gen andc %a, %1, %2.reg yields %a - with NOT_R 
GPR + with NOT_R REG uses reusing %1, REG gen andc %a, %2, %1.reg yields %a - with GPR GPR + with REG REG yields {AND_RR, %1, %2} - with GPR UCONST2 + with REG UCONST2 uses reusing %1, REG gen andiX %a, %1, {CONST, %2.val} yields %a - with UCONST2 GPR + with UCONST2 REG uses reusing %2, REG gen andiX %a, %2, {CONST, %1.val} yields %a - with GPR CONST_HZ + with REG CONST_HZ uses reusing %1, REG gen andisX %a, %1, {CONST, hi(%2.val)} yields %a - with CONST_HZ GPR + with CONST_HZ REG uses reusing %2, REG gen andisX %a, %2, {CONST, hi(%1.val)} @@ -1592,7 +1417,7 @@ PATTERNS gen eqv %a, %1.reg1, %1.reg2 yields %a - with GPR + with REG yields {NOT_R, %1} pat com defined($1) /* NOT set */ @@ -1614,36 +1439,36 @@ PATTERNS cal ".zer" pat sli $1==4 /* Shift left (second << top) */ - with CONST_STACK GPR + with CONST_STACK REG uses reusing %2, REG gen rlwinm %a, %2, {CONST, (%1.val & 0x1F)}, {CONST, 0}, {CONST, 31-(%1.val & 0x1F)} yields %a - with GPR GPR + with REG REG uses reusing %2, REG gen slw %a, %2, %1 yields %a pat sri $1==4 /* Shift right signed (second >> top) */ - with CONST_STACK GPR + with CONST_STACK REG uses reusing %2, REG gen srawi %a, %2, {CONST, %1.val & 0x1F} yields %a - with GPR GPR + with REG REG uses reusing %2, REG gen sraw %a, %2, %1 yields %a pat sru $1==4 /* Shift right unsigned (second >> top) */ - with CONST_STACK GPR + with CONST_STACK REG uses reusing %2, REG gen rlwinm %a, %2, {CONST, 32-(%1.val & 0x1F)}, {CONST, (%1.val & 0x1F)}, {CONST, 31} yields %a - with GPR GPR + with REG REG uses reusing %2, REG gen srw %a, %2, %1 @@ -1685,7 +1510,6 @@ PATTERNS bl {LABEL, ".sar4"} - /* Sets */ pat set defined($1) /* Create singleton set */ @@ -1708,7 +1532,6 @@ PATTERNS cal ".inn" - /* Boolean resolutions */ pat teq /* top = (top == 0) */ @@ -1934,7 +1757,6 @@ PATTERNS yields %a - /* Simple branches */ proc zxx example zeq @@ -2001,7 +1823,6 @@ PATTERNS pat cmu zle $1==4 call cmu4zxx("ble", "bge") - /* Comparisons */ /* Each comparison extracts the lt 
and gt bits from cr0. @@ -2057,7 +1878,6 @@ PATTERNS yields R3 - /* Other branching and labelling */ pat lab topeltsize($1)==4 && !fallthrough($1) @@ -2090,7 +1910,6 @@ PATTERNS b {LABEL, $1} - /* Miscellaneous */ pat cal /* Call procedure */ @@ -2100,7 +1919,7 @@ PATTERNS bl {LABEL, $1} pat cai /* Call procedure indirect */ - with GPR STACK + with REG STACK kills ALL gen mtspr CTR, %1 @@ -2130,7 +1949,7 @@ PATTERNS b {LABEL, ".ret"} pat blm /* Block move constant length */ - with GPR GPR STACK + with REG REG STACK uses REG gen move {CONST, $1}, %a @@ -2141,7 +1960,7 @@ PATTERNS addi SP, SP, {CONST, 12} pat bls /* Block move variable length */ - with GPR GPR GPR STACK + with REG REG REG STACK gen stwu %1, {IND_RC_W, SP, 0-4} stwu %3, {IND_RC_W, SP, 0-4} @@ -2160,7 +1979,6 @@ PATTERNS b {LABEL, ".csb"} - /* EM specials */ pat fil /* Set current filename */ @@ -2208,7 +2026,7 @@ PATTERNS dch pat dch /* FP -> caller FP */ - with GPR + with REG uses reusing %1, REG gen lwz %a, {IND_RC_W, %1, FP_OFFSET} @@ -2246,17 +2064,17 @@ PATTERNS yields %a pat str $1==0 /* Store FP */ - with GPR + with REG gen move %1, FP pat str $1==1 /* Store SP */ - with GPR + with REG gen move %1, SP pat loc ass $1==4 && $2==4 /* Drop 4 bytes from stack */ - with exact GPR + with exact REG /* nop */ with STACK gen @@ -2273,7 +2091,7 @@ PATTERNS gen move {SUM_RC, SP, his(%1.val)}, SP move {SUM_RC, SP, los(%1.val)}, SP - with GPR STACK + with REG STACK gen move {SUM_RR, SP, %1}, SP @@ -2292,8 +2110,6 @@ PATTERNS yields %1 - - /* Floating point support */ /* All very cheap and nasty --- this needs to be properly integrated into From 85391399a407abc734216a88952d90195a7fd3d4 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Wed, 8 Feb 2017 12:31:14 -0500 Subject: [PATCH 10/12] Use ha16/lo16 to load or store 1, 2, 8 bytes from labels. Add the tokens IND_RL_B, IND_RL_H, IND_RL_H_S, IND_RL_D, along with the rules to use them. These rules emit shorter code. 
For example, loading a byte becomes lis, lbz instead of lis, addi, lbz. While making this, I wrongly set IND_RL_D to size 4. Then ncg made infinite recursion in codegen() and stackupto(), until it crashed by stack overflow. I correctly set IND_RL_D to size 8, preventing the crash. --- mach/powerpc/ncg/table | 81 +++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index c608b2fd6..5a5d7ce52 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -189,15 +189,19 @@ TOKENS SEX_H = { GPR reg; } 4. IND_RC_B = { GPR reg; INT off; } 4 off "(" reg ")". + IND_RL_B = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")". IND_RR_B = { GPR reg1; GPR reg2; } 4. IND_RC_H = { GPR reg; INT off; } 4 off "(" reg ")". + IND_RL_H = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")". IND_RR_H = { GPR reg1; GPR reg2; } 4. IND_RC_H_S = { GPR reg; INT off; } 4 off "(" reg ")". + IND_RL_H_S = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")". IND_RR_H_S = { GPR reg1; GPR reg2; } 4. IND_RC_W = { GPR reg; INT off; } 4 off "(" reg ")". IND_RL_W = { GPR reg; ADDR adr; } 4 "lo16[" adr "](" reg ")". IND_RR_W = { GPR reg1; GPR reg2; } 4. IND_RC_D = { GPR reg; INT off; } 8 off "(" reg ")". + IND_RL_D = { GPR reg; ADDR adr; } 8 "lo16[" adr "](" reg ")". IND_RR_D = { GPR reg1; GPR reg2; } 8. NOT_R = { GPR reg; } 4. @@ -244,11 +248,11 @@ SETS LOGICAL_ALL = NOT_R + AND_RR + OR_RR + OR_RC + XOR_RR + XOR_RC. - /* indirect values */ - IND_ALL_B = IND_RC_B + IND_RR_B. - IND_ALL_H = IND_RC_H + IND_RR_H + IND_RC_H_S + IND_RR_H_S. + IND_ALL_B = IND_RC_B + IND_RL_B + IND_RR_B. + IND_ALL_H = IND_RC_H + IND_RL_H + IND_RR_H + + IND_RC_H_S + IND_RL_H_S + IND_RR_H_S. IND_ALL_W = IND_RC_W + IND_RL_W + IND_RR_W. - IND_ALL_D = IND_RC_D + IND_RR_D. + IND_ALL_D = IND_RC_D + IND_RL_D + IND_RR_D. IND_ALL_BHW = IND_ALL_B + IND_ALL_H + IND_ALL_W. 
/* anything killed by sti (store indirect) */ @@ -326,17 +330,17 @@ INSTRUCTIONS frsp FSREG:wo, FREG:ro cost(4, 5). fsub FREG:wo, FREG:ro, FREG:ro cost(4, 5). fsubs FSREG:wo, FSREG:ro, FSREG:ro cost(4, 5). - lbz GPR:wo, IND_RC_B:ro cost(4, 3). + lbz GPR:wo, IND_RC_B+IND_RL_B:ro cost(4, 3). lbzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). - lfd FPR:wo, IND_RC_D:ro cost(4, 5). + lfd FPR:wo, IND_RC_D+IND_RL_D:ro cost(4, 5). lfdu FPR:wo, IND_RC_D:ro cost(4, 5). lfdx FPR:wo, GPR:ro, GPR:ro cost(4, 5). lfs FSREG:wo, IND_RC_W+IND_RL_W:ro cost(4, 4). lfsu FSREG:wo, IND_RC_W:rw cost(4, 4). lfsx FSREG:wo, GPR:ro, GPR:ro cost(4, 4). - lha GPR:wo, IND_RC_H_S:ro cost(4, 3). + lha GPR:wo, IND_RC_H_S+IND_RL_H_S:ro cost(4, 3). lhax GPR:wo, GPR:ro, GPR:ro cost(4, 3). - lhz GPR:wo, IND_RC_H:ro cost(4, 3). + lhz GPR:wo, IND_RC_H+IND_RL_H:ro cost(4, 3). lhzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). lwzu GPR:wo, IND_RC_W:ro cost(4, 3). lwzx GPR:wo, GPR:ro, GPR:ro cost(4, 3). @@ -362,15 +366,15 @@ INSTRUCTIONS sraw GPR:wo, GPR:ro, GPR:ro cost(4, 2). srawi GPR:wo, GPR:ro, CONST:ro cost(4, 2). srw GPR:wo, GPR:ro, GPR:ro. - stb GPR:ro, IND_RC_B:rw cost(4, 3). + stb GPR:ro, IND_RC_B+IND_RL_B:rw cost(4, 3). stbx GPR:ro, GPR:ro, GPR:ro cost(4, 3). - stfd FPR:ro, IND_RC_D:rw cost(4, 4). + stfd FPR:ro, IND_RC_D+IND_RL_D:rw cost(4, 4). stfdu FPR:ro, IND_RC_D:rw cost(4, 4). stfdx FPR:ro, GPR:ro, GPR:ro cost(4, 4). stfs FSREG:ro, IND_RC_W+IND_RL_W:rw cost(4, 3). stfsu FSREG:ro, IND_RC_W:rw cost(4, 3). stfsx FSREG:ro, GPR:ro, GPR:ro cost(4, 3). - sth GPR:ro, IND_RC_H:rw cost(4, 3). + sth GPR:ro, IND_RC_H+IND_RL_H:rw cost(4, 3). sthx GPR:ro, GPR:ro, GPR:ro cost(4, 3). stw GPR:ro, IND_RC_W+IND_RL_W:rw cost(4, 3). stwx GPR:ro, GPR:ro, GPR:ro cost(4, 3). 
@@ -440,7 +444,7 @@ MOVES /* Read byte */ - from IND_RC_B to GPR + from IND_RC_B+IND_RL_B to GPR gen lbz %2, %1 from IND_RR_B to GPR @@ -448,7 +452,7 @@ MOVES /* Write byte */ - from GPR to IND_RC_B + from GPR to IND_RC_B+IND_RL_B gen stb %1, %2 from GPR to IND_RR_B @@ -456,13 +460,13 @@ MOVES /* Read halfword (short) */ - from IND_RC_H to GPR + from IND_RC_H+IND_RL_H to GPR gen lhz %2, %1 from IND_RR_H to GPR gen lhzx %2, %1.reg1, %1.reg2 - from IND_RC_H_S to GPR + from IND_RC_H_S+IND_RL_H_S to GPR gen lha %2, %1 from IND_RR_H_S to GPR @@ -470,7 +474,7 @@ MOVES /* Write halfword */ - from GPR to IND_RC_H + from GPR to IND_RC_H+IND_RL_H gen sth %1, %2 from GPR to IND_RR_H @@ -478,19 +482,13 @@ MOVES /* Read word */ - from IND_RC_W to GPR - gen lwz %2, %1 - - from IND_RL_W to GPR + from IND_RC_W+IND_RL_W to GPR gen lwz %2, %1 from IND_RR_W to GPR gen lwzx %2, %1.reg1, %1.reg2 - from IND_RC_W to FSREG - gen lfs %2, %1 - - from IND_RL_W to FSREG + from IND_RC_W+IND_RL_W to FSREG gen lfs %2, %1 from IND_RR_W to FSREG @@ -498,19 +496,13 @@ MOVES /* Write word */ - from GPR to IND_RC_W - gen stw %1, %2 - - from GPR to IND_RL_W + from GPR to IND_RC_W+IND_RL_W gen stw %1, %2 from GPR to IND_RR_W gen stwx %1, %2.reg1, %2.reg2 - from FSREG to IND_RC_W - gen stfs %1, %2 - - from FSREG to IND_RL_W + from FSREG to IND_RC_W+IND_RL_W gen stfs %1, %2 from FSREG to IND_RR_W @@ -518,16 +510,16 @@ MOVES /* Read double */ - from IND_RC_D to FPR - gen lfd %2, {IND_RC_D, %1.reg, %1.off} + from IND_RC_D+IND_RL_D to FPR + gen lfd %2, %1 from IND_RR_D to FPR gen lfdx %2, %1.reg1, %1.reg2 /* Write double */ - from FPR to IND_RC_D - gen stfd %1, {IND_RC_D, %2.reg, %2.off} + from FPR to IND_RC_D+IND_RL_D + gen stfd %1, %2 from FPR to IND_RR_D gen stfdx %1, %2.reg1, %2.reg2 @@ -1018,6 +1010,8 @@ PATTERNS yields {IND_RC_B, %1, 0} with exact SUM_RC yields {IND_RC_B, %1.reg, %1.off} + with exact SUM_RL + yields {IND_RL_B, %1.reg, %1.adr} with exact SUM_RR yields {IND_RR_B, %1.reg1, %1.reg2} @@ 
-1027,6 +1021,8 @@ PATTERNS yields {IND_RC_H_S, %1, 0} with exact SUM_RC yields {IND_RC_H_S, %1.reg, %1.off} + with exact SUM_RL + yields {IND_RL_H_S, %1.reg, %1.adr} with exact SUM_RR yields {IND_RR_H_S, %1.reg1, %1.reg2} @@ -1035,6 +1031,8 @@ PATTERNS yields {IND_RC_H, %1, 0} with exact SUM_RC yields {IND_RC_H, %1.reg, %1.off} + with exact SUM_RL + yields {IND_RL_H, %1.reg, %1.adr} with exact SUM_RR yields {IND_RR_H, %1.reg1, %1.reg2} @@ -1053,6 +1051,8 @@ PATTERNS yields {IND_RC_D, %1, 0} with exact SUM_RC yields {IND_RC_D, %1.reg, %1.off} + with exact SUM_RL + yields {IND_RL_D, %1.reg, %1.adr} with exact SUM_RR yields {IND_RR_D, %1.reg1, %1.reg2} @@ -1074,6 +1074,9 @@ PATTERNS with SUM_RC REG kills MEMORY gen move %2, {IND_RC_B, %1.reg, %1.off} + with SUM_RL REG + kills MEMORY + gen move %2, {IND_RL_B, %1.reg, %1.adr} with SUM_RR REG kills MEMORY gen move %2, {IND_RR_B, %1.reg1, %1.reg2} @@ -1085,6 +1088,9 @@ PATTERNS with SUM_RC REG kills MEMORY gen move %2, {IND_RC_H, %1.reg, %1.off} + with SUM_RL REG + kills MEMORY + gen move %2, {IND_RL_H, %1.reg, %1.adr} with SUM_RR REG kills MEMORY gen move %2, {IND_RR_H, %1.reg1, %1.reg2} @@ -1110,6 +1116,9 @@ PATTERNS with SUM_RC FREG kills MEMORY gen move %2, {IND_RC_D, %1.reg, %1.off} + with SUM_RL FREG + kills MEMORY + gen move %2, {IND_RL_D, %1.reg, %1.adr} with SUM_RR FREG kills MEMORY gen move %2, {IND_RR_D, %1.reg1, %1.reg2} From 83c13597e104c031a753365efce2b4f3bdb21480 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Fri, 10 Feb 2017 11:45:50 -0500 Subject: [PATCH 11/12] Use "mr" and make a few other tweaks in PowerPC ncg table. Use extended "mr" instead of basic "or" to move registers. Both "mr" and "or" encode the same machine instruction. With "mr", I can more easily search the assembly output for register moves. Fold several stacking rules into a single rule ANY_BHW-REG to STACK. Remove the EM patterns for loc mlu $2==2 and loc slu. The first pattern had the wrong size (should be $2==4, not $2==2). 
Both patterns were redundant. They rewrote loc mlu as loc mli and loc slu as loc sli, but this table doesn't have patterns for loc mli or loc sli, so it is enough to rewrite mlu as mli and slu as sli. --- mach/powerpc/ncg/table | 52 +++++++++--------------------------------- 1 file changed, 11 insertions(+), 41 deletions(-) diff --git a/mach/powerpc/ncg/table b/mach/powerpc/ncg/table index 5a5d7ce52..24822482c 100644 --- a/mach/powerpc/ncg/table +++ b/mach/powerpc/ncg/table @@ -353,11 +353,12 @@ INSTRUCTIONS mfspr GPR:wo, SPR:ro cost(4, 3). mtspr SPR:wo, GPR:ro cost(4, 2). or GPR:wo, GPR:ro, GPR:ro. + mr GPR:wo, GPR:ro. + orX "or." GPR:wo:cc, GPR:ro, GPR:ro. + orX_readonly "or." GPR:ro:cc, GPR:ro, GPR:ro. orc GPR:wo, GPR:ro, GPR:ro. ori GPR:wo, GPR:ro, CONST+LABEL_LO:ro. oris GPR:wo, GPR:ro, CONST:ro. - orX "or." GPR:wo:cc, GPR:ro, GPR:ro. - orX_readonly "or." GPR:ro:cc, GPR:ro, GPR:ro. rlwinm GPR:wo, GPR:ro, CONST:ro, CONST:ro, CONST:ro. extlwi GPR:wo, GPR:ro, CONST:ro, CONST:ro. extrwi GPR:wo, GPR:ro, CONST:ro, CONST:ro. @@ -389,7 +390,7 @@ INSTRUCTIONS MOVES from GPR to GPR - gen or %2, %1, %1 + gen mr %2, %1 /* Constants */ @@ -626,6 +627,9 @@ TESTS /* Given orX %1, %1, %1, ncgg says, "Instruction destroys %1, * not allowed here". We use orX_readonly to trick ncgg. + * + * Using "or." and not "mr." because mach/powerpc/top/table + * was optimizing "or." and not "mr.". 
*/ to test GPR gen @@ -650,33 +654,9 @@ STACKINGRULES stwu %1.2, {IND_RC_W, SP, 0-4} stwu %1.1, {IND_RC_W, SP, 0-4} - from CONST_STACK to STACK + from ANY_BHW-REG to STACK gen - COMMENT("stack CONST_STACK") - move %1, RSCRATCH - stwu RSCRATCH, {IND_RC_W, SP, 0-4} - - from SEX_B to STACK - gen - COMMENT("stack SEX_B") - extsb RSCRATCH, %1.reg - stwu RSCRATCH, {IND_RC_W, SP, 0-4} - - from SEX_H to STACK - gen - COMMENT("stack SEX_H") - extsh RSCRATCH, %1.reg - stwu RSCRATCH, {IND_RC_W, SP, 0-4} - - from SUM_ALL + LOGICAL_ALL to STACK - gen - COMMENT("stack SUM_ALL + LOGICAL_ALL") - move %1, RSCRATCH - stwu RSCRATCH, {IND_RC_W, SP, 0-4} - - from IND_ALL_BHW to STACK - gen - COMMENT("stack IND_ALL_BHW") + COMMENT("stack ANY_BHW-REG") move %1, RSCRATCH stwu RSCRATCH, {IND_RC_W, SP, 0-4} @@ -1198,21 +1178,11 @@ PATTERNS loc 1 sbi 4 - pat loc mlu $2==2 /* Unsigned multiply by constant */ - leaving - loc $1 - mli 4 - - pat mlu /* Unsigned multiply by var */ + pat mlu /* Multiply unsigned */ leaving mli $1 - pat loc slu /* Shift left unsigned by constant amount */ - leaving - loc $1 - sli $2 - - pat slu /* Shift left unsigned by variable amount */ + pat slu /* Shift left unsigned */ leaving sli $1 From 13beb5e336002c314d36f729b5aea4bb181e2a23 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Fri, 10 Feb 2017 11:59:34 -0500 Subject: [PATCH 12/12] Document RELOLIS from commit 1bf58cf. I hastily chose the name RELOLIS for this relocation type. 
If we want to rename it, we only need to edit these files: - h/out.h - mach/powerpc/as/mach5.c - util/amisc/ashow.c - util/led/ack.out.5 - util/led/relocate.c --- util/led/ack.out.5 | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/util/led/ack.out.5 b/util/led/ack.out.5 index fa997e8ca..c8b9fc8fa 100644 --- a/util/led/ack.out.5 +++ b/util/led/ack.out.5 @@ -161,12 +161,13 @@ struct outrelo { /* * relocation type bits */ -#define RELSZ 0x0fff /* relocation length */ +#define RELSZ 0x0fff /* relocation length */ #define RELO1 0x01 /* 1 byte */ #define RELO2 0x02 /* 2 bytes */ #define RELO4 0x03 /* 4 bytes */ #define RELOPPC 0x04 /* 26-bit PowerPC address */ -#define RELOVC4 0x06 /* VideoCore IV address in 32-bit insruction */ +#define RELOLIS 0x05 /* PowerPC lis */ +#define RELOVC4 0x06 /* VideoCore IV address in 32-bit instruction */ #define RELPC 0x2000 /* pc relative */ #define RELBR 0x4000 /* High order byte lowest address. */ #define RELWR 0x8000 /* High order word lowest address. */ @@ -225,10 +226,23 @@ is an absolute number, and the datum is relocated pc relative. The relocatable datum must then be relocated with respect to the base address of its section. .PP +For RELOPPC and RELOVC4, the relocatable datum is a PowerPC or +VideoCore IV instruction. +The relocation depends on the instruction, and uses an offset encoded +in the instruction. +.PP +RELOLIS assembles a PowerPC \fBlis\fR instruction. +The relocatable datum is a 4-byte integer. +The high bit is set for ha16 or clear for hi16. +The next 5 bits are the register \fIRT\fR. +The low 26 bits are a signed offset. +The relocation replaces the datum with the PowerPC instruction +\(oq\fBlis\fR\ \fIRT\fR,\ ha16[\fIsymbol\fR\ +\ \fIoffset\fR]\(cq. +.PP .B The symbol table. .br -This table contains definitions of symbols. It is referred to by -outrelo-structures, and can be used by debuggers. +This table contains definitions of symbols.
+It is referred to by outrelo-structures, and can be used by debuggers. Entries in this table have the following structure: .PP .nf