From 054b9c87e1932043617aa81fdd773bff2f9640f3 Mon Sep 17 00:00:00 2001 From: George Koehler Date: Tue, 13 Aug 2019 11:47:44 -0400 Subject: [PATCH] Add .data8 for 8-byte literal integers to the assembler. This takes literal integers, not expressions, because each machine defines its own valu_t for expressions, but valu_t can be too narrow for an 8-byte integer, and I don't want to change all the machines to use a wider valu_t. Instead, change how the assembler parses literal integers. Remove the NUMBER token and add a NUMBER8 token for an int64_t. The new .data8 pseudo emits all 8 bytes of the int64_t; expressions narrow the int64_t to a valu_t. Don't add any checks for integer overflow; expressions and .data* pseudos continue to ignore overflow when a number is too wide. This commit requires int64_t and uint64_t in the C compiler to build the assembler. The ACK's own C compiler doesn't have these. For the assembler's temporary file, add NUMBER4 to store 4-byte integers. NUMBER4 acts like NUMBER[0-3] and only stores a non-negative integer. Each negative integer now takes 8 bytes (up from 4) in the temporary file. Move the `\fI` and `\fP` in the uni_ass(6) manual, so the square brackets in `thing [, thing]*` are not italic. This looks nicer in my terminal, where italic text is underlined. 
--- mach/proto/as/comm1.h | 3 ++- mach/proto/as/comm2.y | 41 ++++++++++++++++++++++++++++---------- mach/proto/as/comm3.c | 1 + mach/proto/as/comm5.c | 46 ++++++++++++++++++++++++++++--------------- mach/proto/as/comm7.c | 15 ++++++++++++++ man/uni_ass.6 | 20 ++++++++++++------- 6 files changed, 92 insertions(+), 34 deletions(-) diff --git a/mach/proto/as/comm1.h b/mach/proto/as/comm1.h index 391675a98..29acbca6e 100644 --- a/mach/proto/as/comm1.h +++ b/mach/proto/as/comm1.h @@ -151,8 +151,9 @@ void emit1(int); void emit2(int); void emit4(long); void emitx(valu_t, int); -void emitf(int size, int negative); +void emit8(int64_t); void emitstr(int); +void emitf(int size, int negative); void yyerror(const char *); void nosect(void); void fatal(const char *, ...); diff --git a/mach/proto/as/comm2.y b/mach/proto/as/comm2.y index 3e1229a41..1e28979fa 100644 --- a/mach/proto/as/comm2.y +++ b/mach/proto/as/comm2.y @@ -22,6 +22,7 @@ static item_t *last_it, *o_it; %union { word_t y_word; valu_t y_valu; + int64_t y_valu8; expr_t y_expr; item_t *y_item; #ifdef ASLD @@ -38,15 +39,17 @@ static item_t *last_it, *o_it; %token CODE1 %token CODE2 %token CODE4 -%token NUMBER0 /* keep NUMBER* in this order */ +%token NUMBER0 /* keep NUMBER[0-4] in this order */ %token NUMBER1 %token NUMBER2 %token NUMBER3 -%token NUMBER +%token NUMBER4 +%token NUMBER8 %token NUMBERF %token DOT %token EXTERN %token DATA +%token DATA8 %token DATAF %token ASCII %token SECTION @@ -70,10 +73,11 @@ static item_t *last_it, *o_it; %left '<' '>' OP_LE OP_GE %left OP_LL OP_RR %left '+' '-' -%left '*' '/' '%' +%left '*' '/' '%' %nonassoc '~' %type absexp optabs1 optabs2 +%type datum8 %type expr %type id_fb @@ -105,7 +109,7 @@ program : /* empty */ #endif | program IDENT ':' { newident($2, DOTTYP); newlabel($2);} - | program NUMBER ':' + | program NUMBER8 ':' { if ($2 < 0 || $2 > 9) { serror("bad f/b label"); $2 = 0; @@ -121,8 +125,8 @@ program : /* empty */ | program operation ';' | program operation '\n' { 
lineno++; LISTLINE(1); RELODONE;} - | program '#' NUMBER STRING '\n' - { lineno = $3; + | program '#' NUMBER8 STRING '\n' + { lineno = $3; /* long = int64_t */ if (modulename) strncpy(modulename, stringbuf, STRINGMAX-1); LISTLINE(1); RELODONE; } @@ -251,7 +255,8 @@ operation DOTSCT->s_zero += $2; } | DATA datalist - | DATAF dataflist + | DATA8 data8list + | DATAF dataflist | ASCII STRING { emitstr($1);} ; @@ -280,6 +285,20 @@ datalist } ; +/* datum8 isn't expr, because int64_t may be wider than valu_t. */ +datum8 : NUMBER8 + { $$ = $1;} + | '-' NUMBER8 + { $$ = -$2;} + ; + +data8list + : datum8 + { emit8($1);} + | data8list ',' datum8 + { emit8($3);} + ; + numberf : NUMBERF { @@ -300,10 +319,12 @@ expr : error { serror("expr syntax err"); $$.val = 0; $$.typ = S_UND; } - | NUMBER - { $$.val = $1; $$.typ = S_ABS;} + | NUMBER8 + { $$.val = $1; /* valu_t = int64_t */ + $$.typ = S_ABS; + } | id_fb - { $$.val = load($1); + { $$.val = load($1); last_it = $1; $$.typ = $1->i_type & ~S_EXT; } diff --git a/mach/proto/as/comm3.c b/mach/proto/as/comm3.c index 2b3afaba5..f45aa723c 100644 --- a/mach/proto/as/comm3.c +++ b/mach/proto/as/comm3.c @@ -29,6 +29,7 @@ item_t keytab[] = { {0, DATA, RELO1, ".data1"}, {0, DATA, RELO2, ".data2"}, {0, DATA, RELO4, ".data4"}, + {0, DATA8, 0, ".data8"}, {0, DATAF, 4, ".dataf4"}, {0, DATAF, 8, ".dataf8"}, {0, ASCII, 0, ".ascii"}, diff --git a/mach/proto/as/comm5.c b/mach/proto/as/comm5.c index 2b301a2e3..2987aedb2 100644 --- a/mach/proto/as/comm5.c +++ b/mach/proto/as/comm5.c @@ -101,7 +101,7 @@ int yylex(void) void putval(int c) { - valu_t v; + int64_t v; int n = 0; char* p = 0; @@ -110,27 +110,32 @@ void putval(int c) { case CODE1: n = 1; + v = yylval.y_valu; goto putnum; case CODE2: n = 2; + v = yylval.y_valu; goto putnum; case CODE4: n = 4; - goto putnum; - case NUMBER: v = yylval.y_valu; + goto putnum; + case NUMBER8: + v = yylval.y_valu8; for (n = 0; n < sizeof(v); n++) { if (v == 0) break; v >>= 8; } - assert(n <= 4); - c = NUMBER0 + n; 
+ if (n <= 4) + c = NUMBER0 + n; + else + n = 8; + v = yylval.y_valu8; putnum: putc(c, tempfile); putc(c >> 8, tempfile); - v = yylval.y_valu; while (--n >= 0) putc((int)(v >> (n * 8)), tempfile); return; @@ -188,8 +193,8 @@ void putval(int c) int getval(int c) { + int64_t v; int n = 0; - valu_t v; char* p = 0; switch (c) @@ -204,22 +209,26 @@ int getval(int c) n = 4; goto getnum; case NUMBER0: - c = NUMBER; + c = NUMBER8; goto getnum; case NUMBER1: n = 1; - c = NUMBER; + c = NUMBER8; goto getnum; case NUMBER2: n = 2; - c = NUMBER; + c = NUMBER8; goto getnum; case NUMBER3: n = 3; - c = NUMBER; + c = NUMBER8; goto getnum; - case NUMBER: + case NUMBER4: n = 4; + c = NUMBER8; + goto getnum; + case NUMBER8: + n = 8; getnum: v = 0; while (--n >= 0) @@ -227,7 +236,10 @@ int getval(int c) v <<= 8; v |= getc(tempfile); } - yylval.y_valu = v; + if (c == NUMBER8) + yylval.y_valu8 = v; + else + yylval.y_valu = v; return (c); case IDENT: case FBSYM: @@ -409,6 +421,7 @@ static void need_stringbuf() static int innumber(int c) { + uint64_t uv; char* p; int radix; static char num[40 + 1]; @@ -450,7 +463,7 @@ static int innumber(int c) } if (radix != 16 && (c == 'f' || c == 'b')) return (infbsym(num)); - yylval.y_valu = 0; + uv = 0; while ((c = *p++)) { if (c > '9') @@ -458,9 +471,10 @@ static int innumber(int c) c -= '0'; if ((unsigned)c >= radix) serror("digit exceeds radix"); - yylval.y_valu = yylval.y_valu * radix + c; + uv = uv * radix + c; } - return (NUMBER); + yylval.y_valu8 = uv; /* signed = unsigned */ + return (NUMBER8); floatconstant: do diff --git a/mach/proto/as/comm7.c b/mach/proto/as/comm7.c index 418bf363c..9b93139d4 100644 --- a/mach/proto/as/comm7.c +++ b/mach/proto/as/comm7.c @@ -336,6 +336,21 @@ void emitx(valu_t val, int n) } } +void emit8(int64_t arg) +{ +#ifdef WORDS_REVERSED + emit2((int)(arg >> 48)); + emit2((int)(arg >> 32)); + emit2((int)(arg >> 16)); + emit2((int)(arg)); +#else + emit2((int)(arg)); + emit2((int)(arg >> 16)); + emit2((int)(arg >> 32)); + 
emit2((int)(arg >> 48)); +#endif +} + void emitstr(int zero) { int i; diff --git a/man/uni_ass.6 b/man/uni_ass.6 index 6d970621e..558fae2de 100644 --- a/man/uni_ass.6 +++ b/man/uni_ass.6 @@ -185,10 +185,10 @@ machine. \&\\$1 .sp 1 .. -.Pu ".extern \fIidentifier [, identifier]*\fP" +.Pu ".extern \fIidentifier\fP [, \fIidentifier\fP]*" The identifiers mentioned in the list are exported and can be used in other modules. -.Pu ".define \fIidentifier [, identifier]*\fP" +.Pu ".define \fIidentifier\fP [, \fIidentifier\fP]*" Used for modules that are to be part of a libary. The .define pseudo's should be the first in such modules. When scanning a module in a library the assembler\-loader @@ -197,21 +197,27 @@ mentioned in a .define list. If so, it includes that module in the program. The identifiers mentioned in the list are exported and can be used in other modules. -.Pu ".data1 \fIexpression [, expression]*\fP" +.Pu ".data1 \fIexpression\fP [, \fIexpression\fP]*" Initialize a sequence of bytes. This is not followed by automatic alignment. -.Pu ".data2 \fIexpression [, expression]*\fP" +.Pu ".data2 \fIexpression\fP [, \fIexpression\fP]*" Initialize a sequence of shorts (2-byte values). This is not followed by automatic alignment. -.Pu ".data4 \fIexpression [, expression]*\fP" +.Pu ".data4 \fIexpression\fP [, \fIexpression\fP]*" Initialize a sequence of longs (4-byte values). This is not followed by automatic alignment. -.Pu ".dataf4 \fIliteralfloat [, literalfloat]*\fP" +.Pu ".data8 \fIliteralint\fP [, \fIliteralint\fP]*" +Initialize a sequence of long longs (8-byte values). +This accepts only literal integers, not symbols or expressions; but +a \fIliteralint\fP may be any signed or unsigned 8-byte integer, even +if it is outside the usual range for the machine. +This is not followed by automatic alignment. +.Pu ".dataf4 \fIliteralfloat\fP [, \fIliteralfloat\fP]*" Initialize a sequence of floats (4-byte values). 
The values must be literal floating point constants containing a dot character. This is not followed by automatic alignment. -.Pu ".dataf8 \fIliteralfloat [, literalfloat]*\fP" +.Pu ".dataf8 \fIliteralfloat\fP [, \fIliteralfloat\fP]*" Initialize a sequence of doubles (8-byte values). The values must be literal floating point constants containing a dot character.