Add .data8 for 8-byte literal integers to the assembler.

This takes literal integers, not expressions, because each machine defines its own valu_t for expressions, but valu_t can be too narrow for an 8-byte integer, and I don't want to change all the machines to use a wider valu_t. Instead, change how the assembler parses literal integers. Remove the NUMBER token and add a NUMBER8 token for an int64_t. The new .data8 pseudo emits all 8 bytes of the int64_t; expressions narrow the int64_t to a valu_t. Don't add any checks for integer overflow; expressions and .data* pseudos continue to ignore overflow when a number is too wide. This commit requires int64_t and uint64_t in the C compiler to build the assembler. The ACK's own C compiler doesn't have these. For the assembler's temporary file, add NUMBER4 to store 4-byte integers. NUMBER4 acts like NUMBER[0-3] and only stores a non-negative integer. Each negative integer now takes 8 bytes (up from 4) in the temporary file. Move the `\fI` and `\fP` in the uni_ass(6) manual, so the square brackets in `thing [, thing]*` are not italic. This looks nicer in my terminal, where italic text is underlined.
2019-08-13 11:47:44 -04:00 · 2019-08-13 11:47:44 -04:00 · 054b9c87e1
commit 054b9c87e1
parent c2604dbb04
6 changed files with 92 additions and 34 deletions
--- a/mach/proto/as/comm1.h
+++ b/mach/proto/as/comm1.h
@ -151,8 +151,9 @@ void	 emit1(int);
 void	 emit2(int);
 void	 emit4(long);
 void	 emitx(valu_t, int);
-void     emitf(int size, int negative);
+void	 emit8(int64_t);
 void	 emitstr(int);
+void	 emitf(int size, int negative);
 void	 yyerror(const char *);
 void	 nosect(void);
 void	 fatal(const char *, ...);
--- a/mach/proto/as/comm2.y
+++ b/mach/proto/as/comm2.y
@ -22,6 +22,7 @@ static item_t	*last_it, *o_it;
 %union {
 	word_t	y_word;
 	valu_t	y_valu;
+	int64_t	y_valu8;
 	expr_t	y_expr;
 	item_t	*y_item;
 #ifdef ASLD
@ -38,15 +39,17 @@ static item_t	*last_it, *o_it;
 %token <y_valu> CODE1
 %token <y_valu> CODE2
 %token <y_valu> CODE4
-%token NUMBER0		/* keep NUMBER* in this order */
+%token NUMBER0		/* keep NUMBER[0-4] in this order */
 %token NUMBER1
 %token NUMBER2
 %token NUMBER3
-%token <y_valu> NUMBER
+%token NUMBER4
+%token <y_valu8> NUMBER8
 %token NUMBERF
 %token DOT
 %token EXTERN
 %token <y_word> DATA
+%token DATA8
 %token <y_word> DATAF
 %token <y_word> ASCII
 %token SECTION
@ -74,6 +77,7 @@ static item_t	*last_it, *o_it;
 %nonassoc '~'

 %type <y_valu> absexp optabs1 optabs2
+%type <y_valu8> datum8
 %type <y_expr> expr
 %type <y_item> id_fb

@ -105,7 +109,7 @@ program	:	/* empty */
 #endif
 	|	program IDENT ':'
 			{	newident($2, DOTTYP); newlabel($2);}
-	|	program NUMBER ':'
+	|	program NUMBER8 ':'
 			{	if ($2 < 0 || $2 > 9) {
 					serror("bad f/b label");
 					$2 = 0;
@ -121,8 +125,8 @@ program	:	/* empty */
 	|	program operation ';'
 	|	program operation '\n'
 			{	lineno++; LISTLINE(1); RELODONE;}
-	|	program '#' NUMBER STRING '\n'
-			{	lineno = $3;
+	|	program '#' NUMBER8 STRING '\n'
+			{	lineno = $3; /* long = int64_t */
 				if (modulename) strncpy(modulename, stringbuf, STRINGMAX-1);
 				LISTLINE(1); RELODONE;
 			}
@ -251,6 +255,7 @@ operation
 				DOTSCT->s_zero += $2;
 			}
 	|	DATA datalist
+	|	DATA8 data8list
 	|	DATAF dataflist
 	|	ASCII STRING
 			{	emitstr($1);}
@ -280,6 +285,20 @@ datalist
 			}
 	;

+/* datum8 is a literal integer, not an expr, because int64_t may be wider than valu_t. */
+datum8	:	NUMBER8
+			{	$$ = $1;}
+	|	'-' NUMBER8
+			{	$$ = (int64_t)(0 - (uint64_t)$2); /* unsigned negation: no signed overflow at INT64_MIN */}
+	;
+
+data8list
+	:	datum8
+			{	emit8($1); /* first (or only) literal of a .data8 list */}
+	|	data8list ',' datum8
+			{	emit8($3); /* each further comma-separated literal, in source order */}
+	;
+
 numberf
 	:	NUMBERF
 			{
@ -300,8 +319,10 @@ expr	:	error
 			{	serror("expr syntax err");
 				$$.val = 0; $$.typ = S_UND;
 			}
-	|	NUMBER
-			{	$$.val = $1; $$.typ = S_ABS;}
+	|	NUMBER8
+			{	$$.val = $1; /* valu_t = int64_t */
+				$$.typ = S_ABS;
+			}
 	|	id_fb
 			{	$$.val = load($1);
 				last_it = $1;
--- a/mach/proto/as/comm3.c
+++ b/mach/proto/as/comm3.c
@ -29,6 +29,7 @@ item_t	keytab[] = {
 	{0,	DATA,		RELO1,	".data1"},
 	{0,	DATA,		RELO2,	".data2"},
 	{0,	DATA,		RELO4,	".data4"},
+	{0,	DATA8,		0,	".data8"},
 	{0,  DATAF,      4,      ".dataf4"},
 	{0,  DATAF,      8,      ".dataf8"},
 	{0,	ASCII,		0,		".ascii"},
--- a/mach/proto/as/comm5.c
+++ b/mach/proto/as/comm5.c
@ -101,7 +101,7 @@ int yylex(void)

 void putval(int c)
 {
-	valu_t v;
+	int64_t v;
 	int n = 0;
 	char* p = 0;

@ -110,27 +110,32 @@ void putval(int c)
 	{
 		case CODE1:
 			n = 1;
+			v = yylval.y_valu;
 			goto putnum;
 		case CODE2:
 			n = 2;
+			v = yylval.y_valu;
 			goto putnum;
 		case CODE4:
 			n = 4;
-			goto putnum;
-		case NUMBER:
 			v = yylval.y_valu;
+			goto putnum;
+		case NUMBER8:
+			v = yylval.y_valu8;
 			for (n = 0; n < sizeof(v); n++)
 			{
 				if (v == 0)
 					break;
 				v >>= 8;
 			}
-			assert(n <= 4);
+			if (n <= 4)
 				c = NUMBER0 + n;
+			else
+				n = 8;
+			v = yylval.y_valu8;
 		putnum:
 			putc(c, tempfile);
 			putc(c >> 8, tempfile);
-			v = yylval.y_valu;
 			while (--n >= 0)
 				putc((int)(v >> (n * 8)), tempfile);
 			return;
@ -188,8 +193,8 @@ void putval(int c)

 int getval(int c)
 {
+	int64_t v;
 	int n = 0;
-	valu_t v;
 	char* p = 0;

 	switch (c)
@ -204,22 +209,26 @@ int getval(int c)
 			n = 4;
 			goto getnum;
 		case NUMBER0:
-			c = NUMBER;
+			c = NUMBER8;
 			goto getnum;
 		case NUMBER1:
 			n = 1;
-			c = NUMBER;
+			c = NUMBER8;
 			goto getnum;
 		case NUMBER2:
 			n = 2;
-			c = NUMBER;
+			c = NUMBER8;
 			goto getnum;
 		case NUMBER3:
 			n = 3;
-			c = NUMBER;
+			c = NUMBER8;
 			goto getnum;
-		case NUMBER:
+		case NUMBER4:
 			n = 4;
+			c = NUMBER8;
+			goto getnum;
+		case NUMBER8:
+			n = 8;
 		getnum:
 			v = 0;
 			while (--n >= 0)
@ -227,6 +236,9 @@ int getval(int c)
 				v <<= 8;
 				v |= getc(tempfile);
 			}
+			if (c == NUMBER8)
+				yylval.y_valu8 = v;
+			else
 				yylval.y_valu = v;
 			return (c);
 		case IDENT:
@ -409,6 +421,7 @@ static void need_stringbuf()

 static int innumber(int c)
 {
+	uint64_t uv;
 	char* p;
 	int radix;
 	static char num[40 + 1];
@ -450,7 +463,7 @@ static int innumber(int c)
 	}
 	if (radix != 16 && (c == 'f' || c == 'b'))
 		return (infbsym(num));
-	yylval.y_valu = 0;
+	uv = 0;
 	while ((c = *p++))
 	{
 		if (c > '9')
@ -458,9 +471,10 @@ static int innumber(int c)
 		c -= '0';
 		if ((unsigned)c >= radix)
 			serror("digit exceeds radix");
-		yylval.y_valu = yylval.y_valu * radix + c;
+		uv = uv * radix + c;
 	}
-	return (NUMBER);
+	yylval.y_valu8 = uv; /* signed = unsigned */
+	return (NUMBER8);

 floatconstant:
 	do
--- a/mach/proto/as/comm7.c
+++ b/mach/proto/as/comm7.c
@ -336,6 +336,21 @@ void emitx(valu_t val, int n)
 	}
 }

+void emit8(int64_t arg) /* emit all 8 bytes of arg as four 16-bit pieces via emit2() */
+{
+#ifdef WORDS_REVERSED /* most significant 16-bit piece first */
+	emit2((int)(arg >> 48)); /* bits 63..48 */
+	emit2((int)(arg >> 32)); /* bits 47..32 in the low half; presumably emit2 uses only those -- confirm */
+	emit2((int)(arg >> 16)); /* bits 31..16 in the low half */
+	emit2((int)(arg)); /* bits 15..0 in the low half */
+#else /* least significant 16-bit piece first */
+	emit2((int)(arg)); /* bits 15..0 in the low half */
+	emit2((int)(arg >> 16)); /* bits 31..16 in the low half */
+	emit2((int)(arg >> 32)); /* bits 47..32 in the low half */
+	emit2((int)(arg >> 48)); /* bits 63..48 */
+#endif
+}
+
 void emitstr(int zero)
 {
 	int i;
--- a/man/uni_ass.6
+++ b/man/uni_ass.6
@ -185,10 +185,10 @@ machine.
 \&\\$1
 .sp 1
 ..
-.Pu ".extern \fIidentifier [, identifier]*\fP"
+.Pu ".extern \fIidentifier\fP [, \fIidentifier\fP]*"
 The identifiers mentioned in the list are exported and can be
 used in other modules.
-.Pu ".define \fIidentifier [, identifier]*\fP"
+.Pu ".define \fIidentifier\fP [, \fIidentifier\fP]*"
 Used for modules that are to be part of a libary.
 The .define pseudo's should be the first in such modules.
 When scanning a module in a library the assembler\-loader
@ -197,21 +197,27 @@ mentioned in a .define list. If so, it includes that module in
 the program.
 The identifiers mentioned in the list are exported and can be
 used in other modules.
-.Pu ".data1 \fIexpression [, expression]*\fP"
+.Pu ".data1 \fIexpression\fP [, \fIexpression\fP]*"
 Initialize a sequence of bytes.
 This is not followed by automatic alignment.
-.Pu ".data2 \fIexpression [, expression]*\fP"
+.Pu ".data2 \fIexpression\fP [, \fIexpression\fP]*"
 Initialize a sequence of shorts (2-byte values).
 This is not followed by automatic alignment.
-.Pu ".data4 \fIexpression [, expression]*\fP"
+.Pu ".data4 \fIexpression\fP [, \fIexpression\fP]*"
 Initialize a sequence of longs (4-byte values).
 This is not followed by automatic alignment.
-.Pu ".dataf4 \fIliteralfloat [, literalfloat]*\fP"
+.Pu ".data8 \fIliteralint\fP [, \fIliteralint\fP]*"
+Initialize a sequence of long longs (8-byte values).
+This accepts only literal integers, not symbols or expressions; but
+a \fIliteralint\fP may be any signed or unsigned 8-byte integer, even
+if it is outside the usual range for the machine.
+This is not followed by automatic alignment.
+.Pu ".dataf4 \fIliteralfloat\fP [, \fIliteralfloat\fP]*"
 Initialize a sequence of floats (4-byte values).
 The values must be literal floating point constants containing
 a dot character.
 This is not followed by automatic alignment.
-.Pu ".dataf8 \fIliteralfloat [, literalfloat]*\fP"
+.Pu ".dataf8 \fIliteralfloat\fP [, \fIliteralfloat\fP]*"
 Initialize a sequence of doubles (8-byte values).
 The values must be literal floating point constants containing
 a dot character.