From 054b9c87e1932043617aa81fdd773bff2f9640f3 Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Tue, 13 Aug 2019 11:47:44 -0400
Subject: [PATCH] Add .data8 for 8-byte literal integers to the assembler.

This takes literal integers, not expressions, because each machine
defines its own valu_t for expressions, but valu_t can be too narrow
for an 8-byte integer, and I don't want to change all the machines to
use a wider valu_t.  Instead, change how the assembler parses literal
integers.  Remove the NUMBER token and add a NUMBER8 token for an
int64_t.  The new .data8 pseudo emits all 8 bytes of the int64_t;
expressions narrow the int64_t to a valu_t.  Don't add any checks for
integer overflow; expressions and .data* pseudos continue to ignore
overflow when a number is too wide.

This commit requires int64_t and uint64_t in the C compiler to build
the assembler.  The ACK's own C compiler doesn't have these.

For the assembler's temporary file, add NUMBER4 to store 4-byte
integers.  NUMBER4 acts like NUMBER[0-3] and only stores a
non-negative integer.  Each negative integer now takes 8 bytes (up
from 4) in the temporary file.

Move the `\fI` and `\fP` in the uni_ass(6) manual, so the square
brackets in `thing [, thing]*` are not italic.  This looks nicer in my
terminal, where italic text is underlined.
---
 mach/proto/as/comm1.h |  3 ++-
 mach/proto/as/comm2.y | 41 ++++++++++++++++++++++++++++----------
 mach/proto/as/comm3.c |  1 +
 mach/proto/as/comm5.c | 46 ++++++++++++++++++++++++++++---------------
 mach/proto/as/comm7.c | 15 ++++++++++++++
 man/uni_ass.6         | 20 ++++++++++++-------
 6 files changed, 92 insertions(+), 34 deletions(-)

diff --git a/mach/proto/as/comm1.h b/mach/proto/as/comm1.h
index 391675a98..29acbca6e 100644
--- a/mach/proto/as/comm1.h
+++ b/mach/proto/as/comm1.h
@@ -151,8 +151,9 @@ void	 emit1(int);
 void	 emit2(int);
 void	 emit4(long);
 void	 emitx(valu_t, int);
-void     emitf(int size, int negative);
+void	 emit8(int64_t);
 void	 emitstr(int);
+void	 emitf(int size, int negative);
 void	 yyerror(const char *);
 void	 nosect(void);
 void	 fatal(const char *, ...);
diff --git a/mach/proto/as/comm2.y b/mach/proto/as/comm2.y
index 3e1229a41..1e28979fa 100644
--- a/mach/proto/as/comm2.y
+++ b/mach/proto/as/comm2.y
@@ -22,6 +22,7 @@ static item_t	*last_it, *o_it;
 %union {
 	word_t	y_word;
 	valu_t	y_valu;
+	int64_t	y_valu8;
 	expr_t	y_expr;
 	item_t	*y_item;
 #ifdef ASLD
@@ -38,15 +39,17 @@ static item_t	*last_it, *o_it;
 %token <y_valu> CODE1
 %token <y_valu> CODE2
 %token <y_valu> CODE4
-%token NUMBER0		/* keep NUMBER* in this order */
+%token NUMBER0		/* keep NUMBER[0-4] in this order */
 %token NUMBER1
 %token NUMBER2
 %token NUMBER3
-%token <y_valu> NUMBER
+%token NUMBER4
+%token <y_valu8> NUMBER8
 %token NUMBERF
 %token DOT
 %token EXTERN
 %token <y_word> DATA
+%token DATA8
 %token <y_word> DATAF
 %token <y_word> ASCII
 %token SECTION
@@ -70,10 +73,11 @@ static item_t	*last_it, *o_it;
 %left '<' '>' OP_LE OP_GE
 %left OP_LL OP_RR
 %left '+' '-'
-%left '*' '/' '%' 
+%left '*' '/' '%'
 %nonassoc '~'
 
 %type <y_valu> absexp optabs1 optabs2
+%type <y_valu8> datum8
 %type <y_expr> expr
 %type <y_item> id_fb
 
@@ -105,7 +109,7 @@ program	:	/* empty */
 #endif
 	|	program IDENT ':'
 			{	newident($2, DOTTYP); newlabel($2);}
-	|	program NUMBER ':'
+	|	program NUMBER8 ':'
 			{	if ($2 < 0 || $2 > 9) {
 					serror("bad f/b label");
 					$2 = 0;
@@ -121,8 +125,8 @@ program	:	/* empty */
 	|	program operation ';'
 	|	program operation '\n'
 			{	lineno++; LISTLINE(1); RELODONE;}
-	|	program '#' NUMBER STRING '\n'
-			{	lineno = $3;
+	|	program '#' NUMBER8 STRING '\n'
+			{	lineno = $3; /* long = int64_t */
 				if (modulename) strncpy(modulename, stringbuf, STRINGMAX-1);
 				LISTLINE(1); RELODONE;
 			}
@@ -251,7 +255,8 @@ operation
 				DOTSCT->s_zero += $2;
 			}
 	|	DATA datalist
-	|   DATAF dataflist
+	|	DATA8 data8list
+	|	DATAF dataflist
 	|	ASCII STRING
 			{	emitstr($1);}
 	;
@@ -280,6 +285,20 @@ datalist
 			}
 	;
 
+/* datum8 isn't expr, because int64_t may be wider than valu_t. */
+datum8	:	NUMBER8
+			{	$$ = $1;}
+	|	'-' NUMBER8	/* negate as unsigned: -INT64_MIN would overflow */
+			{	$$ = (int64_t)(0 - (uint64_t)$2);}
+	;
+
+data8list	/* comma-separated datum8 values; each is passed to emit8() as parsed */
+	:	datum8
+			{	emit8($1);}
+	|	data8list ',' datum8
+			{	emit8($3);}
+	;
+
 numberf
 	:	NUMBERF
 			{
@@ -300,10 +319,12 @@ expr	:	error
 			{	serror("expr syntax err");
 				$$.val = 0; $$.typ = S_UND;
 			}
-	|	NUMBER
-			{	$$.val = $1; $$.typ = S_ABS;}
+	|	NUMBER8
+			{	$$.val = $1; /* valu_t = int64_t */
+				$$.typ = S_ABS;
+			}
 	|	id_fb
-			{	$$.val = load($1); 
+			{	$$.val = load($1);
 				last_it = $1;
 				$$.typ = $1->i_type & ~S_EXT;
 			}
diff --git a/mach/proto/as/comm3.c b/mach/proto/as/comm3.c
index 2b3afaba5..f45aa723c 100644
--- a/mach/proto/as/comm3.c
+++ b/mach/proto/as/comm3.c
@@ -29,6 +29,7 @@ item_t	keytab[] = {
 	{0,	DATA,		RELO1,	".data1"},
 	{0,	DATA,		RELO2,	".data2"},
 	{0,	DATA,		RELO4,	".data4"},
+	{0,	DATA8,		0,	".data8"},
 	{0,  DATAF,      4,      ".dataf4"},
 	{0,  DATAF,      8,      ".dataf8"},
 	{0,	ASCII,		0,		".ascii"},
diff --git a/mach/proto/as/comm5.c b/mach/proto/as/comm5.c
index 2b301a2e3..2987aedb2 100644
--- a/mach/proto/as/comm5.c
+++ b/mach/proto/as/comm5.c
@@ -101,7 +101,7 @@ int yylex(void)
 
 void putval(int c)
 {
-	valu_t v;
+	int64_t v;
 	int n = 0;
 	char* p = 0;
 
@@ -110,27 +110,32 @@ void putval(int c)
 	{
 		case CODE1:
 			n = 1;
+			v = yylval.y_valu;
 			goto putnum;
 		case CODE2:
 			n = 2;
+			v = yylval.y_valu;
 			goto putnum;
 		case CODE4:
 			n = 4;
-			goto putnum;
-		case NUMBER:
 			v = yylval.y_valu;
+			goto putnum;
+		case NUMBER8:
+			v = yylval.y_valu8;
 			for (n = 0; n < sizeof(v); n++)
 			{
 				if (v == 0)
 					break;
 				v >>= 8;
 			}
-			assert(n <= 4);
-			c = NUMBER0 + n;
+			if (n <= 4)
+				c = NUMBER0 + n;
+			else
+				n = 8;
+			v = yylval.y_valu8;
 		putnum:
 			putc(c, tempfile);
 			putc(c >> 8, tempfile);
-			v = yylval.y_valu;
 			while (--n >= 0)
 				putc((int)(v >> (n * 8)), tempfile);
 			return;
@@ -188,8 +193,8 @@ void putval(int c)
 
 int getval(int c)
 {
+	int64_t v;
 	int n = 0;
-	valu_t v;
 	char* p = 0;
 
 	switch (c)
@@ -204,22 +209,26 @@ int getval(int c)
 			n = 4;
 			goto getnum;
 		case NUMBER0:
-			c = NUMBER;
+			c = NUMBER8;
 			goto getnum;
 		case NUMBER1:
 			n = 1;
-			c = NUMBER;
+			c = NUMBER8;
 			goto getnum;
 		case NUMBER2:
 			n = 2;
-			c = NUMBER;
+			c = NUMBER8;
 			goto getnum;
 		case NUMBER3:
 			n = 3;
-			c = NUMBER;
+			c = NUMBER8;
 			goto getnum;
-		case NUMBER:
+		case NUMBER4:
 			n = 4;
+			c = NUMBER8;
+			goto getnum;
+		case NUMBER8:
+			n = 8;
 		getnum:
 			v = 0;
 			while (--n >= 0)
@@ -227,7 +236,10 @@ int getval(int c)
 				v <<= 8;
 				v |= getc(tempfile);
 			}
-			yylval.y_valu = v;
+			if (c == NUMBER8)
+				yylval.y_valu8 = v;
+			else
+				yylval.y_valu = v;
 			return (c);
 		case IDENT:
 		case FBSYM:
@@ -409,6 +421,7 @@ static void need_stringbuf()
 
 static int innumber(int c)
 {
+	uint64_t uv;
 	char* p;
 	int radix;
 	static char num[40 + 1];
@@ -450,7 +463,7 @@ static int innumber(int c)
 	}
 	if (radix != 16 && (c == 'f' || c == 'b'))
 		return (infbsym(num));
-	yylval.y_valu = 0;
+	uv = 0;
 	while ((c = *p++))
 	{
 		if (c > '9')
@@ -458,9 +471,10 @@ static int innumber(int c)
 		c -= '0';
 		if ((unsigned)c >= radix)
 			serror("digit exceeds radix");
-		yylval.y_valu = yylval.y_valu * radix + c;
+		uv = uv * radix + c;
 	}
-	return (NUMBER);
+	yylval.y_valu8 = uv; /* signed = unsigned */
+	return (NUMBER8);
 
 floatconstant:
 	do
diff --git a/mach/proto/as/comm7.c b/mach/proto/as/comm7.c
index 418bf363c..9b93139d4 100644
--- a/mach/proto/as/comm7.c
+++ b/mach/proto/as/comm7.c
@@ -336,6 +336,21 @@ void emitx(valu_t val, int n)
 	}
 }
 
+/* Emit the 8 bytes of arg as four emit2() calls, 16 bits at a time;
+ * WORDS_REVERSED sends the high 16 bits first, else the low 16 first. */
+void emit8(int64_t arg)
+{
+	int word;
+
+	for (word = 0; word < 4; word++) {
+#ifdef WORDS_REVERSED
+		emit2((int)(arg >> (48 - 16 * word)));
+#else
+		emit2((int)(arg >> (16 * word)));
+#endif
+	}
+}
+
 void emitstr(int zero)
 {
 	int i;
diff --git a/man/uni_ass.6 b/man/uni_ass.6
index 6d970621e..558fae2de 100644
--- a/man/uni_ass.6
+++ b/man/uni_ass.6
@@ -185,10 +185,10 @@ machine.
 \&\\$1
 .sp 1
 ..
-.Pu ".extern \fIidentifier [, identifier]*\fP"
+.Pu ".extern \fIidentifier\fP [, \fIidentifier\fP]*"
 The identifiers mentioned in the list are exported and can be
 used in other modules.
-.Pu ".define \fIidentifier [, identifier]*\fP"
+.Pu ".define \fIidentifier\fP [, \fIidentifier\fP]*"
 Used for modules that are to be part of a libary.
 The .define pseudo's should be the first in such modules.
 When scanning a module in a library the assembler\-loader
@@ -197,21 +197,27 @@ mentioned in a .define list. If so, it includes that module in
 the program.
 The identifiers mentioned in the list are exported and can be
 used in other modules.
-.Pu ".data1 \fIexpression [, expression]*\fP"
+.Pu ".data1 \fIexpression\fP [, \fIexpression\fP]*"
 Initialize a sequence of bytes.
 This is not followed by automatic alignment.
-.Pu ".data2 \fIexpression [, expression]*\fP"
+.Pu ".data2 \fIexpression\fP [, \fIexpression\fP]*"
 Initialize a sequence of shorts (2-byte values).
 This is not followed by automatic alignment.
-.Pu ".data4 \fIexpression [, expression]*\fP"
+.Pu ".data4 \fIexpression\fP [, \fIexpression\fP]*"
 Initialize a sequence of longs (4-byte values).
 This is not followed by automatic alignment.
-.Pu ".dataf4 \fIliteralfloat [, literalfloat]*\fP"
+.Pu ".data8 \fIliteralint\fP [, \fIliteralint\fP]*"
+Initialize a sequence of long longs (8-byte values).
+This accepts only literal integers, not symbols or expressions; but
+a \fIliteralint\fP may be any signed or unsigned 8-byte integer, even
+if it is outside the usual range for the machine.
+This is not followed by automatic alignment.
+.Pu ".dataf4 \fIliteralfloat\fP [, \fIliteralfloat\fP]*"
 Initialize a sequence of floats (4-byte values).
 The values must be literal floating point constants containing
 a dot character.
 This is not followed by automatic alignment.
-.Pu ".dataf8 \fIliteralfloat [, literalfloat]*\fP"
+.Pu ".dataf8 \fIliteralfloat\fP [, \fIliteralfloat\fP]*"
 Initialize a sequence of doubles (8-byte values).
 The values must be literal floating point constants containing
 a dot character.