From 054b9c87e1932043617aa81fdd773bff2f9640f3 Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Tue, 13 Aug 2019 11:47:44 -0400
Subject: [PATCH 01/22] Add .data8 for 8-byte literal integers to the
 assembler.

This takes literal integers, not expressions, because each machine
defines its own valu_t for expressions, but valu_t can be too narrow
for an 8-byte integer, and I don't want to change all the machines to
use a wider valu_t.  Instead, change how the assembler parses literal
integers.  Remove the NUMBER token and add a NUMBER8 token for an
int64_t.  The new .data8 pseudo emits all 8 bytes of the int64_t;
expressions narrow the int64_t to a valu_t.  Don't add any checks for
integer overflow; expressions and .data* pseudos continue to ignore
overflow when a number is too wide.

This commit requires int64_t and uint64_t in the C compiler to build
the assembler.  The ACK's own C compiler doesn't have these.

For the assembler's temporary file, add NUMBER4 to store 4-byte
integers.  NUMBER4 acts like NUMBER[0-3] and only stores a
non-negative integer.  Each negative integer now takes 8 bytes (up
from 4) in the temporary file.

Move the `\fI` and `\fP` in the uni_ass(6) manual, so the square
brackets in `thing [, thing]*` are not italic.  This looks nicer in my
terminal, where italic text is underlined.
---
 mach/proto/as/comm1.h |  3 ++-
 mach/proto/as/comm2.y | 41 ++++++++++++++++++++++++++++----------
 mach/proto/as/comm3.c |  1 +
 mach/proto/as/comm5.c | 46 ++++++++++++++++++++++++++++---------------
 mach/proto/as/comm7.c | 15 ++++++++++++++
 man/uni_ass.6         | 20 ++++++++++++-------
 6 files changed, 92 insertions(+), 34 deletions(-)

diff --git a/mach/proto/as/comm1.h b/mach/proto/as/comm1.h
index 391675a98..29acbca6e 100644
--- a/mach/proto/as/comm1.h
+++ b/mach/proto/as/comm1.h
@@ -151,8 +151,9 @@ void	 emit1(int);
 void	 emit2(int);
 void	 emit4(long);
 void	 emitx(valu_t, int);
-void     emitf(int size, int negative);
+void	 emit8(int64_t);
 void	 emitstr(int);
+void	 emitf(int size, int negative);
 void	 yyerror(const char *);
 void	 nosect(void);
 void	 fatal(const char *, ...);
diff --git a/mach/proto/as/comm2.y b/mach/proto/as/comm2.y
index 3e1229a41..1e28979fa 100644
--- a/mach/proto/as/comm2.y
+++ b/mach/proto/as/comm2.y
@@ -22,6 +22,7 @@ static item_t	*last_it, *o_it;
 %union {
 	word_t	y_word;
 	valu_t	y_valu;
+	int64_t	y_valu8;
 	expr_t	y_expr;
 	item_t	*y_item;
 #ifdef ASLD
@@ -38,15 +39,17 @@ static item_t	*last_it, *o_it;
 %token <y_valu> CODE1
 %token <y_valu> CODE2
 %token <y_valu> CODE4
-%token NUMBER0		/* keep NUMBER* in this order */
+%token NUMBER0		/* keep NUMBER[0-4] in this order */
 %token NUMBER1
 %token NUMBER2
 %token NUMBER3
-%token <y_valu> NUMBER
+%token NUMBER4
+%token <y_valu8> NUMBER8
 %token NUMBERF
 %token DOT
 %token EXTERN
 %token <y_word> DATA
+%token DATA8
 %token <y_word> DATAF
 %token <y_word> ASCII
 %token SECTION
@@ -70,10 +73,11 @@ static item_t	*last_it, *o_it;
 %left '<' '>' OP_LE OP_GE
 %left OP_LL OP_RR
 %left '+' '-'
-%left '*' '/' '%' 
+%left '*' '/' '%'
 %nonassoc '~'
 
 %type <y_valu> absexp optabs1 optabs2
+%type <y_valu8> datum8
 %type <y_expr> expr
 %type <y_item> id_fb
 
@@ -105,7 +109,7 @@ program	:	/* empty */
 #endif
 	|	program IDENT ':'
 			{	newident($2, DOTTYP); newlabel($2);}
-	|	program NUMBER ':'
+	|	program NUMBER8 ':'
 			{	if ($2 < 0 || $2 > 9) {
 					serror("bad f/b label");
 					$2 = 0;
@@ -121,8 +125,8 @@ program	:	/* empty */
 	|	program operation ';'
 	|	program operation '\n'
 			{	lineno++; LISTLINE(1); RELODONE;}
-	|	program '#' NUMBER STRING '\n'
-			{	lineno = $3;
+	|	program '#' NUMBER8 STRING '\n'
+			{	lineno = $3; /* long = int64_t */
 				if (modulename) strncpy(modulename, stringbuf, STRINGMAX-1);
 				LISTLINE(1); RELODONE;
 			}
@@ -251,7 +255,8 @@ operation
 				DOTSCT->s_zero += $2;
 			}
 	|	DATA datalist
-	|   DATAF dataflist
+	|	DATA8 data8list
+	|	DATAF dataflist
 	|	ASCII STRING
 			{	emitstr($1);}
 	;
@@ -280,6 +285,20 @@ datalist
 			}
 	;
 
+/* datum8 is a literal, not an expr, because int64_t may be wider than valu_t. */
+datum8	:	NUMBER8
+			{	$$ = $1;}
+	|	'-' NUMBER8
+			{	$$ = (int64_t)-(uint64_t)$2; /* wraps; -INT64_MIN would overflow */}
+	;
+
+data8list
+	:	datum8
+			{	emit8($1);}
+	|	data8list ',' datum8
+			{	emit8($3);}
+	;
+
 numberf
 	:	NUMBERF
 			{
@@ -300,10 +319,12 @@ expr	:	error
 			{	serror("expr syntax err");
 				$$.val = 0; $$.typ = S_UND;
 			}
-	|	NUMBER
-			{	$$.val = $1; $$.typ = S_ABS;}
+	|	NUMBER8
+			{	$$.val = $1; /* valu_t = int64_t */
+				$$.typ = S_ABS;
+			}
 	|	id_fb
-			{	$$.val = load($1); 
+			{	$$.val = load($1);
 				last_it = $1;
 				$$.typ = $1->i_type & ~S_EXT;
 			}
diff --git a/mach/proto/as/comm3.c b/mach/proto/as/comm3.c
index 2b3afaba5..f45aa723c 100644
--- a/mach/proto/as/comm3.c
+++ b/mach/proto/as/comm3.c
@@ -29,6 +29,7 @@ item_t	keytab[] = {
 	{0,	DATA,		RELO1,	".data1"},
 	{0,	DATA,		RELO2,	".data2"},
 	{0,	DATA,		RELO4,	".data4"},
+	{0,	DATA8,		0,	".data8"},
 	{0,  DATAF,      4,      ".dataf4"},
 	{0,  DATAF,      8,      ".dataf8"},
 	{0,	ASCII,		0,		".ascii"},
diff --git a/mach/proto/as/comm5.c b/mach/proto/as/comm5.c
index 2b301a2e3..2987aedb2 100644
--- a/mach/proto/as/comm5.c
+++ b/mach/proto/as/comm5.c
@@ -101,7 +101,7 @@ int yylex(void)
 
 void putval(int c)
 {
-	valu_t v;
+	int64_t v;
 	int n = 0;
 	char* p = 0;
 
@@ -110,27 +110,32 @@ void putval(int c)
 	{
 		case CODE1:
 			n = 1;
+			v = yylval.y_valu;
 			goto putnum;
 		case CODE2:
 			n = 2;
+			v = yylval.y_valu;
 			goto putnum;
 		case CODE4:
 			n = 4;
-			goto putnum;
-		case NUMBER:
 			v = yylval.y_valu;
+			goto putnum;
+		case NUMBER8:
+			v = yylval.y_valu8;
 			for (n = 0; n < sizeof(v); n++)
 			{
 				if (v == 0)
 					break;
 				v >>= 8;
 			}
-			assert(n <= 4);
-			c = NUMBER0 + n;
+			if (n <= 4)
+				c = NUMBER0 + n;
+			else
+				n = 8;
+			v = yylval.y_valu8;
 		putnum:
 			putc(c, tempfile);
 			putc(c >> 8, tempfile);
-			v = yylval.y_valu;
 			while (--n >= 0)
 				putc((int)(v >> (n * 8)), tempfile);
 			return;
@@ -188,8 +193,8 @@ void putval(int c)
 
 int getval(int c)
 {
+	int64_t v;
 	int n = 0;
-	valu_t v;
 	char* p = 0;
 
 	switch (c)
@@ -204,22 +209,26 @@ int getval(int c)
 			n = 4;
 			goto getnum;
 		case NUMBER0:
-			c = NUMBER;
+			c = NUMBER8;
 			goto getnum;
 		case NUMBER1:
 			n = 1;
-			c = NUMBER;
+			c = NUMBER8;
 			goto getnum;
 		case NUMBER2:
 			n = 2;
-			c = NUMBER;
+			c = NUMBER8;
 			goto getnum;
 		case NUMBER3:
 			n = 3;
-			c = NUMBER;
+			c = NUMBER8;
 			goto getnum;
-		case NUMBER:
+		case NUMBER4:
 			n = 4;
+			c = NUMBER8;
+			goto getnum;
+		case NUMBER8:
+			n = 8;
 		getnum:
 			v = 0;
 			while (--n >= 0)
@@ -227,7 +236,10 @@ int getval(int c)
 				v <<= 8;
 				v |= getc(tempfile);
 			}
-			yylval.y_valu = v;
+			if (c == NUMBER8)
+				yylval.y_valu8 = v;
+			else
+				yylval.y_valu = v;
 			return (c);
 		case IDENT:
 		case FBSYM:
@@ -409,6 +421,7 @@ static void need_stringbuf()
 
 static int innumber(int c)
 {
+	uint64_t uv;
 	char* p;
 	int radix;
 	static char num[40 + 1];
@@ -450,7 +463,7 @@ static int innumber(int c)
 	}
 	if (radix != 16 && (c == 'f' || c == 'b'))
 		return (infbsym(num));
-	yylval.y_valu = 0;
+	uv = 0;
 	while ((c = *p++))
 	{
 		if (c > '9')
@@ -458,9 +471,10 @@ static int innumber(int c)
 		c -= '0';
 		if ((unsigned)c >= radix)
 			serror("digit exceeds radix");
-		yylval.y_valu = yylval.y_valu * radix + c;
+		uv = uv * radix + c;
 	}
-	return (NUMBER);
+	yylval.y_valu8 = uv; /* signed = unsigned */
+	return (NUMBER8);
 
 floatconstant:
 	do
diff --git a/mach/proto/as/comm7.c b/mach/proto/as/comm7.c
index 418bf363c..9b93139d4 100644
--- a/mach/proto/as/comm7.c
+++ b/mach/proto/as/comm7.c
@@ -336,6 +336,21 @@ void emitx(valu_t val, int n)
 	}
 }
 
+void emit8(int64_t arg)
+{	/* Emit arg as four emit2() calls, each taking the next 16 bits. */
+#ifdef WORDS_REVERSED
+	emit2((int)(arg >> 48));	/* bits 48..63 go first */
+	emit2((int)(arg >> 32));
+	emit2((int)(arg >> 16));
+	emit2((int)(arg));
+#else
+	emit2((int)(arg));		/* bits 0..15 go first */
+	emit2((int)(arg >> 16));
+	emit2((int)(arg >> 32));
+	emit2((int)(arg >> 48));
+#endif
+}
+
 void emitstr(int zero)
 {
 	int i;
diff --git a/man/uni_ass.6 b/man/uni_ass.6
index 6d970621e..558fae2de 100644
--- a/man/uni_ass.6
+++ b/man/uni_ass.6
@@ -185,10 +185,10 @@ machine.
 \&\\$1
 .sp 1
 ..
-.Pu ".extern \fIidentifier [, identifier]*\fP"
+.Pu ".extern \fIidentifier\fP [, \fIidentifier\fP]*"
 The identifiers mentioned in the list are exported and can be
 used in other modules.
-.Pu ".define \fIidentifier [, identifier]*\fP"
+.Pu ".define \fIidentifier\fP [, \fIidentifier\fP]*"
 Used for modules that are to be part of a libary.
 The .define pseudo's should be the first in such modules.
 When scanning a module in a library the assembler\-loader
@@ -197,21 +197,27 @@ mentioned in a .define list. If so, it includes that module in
 the program.
 The identifiers mentioned in the list are exported and can be
 used in other modules.
-.Pu ".data1 \fIexpression [, expression]*\fP"
+.Pu ".data1 \fIexpression\fP [, \fIexpression\fP]*"
 Initialize a sequence of bytes.
 This is not followed by automatic alignment.
-.Pu ".data2 \fIexpression [, expression]*\fP"
+.Pu ".data2 \fIexpression\fP [, \fIexpression\fP]*"
 Initialize a sequence of shorts (2-byte values).
 This is not followed by automatic alignment.
-.Pu ".data4 \fIexpression [, expression]*\fP"
+.Pu ".data4 \fIexpression\fP [, \fIexpression\fP]*"
 Initialize a sequence of longs (4-byte values).
 This is not followed by automatic alignment.
-.Pu ".dataf4 \fIliteralfloat [, literalfloat]*\fP"
+.Pu ".data8 \fIliteralint\fP [, \fIliteralint\fP]*"
+Initialize a sequence of long longs (8-byte values).
+This accepts only literal integers, not symbols or expressions; but
+a \fIliteralint\fP may be any signed or unsigned 8-byte integer, even
+if it is outside the usual range for the machine.
+This is not followed by automatic alignment.
+.Pu ".dataf4 \fIliteralfloat\fP [, \fIliteralfloat\fP]*"
 Initialize a sequence of floats (4-byte values).
 The values must be literal floating point constants containing
 a dot character.
 This is not followed by automatic alignment.
-.Pu ".dataf8 \fIliteralfloat [, literalfloat]*\fP"
+.Pu ".dataf8 \fIliteralfloat\fP [, \fIliteralfloat\fP]*"
 Initialize a sequence of doubles (8-byte values).
 The values must be literal floating point constants containing
 a dot character.

From 1faff418ec1943d2b4715763ed7beb1829d512a6 Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Tue, 13 Aug 2019 15:37:05 -0400
Subject: [PATCH 02/22] Teach some ncg machines to use .data8

This turns EM `con 5000000000I8` into assembly `.data8 5000000000` for
machines i386, i80, i86, m68020, powerpc, vc4.  These are the only ncg
machines in our build.

i80 and i86 get con_mult(sz) for sz == 4 and sz == 8.  The other
machines only get sz == 8, because they have 4-byte words, and ncg
only calls con_mult(sz) when sz is greater than the word size.  The
tab "\t" after .data4 or .data8 is like the tabs in the con_*() macros
of mach/*/ncg/mach.h.

i86 now uses .data4, like i80.  Also, i86 and i386 now use the numeric
string without converting it to an integer and back to a string.
---
 mach/i386/ncg/mach.c    | 8 +++-----
 mach/i80/ncg/mach.c     | 8 ++++----
 mach/i86/ncg/mach.c     | 9 +++------
 mach/m68020/ncg/mach.c  | 6 +++---
 mach/powerpc/ncg/mach.c | 7 +++----
 mach/vc4/ncg/mach.c     | 9 +++++----
 6 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/mach/i386/ncg/mach.c b/mach/i386/ncg/mach.c
index 34a4b6f16..5f67a3dfb 100644
--- a/mach/i386/ncg/mach.c
+++ b/mach/i386/ncg/mach.c
@@ -34,13 +34,11 @@ con_part(sz,w) register sz; word w; {
 }
 
 void
-con_mult(sz) word sz; {
-	long l;
+con_mult(word sz) {
 
-	if (sz != 4)
+	if (sz != 8)
 		fatal("bad icon/ucon size");
-	l = atol(str);
-	fprintf(codefile,"\t.data4 %ld\n", l);
+	fprintf(codefile,".data8\t%s\n", str);
 }
 
 #define CODE_GENERATOR  
diff --git a/mach/i80/ncg/mach.c b/mach/i80/ncg/mach.c
index 968ececbe..84a87ff6e 100644
--- a/mach/i80/ncg/mach.c
+++ b/mach/i80/ncg/mach.c
@@ -32,12 +32,12 @@ void con_part(int sz, word w)
 	part_size += sz;
 }
 
-void con_mult(sz) word sz;
-{
+void
+con_mult(word sz) {
 
-	if (argval != 4)
+	if (sz != 4 && sz != 8)
 		fatal("bad icon/ucon size");
-	fprintf(codefile, ".data4\t%ld\n", atol(str));
+	fprintf(codefile,".data%d\t%s\n", (int)sz, str);
 }
 
 #define CODE_GENERATOR
diff --git a/mach/i86/ncg/mach.c b/mach/i86/ncg/mach.c
index d93eaba3d..17cc876b6 100644
--- a/mach/i86/ncg/mach.c
+++ b/mach/i86/ncg/mach.c
@@ -33,14 +33,11 @@ con_part(sz,w) register sz; word w; {
 }
 
 void
-con_mult(sz) word sz; {
-	long l;
+con_mult(word sz) {
 
-	if (sz != 4)
+	if (sz != 4 && sz != 8)
 		fatal("bad icon/ucon size");
-	l = atol(str);
-	fprintf(codefile,"\t.data2 %d,%d\n",
-			(int)l&0xFFFF,(int)(l>>16)&0xFFFF);
+	fprintf(codefile,".data%d\t%s\n", (int)sz, str);
 }
 
 #define CODE_GENERATOR 
diff --git a/mach/m68020/ncg/mach.c b/mach/m68020/ncg/mach.c
index f230761c4..0df6c8389 100644
--- a/mach/m68020/ncg/mach.c
+++ b/mach/m68020/ncg/mach.c
@@ -45,11 +45,11 @@ con_part(sz,w) register sz; word w; {
 }
 
 void
-con_mult(sz) word sz; {
+con_mult(word sz) {
 
-	if (sz != 4)
+	if (sz != 8)
 		fatal("bad icon/ucon size");
-	fprintf(codefile,".data4 %s\n",str);
+	fprintf(codefile,".data8\t%s\n", str);
 }
 
 #define IEEEFLOAT
diff --git a/mach/powerpc/ncg/mach.c b/mach/powerpc/ncg/mach.c
index 1a1d98d6c..218920ed9 100644
--- a/mach/powerpc/ncg/mach.c
+++ b/mach/powerpc/ncg/mach.c
@@ -41,12 +41,11 @@ con_part(int sz, word w)
 }
 
 void
-con_mult(word sz)
-{
+con_mult(word sz) {
 
-	if (argval != 4)
+	if (sz != 8)
 		fatal("bad icon/ucon size");
-	fprintf(codefile,".data4 %s\n", str);
+	fprintf(codefile,".data8\t%s\n", str);
 }
 
 #define CODE_GENERATOR  
diff --git a/mach/vc4/ncg/mach.c b/mach/vc4/ncg/mach.c
index 16ca94f35..68f205c06 100644
--- a/mach/vc4/ncg/mach.c
+++ b/mach/vc4/ncg/mach.c
@@ -29,11 +29,12 @@ void con_part(int sz, word w)
 	part_size += sz;
 }
 
-void con_mult(word sz)
-{
-	if (argval != 4)
+void
+con_mult(word sz) {
+
+	if (sz != 8)
 		fatal("bad icon/ucon size");
-	fprintf(codefile,".data4 %s\n", str);
+	fprintf(codefile,".data8\t%s\n", str);
 }
 
 #define CODE_GENERATOR  

From 893df4b79b91e90cddb289c722d0b9ebecfa252e Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Tue, 20 Aug 2019 13:38:18 -0400
Subject: [PATCH 03/22] Experiment with 8-byte integers in ncg i386.

This provides adi, sbi, mli, dvi, rmi, ngi, dvu, rmu 8, but is missing
shifts and rotates.  It is also missing conversions between 8-byte
integers and other sizes of integers or floats.  The code might not be
all correct, but works at least some of the time.

I adapted this from how ncg i86 does 4-byte integers, but I use a
different algorithm when dividing by a large value: i86 avoids the div
instruction and uses a shift-and-subtract loop; but I use the div
instruction to estimate a quotient, which is more like how big integer
libraries do division.  My .dvi8 and .dvu8 also set ecx:ebx to the
remainder; this might be a bad idea, because it requires .dvi8 and
.dvu8 to always calculate the remainder, even when the caller only
wants the quotient.

To play with 8-byte integers, I wrote EM procedures like

     mes 2, 4, 4
     exp $ngi
     pro $ngi,0
     ldl 4
     ngi 8
     lol 0
     sti 8
     lol 0
     ret 4
     end
     exp $adi
     pro $adi,0
     ldl 4
     ldl 12
     adi 8
     lol 0
     sti 8
     lol 0
     ret 4
     end

and called them from C like

    typedef struct { int l; int h; } q;
    q ngi(q);
    q adi(q, q);
---
 mach/i386/libem/build.lua |   2 +-
 mach/i386/libem/dvi8.s    | 115 ++++++++++++++++++++++++++++++++++++++
 mach/i386/libem/mli8.s    |  20 +++++++
 mach/i386/ncg/table       |  41 +++++++++++++-
 4 files changed, 176 insertions(+), 2 deletions(-)
 create mode 100644 mach/i386/libem/dvi8.s
 create mode 100644 mach/i386/libem/mli8.s

diff --git a/mach/i386/libem/build.lua b/mach/i386/libem/build.lua
index ca5a13c65..b92254d96 100644
--- a/mach/i386/libem/build.lua
+++ b/mach/i386/libem/build.lua
@@ -1,7 +1,7 @@
 for _, plat in ipairs(vars.plats) do
 	acklibrary {
 		name = "lib_"..plat,
-		srcs = { "./*.s" },
+		srcs = { "./*.s" }, -- dvi8.s
 		vars = { plat = plat },
 	}
 end
diff --git a/mach/i386/libem/dvi8.s b/mach/i386/libem/dvi8.s
new file mode 100644
index 000000000..060f85cf1
--- /dev/null
+++ b/mach/i386/libem/dvi8.s
@@ -0,0 +1,115 @@
+.sect .text; .sect .rom; .sect .data; .sect .bss
+.sect .text
+.define .dvi8, .dvu8
+
+yl=8
+yh=12
+xl=16
+xh=20
+	! .dvi8 and .dvu8 divide x = xh:xl by y = yh:yl,
+	! yield edx:eax = quotient, ecx:ebx = remainder.
+
+.dvu8:
+	! Unsigned division: set di = 0 for non-negative quotient.
+	push	edi
+	xor	di,di
+	mov	eax,xh(esp)
+	mov	edx,yh(esp)
+	and	edx,edx
+	jmp	7f
+
+.dvi8:
+	! Signed division: replace x and y with their absolute values.
+	! Set di = 1 for negative quotient, 0 for non-negative.
+	push	edi
+	xor	di,di		! di = 0
+	mov	eax,xh(esp)
+	and	eax,eax
+	jns	1f
+	inc	di		! di = 1
+	neg	eax
+	neg	xl(esp)
+	sbb	eax,0		! eax:xl = absolute value of x
+1:	mov	edx,yh(esp)
+	and	edx,edx
+	jns	7f
+	xor	di,1		! flip di
+	neg	edx
+	neg	yl(esp)
+	sbb	edx,0		! edx:yl = absolute value of y
+
+7:	! Here .dvu8 joins .dvi8, eax = xh, edx = yh, flags test edx,
+	! the values in xh(esp) and yh(esp) are garbage.
+	jnz	8f		! jump if y >= 2**32
+
+	! x / y = x / yl = xh / yl + xl / yl = qh + (xl + rh) / yl
+	! where qh and rh are quotient, remainder from xh / yl.
+	mov	ebx,yl(esp)
+	xor	edx,edx		! edx:eax = xh
+	div	ebx		! eax = qh, edx = rh
+	mov	ecx,eax
+	mov	eax,xl(esp)
+	div	ebx		! eax = ql, edx = remainder
+	mov	ebx,edx
+	mov	edx,ecx		! edx:eax = quotient qh:ql
+	xor	ecx,ecx		! ecx:ebx = remainder
+
+9:	! Finally, if di != 0 then negate both quotient and remainder.
+	and	di,di		! sets the zero flag when di == 0
+	jz	1f
+	neg	edx
+	neg	eax
+	sbb	edx,0		! negate quotient edx:eax
+	neg	ecx		! NOTE(review): remainder gets the quotient's sign, not x's; confirm for rmi 8
+	neg	ebx
+	sbb	ecx,0		! negate remainder ecx:ebx
+1:	pop	edi		! caller's edi
+	ret	16		! also pops the 16 bytes of arguments yl, yh, xl, xh
+
+8:	! We come here if y >= 2**32.
+	mov	xh(esp),eax
+	mov	yh(esp),edx
+	mov	ebx,yl(esp)	! edx:ebx = y
+
+	! Estimate x / y as q = (x / (y >> cl)) >> cl,
+	! where 2**31 <= (y >> cl) < 2**32.
+	xor	cx,cx
+1:	inc	cx
+	shr	edx,1
+	rcr	ebx,1		! edx:ebx = y >> cl
+	and	edx,edx
+	jnz	1b		! loop until y >> cl fits in ebx
+
+	! x / (y >> cl) = qh + (xl + rh) / (y >> cl)
+	push	edi
+	xor	edx,edx		! edx:eax = xh
+	div	ebx		! eax = qh, edx = rh
+	mov	edi,eax
+	mov	eax,xl+4(esp)	! push edi moved xl to xl+4
+	div	ebx		! edi:eax = x / (y >> cl)
+
+	! q = (x / (y >> cl)) >> cl = edi:eax >> cl
+	shr	eax,cl		! NOTE(review): x86 masks cl to 5 bits; cl = 32 (yh >= 2**31) shifts by 0 -- confirm
+	neg	cx		! cl = (32 - cl) modulo 32
+	shl	edi,cl
+	or	eax,edi		! eax = q
+
+	! Calculate the remainder x - q * y.  If the subtraction
+	! overflows, then the correct quotient is q - 1, else it is q.
+	mov	ecx,yh+4(esp)
+	imul	ecx,eax		! ecx = q * yh
+	mov	edi,eax
+	mul	yl+4(esp)	! edx:eax = q * yl
+	add	edx,ecx		! edx:eax = q * y
+	mov	ebx,xl+4(esp)
+	mov	ecx,xh+4(esp)	! ecx:ebx = x
+	sub	ebx,eax
+	sbb	ecx,edx		! ecx:ebx = remainder
+	jnc	1f
+	dec	edi		! fix quotient
+	add	ebx,yl+4(esp)
+	adc	ecx,yh+4(esp)	! fix remainder: add y back to ecx:ebx with carry
+1:	mov	eax,edi
+	xor	edx,edx		! edx:eax = quotient
+	pop	edi		! negative flag
+	jmp	9b
diff --git a/mach/i386/libem/mli8.s b/mach/i386/libem/mli8.s
new file mode 100644
index 000000000..c8b306ac0
--- /dev/null
+++ b/mach/i386/libem/mli8.s
@@ -0,0 +1,20 @@
+.sect .text; .sect .rom; .sect .data; .sect .bss
+.sect .text
+.define .mli8
+
+yl=4
+yh=8
+	! xl in eax
+	! xh in edx
+
+.mli8:
+	! x * y = (xh * 2**32 + xl) * (yh * 2**32 + yl), modulo 2**64
+	!       = (xh * yl + xl * yh) * 2**32 + xl * yl
+	! The term xh * yh * 2**64 overflows to zero.
+	mov	ecx,eax
+	imul	ecx,yh(esp)	! ecx = xl * yh
+	imul	edx,yl(esp)	! edx = xh * yl
+	add	ecx,edx		! ecx = low 32 bits of xl * yh + xh * yl
+	mul	yl(esp)		! edx:eax = xl * yl
+	add	edx,ecx		! edx:eax = x * y
+	ret	8		! also pops the 8 bytes of argument y
diff --git a/mach/i386/ncg/table b/mach/i386/ncg/table
index b7efb3c12..42a39d15b 100644
--- a/mach/i386/ncg/table
+++ b/mach/i386/ncg/table
@@ -961,6 +961,14 @@ with EXACT rmorconst const
   uses reusing %1,REG=%1
   gen add %a,%2			yields %a
 
+pat adi $1==8
+with REG REG rmorconst rmorconst
+  gen add %1,%3
+      adc %2,%4			yields %2 %1
+with rmorconst rmorconst REG REG
+  gen add %3,%1
+      adc %4,%2			yields %4 %3
+
 /*
 pat adi !defined($1)
 with CXREG ACC
@@ -969,13 +977,17 @@ with CXREG ACC
 */
 
 pat sbi $1==4
-
 with rmorconst REG
   gen sub %2,%1			yields %2
 with EXACT REG rmorconst
   gen sub %1,%2
       neg %1			yields %1
 
+pat sbi $1==8
+with rmorconst rmorconst REG REG
+  gen sub %3,%1
+      sbb %4,%2			yields %4 %3
+
 /*
 pat sbi !defined($1)
 with CXREG ACC
@@ -995,6 +1007,11 @@ with rm const
   uses reusing %1,REG
   gen imul %a,%1,%2		yields %a
 
+pat mli $1==8
+with ACC DXREG
+  kills ALL
+  gen proccall {label,".mli8"}	yields edx eax
+
 /*
 pat mli !defined($1)
 with ACC
@@ -1008,6 +1025,10 @@ with noacc ACC
   gen cdq.
       idiv %1			yields eax
 
+pat dvi $1==8
+  kills ALL
+  gen proccall {label,".dvi8"}	yields edx eax
+
 /*
 pat dvi !defined($1)
 with ACC
@@ -1021,6 +1042,10 @@ with noacc ACC
   gen cdq.
       idiv %1			yields edx
 
+pat rmi $1==8
+  kills ALL
+  gen proccall {label,".dvi8"}	yields ecx ebx
+
 /*
 pat rmi !defined($1)
 with ACC
@@ -1032,6 +1057,12 @@ pat ngi $1==4
 with REG
   gen neg %1			yields %1
 
+pat ngi $1==8
+with REG REG
+  gen neg %2
+      neg %1
+      sbb %2,{ANYCON,0}		yields %2 %1
+
 /*
 pat ngi !defined($1)
 with ACC
@@ -1114,6 +1145,10 @@ with noacc ACC
 uses DXREG={ANYCON,0}
 gen div %1			yields eax
 
+pat dvu $1==8
+  kills ALL
+  gen proccall {label,".dvu8"}	yields edx eax
+
 /*
 pat dvu !defined($1)
 with ACC STACK
@@ -1126,6 +1161,10 @@ with noacc ACC
 uses DXREG={ANYCON,0}
 gen div %1			yields edx
 
+pat rmu $1==8
+  kills ALL
+  gen proccall {label,".dvu8"}	yields ecx ebx
+
 /*
 pat rmu !defined($1)
 with ACC STACK

From 007a63d52987dcad3719f1bb7dc5a7426789098a Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Mon, 2 Sep 2019 11:24:44 -0400
Subject: [PATCH 04/22] Begin to add `long long` to C compiler for linux386.

Add long long type, but without literals; you can't say '123LL' yet.
You can try constant operations, like `(long long)123 + 1`, but the
compiler's `arith` type might not be wide enough.  Conversions,
shifts, and some other operations don't work in i386 ncg; I am using a
union instead of conversions:

	union q {
		long long ll;
		unsigned long long ull;
		int i[2];
	};

Hack plat/linux386/descr to enable long long (size 8, alignment 4)
only for this platform.  The default for other platforms is to disable
long long (size -1).

In lang/cem/cemcom.ansi,

 - BigPars, SmallPars: Add default size, alignment of long long.
 - align.h: Add lnglng_align.
 - arith.c: Convert arithmetic operands to long long or unsigned long
   long when necessary; avoid conversion from long long to long.
   Allow long long as an arithmetic, integral, or logical operand.
 - ch3.c: Handle long long like int and long when erroneously applying
   a selector, like `long long ll; ll.member` or `ll->member`.  Add
   long long to integral and arithmetic types.
 - code.c: Add long long to type stabs for debugging.
 - conversion.c: Add long long to integral conversions.
 - cstoper.c: Write masks up to full_mask[8].  Add FIXME comment.
 - declar.g: Parse `long long` in code.
 - decspecs.c: Understand long long in type declarations.
 - eval.c: Add long long to operations, to generate code like `adi 8`.
   Don't use `ldc` with constant over 4 bytes.
 - ival.g: Allow long long in initializations.
 - main.c: Set lnglng_type and related values.
 - options.c: Add option like `-Vq8.4` to set long long to size 8,
   alignment 4.  I chose 'q', because Perl's pack and Ruby's
   Array#pack use 'q' for 64-bit or long long values; it might be a
   reference to BSD's old quad_t alias for long long.
 - sizes.h: Add lnglng_size.
 - stab.c: Allow long long when writing the type stab for debugging.
   Switch from calculating the ranges to hardcoding them in strings;
   add 8-byte ranges as a special case.  This also hardcodes the
   unsigned 4-byte range as "0;-1".  Before it was either "0;-1" or
   "0;4294967295", depending on sizeof(long) in the compiler.
 - struct.c: Try long long bitfield, but it will probably give the
   error, "bit field type long long does not fit in a word".
 - switch.c: Update comment.
 - tokenname.c: Define LNGLNG (long long) like LNGDBL (long double).
 - type.c, type.str: Add lnglng_type and ulnglng_type.  Add function
   no_long_long() to check if long long is disabled.
---
 lang/cem/cemcom.ansi/BigPars      |   2 +
 lang/cem/cemcom.ansi/SmallPars    |   2 +
 lang/cem/cemcom.ansi/align.h      |   2 +
 lang/cem/cemcom.ansi/arith.c      | 102 ++++++++++++++++++++----------
 lang/cem/cemcom.ansi/ch3.c        |   4 ++
 lang/cem/cemcom.ansi/code.c       |   4 ++
 lang/cem/cemcom.ansi/conversion.c |   1 +
 lang/cem/cemcom.ansi/cstoper.c    |   6 +-
 lang/cem/cemcom.ansi/declar.g     |  10 ++-
 lang/cem/cemcom.ansi/decspecs.c   |  20 +++++-
 lang/cem/cemcom.ansi/eval.c       |  21 ++++--
 lang/cem/cemcom.ansi/ival.g       |   1 +
 lang/cem/cemcom.ansi/main.c       |   6 ++
 lang/cem/cemcom.ansi/options.c    |   6 ++
 lang/cem/cemcom.ansi/sizes.h      |   2 +
 lang/cem/cemcom.ansi/stab.c       |  27 +++++---
 lang/cem/cemcom.ansi/struct.c     |   1 +
 lang/cem/cemcom.ansi/switch.c     |   2 +-
 lang/cem/cemcom.ansi/tokenname.c  |   1 +
 lang/cem/cemcom.ansi/type.c       |  18 ++++++
 lang/cem/cemcom.ansi/type.str     |   2 +
 plat/linux386/descr               |   3 +-
 22 files changed, 187 insertions(+), 56 deletions(-)

diff --git a/lang/cem/cemcom.ansi/BigPars b/lang/cem/cemcom.ansi/BigPars
index bf92e22fe..ad758c8a6 100644
--- a/lang/cem/cemcom.ansi/BigPars
+++ b/lang/cem/cemcom.ansi/BigPars
@@ -55,6 +55,7 @@
 #define SZ_WORD		4
 #define	SZ_INT		4
 #define	SZ_LONG		4
+#define	SZ_LNGLNG	-1
 #define	SZ_FLOAT	4
 #define	SZ_DOUBLE	8
 #define	SZ_LNGDBL	8	/* for now */
@@ -66,6 +67,7 @@
 #define AL_WORD		SZ_WORD
 #define	AL_INT		SZ_WORD
 #define	AL_LONG		SZ_WORD
+#define	AL_LNGLNG	SZ_WORD
 #define	AL_FLOAT	SZ_WORD
 #define	AL_DOUBLE	SZ_WORD
 #define	AL_LNGDBL	SZ_WORD
diff --git a/lang/cem/cemcom.ansi/SmallPars b/lang/cem/cemcom.ansi/SmallPars
index 4ef50e48e..01f7073b1 100644
--- a/lang/cem/cemcom.ansi/SmallPars
+++ b/lang/cem/cemcom.ansi/SmallPars
@@ -55,6 +55,7 @@
 #define SZ_WORD		4
 #define	SZ_INT		4
 #define	SZ_LONG		4
+#define	SZ_LNGLNG	-1
 #define	SZ_FLOAT	4
 #define	SZ_DOUBLE	8
 #define	SZ_LNGDBL	8	/* for now */
@@ -66,6 +67,7 @@
 #define AL_WORD		SZ_WORD
 #define	AL_INT		SZ_WORD
 #define	AL_LONG		SZ_WORD
+#define	AL_LNGLNG	SZ_WORD
 #define	AL_FLOAT	SZ_WORD
 #define	AL_DOUBLE	SZ_WORD
 #define	AL_LNGDBL	SZ_WORD
diff --git a/lang/cem/cemcom.ansi/align.h b/lang/cem/cemcom.ansi/align.h
index 79b9be97b..67bb24265 100644
--- a/lang/cem/cemcom.ansi/align.h
+++ b/lang/cem/cemcom.ansi/align.h
@@ -10,6 +10,7 @@
 #ifndef NOCROSS
 extern int
 	short_align, word_align, int_align, long_align,
+	lnglng_align,
 	float_align, double_align, lngdbl_align,
 	pointer_align,
 	struct_align, union_align;
@@ -18,6 +19,7 @@ extern int
 #define word_align	((int)AL_WORD)
 #define int_align	((int)AL_INT)
 #define long_align	((int)AL_LONG)
+#define lnglng_align	((int)AL_LNGLNG)
 #define float_align	((int)AL_FLOAT)
 #define double_align	((int)AL_DOUBLE)
 #define	lngdbl_align	((int)AL_LNGDBL)
diff --git a/lang/cem/cemcom.ansi/arith.c b/lang/cem/cemcom.ansi/arith.c
index 6504a7305..e1c3311c3 100644
--- a/lang/cem/cemcom.ansi/arith.c
+++ b/lang/cem/cemcom.ansi/arith.c
@@ -12,6 +12,7 @@
 */
 
 #include	<assert.h>
+#include	<stddef.h>
 #include    "parameters.h"
 #include	<alloc.h>
 #include	<flt_arith.h>
@@ -45,7 +46,8 @@ void arithbalance(register struct expr **e1p, int oper, register struct expr **e
 		have a floating type, in which case the flags shouldn't
 		travel upward in the expression tree.
 	*/
-	register int t1, t2, u1, u2;
+	struct type *convert1, *convert2;
+	int t1, t2, u1, u2;
 	int shifting = (oper == LEFT || oper == RIGHT
 			|| oper == LEFTAB || oper == RIGHTAB);
 	int ptrdiff = 0;
@@ -56,9 +58,11 @@ void arithbalance(register struct expr **e1p, int oper, register struct expr **e
 	if (int_size != pointer_size) {
 		if (ptrdiff = ((*e1p)->ex_flags & EX_PTRDIFF)
 			    || ((*e2p)->ex_flags & EX_PTRDIFF)) {
-			if (!((*e1p)->ex_flags & EX_PTRDIFF) && t1 == LONG)
+			if (!((*e1p)->ex_flags & EX_PTRDIFF)
+			    && (t1 == LONG || t1 == LNGLNG))
 				ptrdiff = 0;
-			if (!((*e2p)->ex_flags & EX_PTRDIFF) && t2 == LONG
+			if (!((*e2p)->ex_flags & EX_PTRDIFF)
+			    && (t2 == LONG || t2 == LNGLNG)
 			    && !shifting)
 				ptrdiff = 0;
 		}
@@ -67,7 +71,9 @@ void arithbalance(register struct expr **e1p, int oper, register struct expr **e
 		(*e2p)->ex_flags &= ~EX_PTRDIFF;
 	}
 
-	/* Now t1 and t2 are either INT, LONG, FLOAT, DOUBLE, or LNGDBL */
+	/*	Now t1 and t2 are either INT, LONG, LNGLNG,
+		FLOAT, DOUBLE, or LNGDBL
+	*/
 
 	/*	If any operand has the type long double, the other operand
 		is converted to long double.
@@ -82,11 +88,12 @@ void arithbalance(register struct expr **e1p, int oper, register struct expr **e
 		}
 		return;
 	} else if (t2 == LNGDBL) {
-		if (t1 != LNGDBL)
+		if (t1 != LNGDBL) {
 		    if (t1 == DOUBLE || t1 == FLOAT)
 			float2float(e1p, lngdbl_type);
 		    else
 			int2float(e1p, lngdbl_type);
+		}
 		return;
 	}
 
@@ -120,38 +127,63 @@ void arithbalance(register struct expr **e1p, int oper, register struct expr **e
 		return;
 	}
 
-	/* Now they are INT or LONG */
+	/* Now they are INT, LONG or LNGLNG */
 	u1 = (*e1p)->ex_type->tp_unsigned;
 	u2 = (*e2p)->ex_type->tp_unsigned;
+	convert1 = NULL;
+	convert2 = NULL;
 
-	/*	If either operand has type unsigned long int, the other
-		operand is converted to unsigned long int.
-	*/
-	if (t1 == LONG && u1 && (t2 != LONG || !u2))
-		t2 = int2int(e2p, ulong_type);
-	else if (t2 == LONG && u2 && (t1 != LONG || !u1)
-			&& !shifting)	/* ??? */
-		t1 = int2int(e1p, ulong_type);
+	/*	If either operand is a long long, the other operand
+		is converted to long long; else if either operand is
+		a long, the other operand is converted to a long.
 
-	/*	If one operand has type long int and the other has type unsigned
-		int, if a long int can represent all values of an unsigned int,
-		the operand of type unsigned int is converted to long int; if
-		a long int cannot represent all values of an unsigned int,
-		both operands are converted to unsigned long int.
+		If one operand is signed and the other operand is
+		unsigned, if the signed type can represent all values
+		of the unsigned type, the unsigned operand is
+		converted to the signed type, else both operands are
+		converted to an unsigned type.
 	*/
-	if (t1 == LONG && t2 == INT && u2)
-		t2 = int2int(e2p, (int_size<long_size)? long_type : ulong_type);
-	else if (t2 == LONG && t1 == INT && u1 && !shifting)	/* ??? */
-		t1 = int2int(e1p, (int_size<long_size)? long_type : ulong_type);
+	if (t1 == LNGLNG && u1 && (t2 != LNGLNG || !u2))
+		convert2 = ulnglng_type;
+	else if (t2 == LNGLNG && u2 && (t1 != LNGLNG || !u1))
+		convert1 = ulnglng_type;
+	else if (t1 == LNGLNG && t2 != LNGLNG && u2) {
+		if ((t2 == LONG ? long_size : int_size) < lnglng_size)
+			convert2 = lnglng_type;
+		else
+			convert1 = convert2 = ulnglng_type;
+	} else if (t2 == LNGLNG && t1 != LNGLNG && u1) {
+		if ((t1 == LONG ? long_size : int_size) < lnglng_size)
+			convert1 = lnglng_type;
+		else
+			convert1 = convert2 = ulnglng_type;
+	} else if (t1 == LNGLNG && t2 != LNGLNG)
+		convert2 = lnglng_type;
+	else if (t2 == LNGLNG && t1 != LNGLNG)
+		convert1 = lnglng_type;
+	else if (t1 == LONG && u1 && (t2 != LONG || !u2))
+		convert2 = ulong_type;
+	else if (t2 == LONG && u2 && (t1 != LONG || !u1))
+		convert1 = ulong_type;
+	else if (t1 == LONG && t2 == INT && u2) {
+		if (int_size < long_size)
+			convert2 = long_type;
+		else
+			convert1 = convert2 = ulong_type;
+	} else if (t2 == LONG && t1 == INT && u1) {
+		if (int_size < long_size)
+			convert1 = long_type;
+		else
+			convert1 = convert2 = ulong_type;
+	} else if (t1 == LONG && t2 != LONG)
+		convert2 = long_type;
+	else if (t2 == LONG && t1 != LONG)
+		convert1 = long_type;
 
-	/*	If either operand has type long int, the other operand is con-
-		verted to long int.
-	*/
-	if (t1 == LONG && t2 != LONG)
-		t2 = int2int(e2p, long_type);
-	else
-	if (t2 == LONG && t1 != LONG && !shifting)	/* ??? */
-		t1 = int2int(e1p, long_type);
+	if (convert1 && !shifting)	/* ??? */
+		t1 = int2int(e1p, convert1);
+	if (convert2)
+		t2 = int2int(e2p, convert2);
 
 	u1 = (*e1p)->ex_type->tp_unsigned;
 	u2 = (*e2p)->ex_type->tp_unsigned;
@@ -161,10 +193,10 @@ void arithbalance(register struct expr **e1p, int oper, register struct expr **e
 		Otherwise, both operands have type int.
 	*/
 	if (u1 && !u2 && !shifting)
-		t2 = int2int(e2p, (t1 == LONG) ? ulong_type : uint_type);
+		t2 = int2int(e2p, uint_type);
 	else
 	if (!u1 && u2 && !shifting)
-		t1 = int2int(e1p, (t2 == LONG) ? ulong_type : uint_type);
+		t1 = int2int(e1p, uint_type);
 
 	if (int_size != pointer_size) {
 		if (ptrdiff) {
@@ -259,6 +291,7 @@ any2arith(register struct expr **expp, register int oper)
 		break;
 	case INT:
 	case LONG:
+	case LNGLNG:
 		break;
 	case ENUM:
 #ifndef	LINT
@@ -457,7 +490,7 @@ void opnd2integral(register struct expr **expp, int oper)
 {
 	register int fund = (*expp)->ex_type->tp_fund;
 
-	if (fund != INT && fund != LONG)	{
+	if (fund != INT && fund != LONG && fund != LNGLNG) {
 		expr_error(*expp, "%s operand to %s",
 				symbol2str(fund), symbol2str(oper));
 		erroneous2int(expp);
@@ -486,6 +519,7 @@ void opnd2logical(register struct expr **expp, int oper)
 	case SHORT:
 	case INT:
 	case LONG:
+	case LNGLNG:
 	case ENUM:
 	case POINTER:
 	case FLOAT:
diff --git a/lang/cem/cemcom.ansi/ch3.c b/lang/cem/cemcom.ansi/ch3.c
index 3132ccbb9..69c75fed6 100644
--- a/lang/cem/cemcom.ansi/ch3.c
+++ b/lang/cem/cemcom.ansi/ch3.c
@@ -58,6 +58,7 @@ void ch3sel(struct expr **expp, int oper, struct idf *idf)
 				break;
 			case INT:
 			case LONG:
+			case LNGLNG:
 				/* An error is given in idf2sdef() */
 				ch3cast(expp, CAST, pa_type);
 				sd = idf2sdef(idf, tp);
@@ -82,6 +83,7 @@ void ch3sel(struct expr **expp, int oper, struct idf *idf)
 		break;
 	case INT:
 	case LONG:
+	case LNGLNG:
 		/* warning will be given by idf2sdef() */
 		break;
 	default:
@@ -679,6 +681,7 @@ int is_integral_type(register struct type *tp)
 	case SHORT:
 	case INT:
 	case LONG:
+	case LNGLNG:
 	case ENUM:
 		return 1;
 #ifndef NOBITFIELD
@@ -697,6 +700,7 @@ int is_arith_type(register struct type *tp)
 	case SHORT:
 	case INT:
 	case LONG:
+	case LNGLNG:
 	case ENUM:
 	case FLOAT:
 	case DOUBLE:
diff --git a/lang/cem/cemcom.ansi/code.c b/lang/cem/cemcom.ansi/code.c
index d50700056..cef708cbf 100644
--- a/lang/cem/cemcom.ansi/code.c
+++ b/lang/cem/cemcom.ansi/code.c
@@ -101,6 +101,10 @@ void init_code(char *dst_file)
 		stb_typedef(ushort_type, "unsigned short");
 		stb_typedef(ulong_type, "unsigned long");
 		stb_typedef(uint_type, "unsigned int");
+		if (lnglng_size >= 0) {
+			stb_typedef(lnglng_type, "long long");
+			stb_typedef(ulnglng_type, "unsigned long long");
+		}
 		stb_typedef(float_type, "float");
 		stb_typedef(double_type, "double");
 		stb_typedef(lngdbl_type, "long double");
diff --git a/lang/cem/cemcom.ansi/conversion.c b/lang/cem/cemcom.ansi/conversion.c
index 60d4c45b5..13af3cef8 100644
--- a/lang/cem/cemcom.ansi/conversion.c
+++ b/lang/cem/cemcom.ansi/conversion.c
@@ -137,6 +137,7 @@ static int convtype(register struct type *tp)
 	case INT:
 	case ERRONEOUS:
 	case LONG:
+	case LNGLNG:
 	case ENUM:
 		return tp->tp_unsigned ? T_UNSIGNED : T_SIGNED;
 	case FLOAT:
diff --git a/lang/cem/cemcom.ansi/cstoper.c b/lang/cem/cemcom.ansi/cstoper.c
index 774e356d6..61c3fd6e3 100644
--- a/lang/cem/cemcom.ansi/cstoper.c
+++ b/lang/cem/cemcom.ansi/cstoper.c
@@ -175,7 +175,11 @@ void init_cst(void)
 	register int i = 0;
 	register arith bt = (arith)0;
 
-	while (!(bt < 0))	{
+	/*	FIXME arith is insufficient for long long.  We ignore
+		this problem and write masks up to full_mask[8], but
+		masks are wrong after bt < 0.
+	*/
+	while (!(bt < 0) || i < 8) {
 		bt = (bt << 8) + 0377, i++;
 		if (i > MAXSIZE)
 			fatal("array full_mask too small for this machine");
diff --git a/lang/cem/cemcom.ansi/declar.g b/lang/cem/cemcom.ansi/declar.g
index 92adefb5f..8bec8e72d 100644
--- a/lang/cem/cemcom.ansi/declar.g
+++ b/lang/cem/cemcom.ansi/declar.g
@@ -123,9 +123,13 @@ single_decl_specifier /* non_empty */ (register struct decspecs *ds;)
 	}
 |
 	[ SHORT | LONG ]
-	{	if (ds->ds_size)
-			error("repeated size specifier");
-		ds->ds_size = DOT;
+	{	if (ds->ds_size == LONG && DOT == LONG)
+			ds->ds_size = LNGLNG;
+		else {
+			if (ds->ds_size)
+				error("repeated size specifier");
+			ds->ds_size = DOT;
+		}
 	}
 |
 	[ SIGNED | UNSIGNED ]
diff --git a/lang/cem/cemcom.ansi/decspecs.c b/lang/cem/cemcom.ansi/decspecs.c
index 42465e213..8af08eacb 100644
--- a/lang/cem/cemcom.ansi/decspecs.c
+++ b/lang/cem/cemcom.ansi/decspecs.c
@@ -69,7 +69,8 @@ void do_decspecs(register struct decspecs *ds)
 	}
 	if (ds->ds_size)
 	{
-		register int ds_isshort = (ds->ds_size == SHORT);
+		int ds_isshort = (ds->ds_size == SHORT);
+		int ds_islong = (ds->ds_size == LONG);
 
 		if (ds->ds_typedef)
 			goto SIZE_ERROR;
@@ -78,10 +79,18 @@ void do_decspecs(register struct decspecs *ds)
 		{
 			if (ds_isshort)
 				tp = short_type;
-			else
+			else if (ds_islong)
 				tp = long_type;
+			else
+			{
+				assert(ds->ds_size == LNGLNG);
+				if (no_long_long())
+					tp = error_type;
+				else
+					tp = lnglng_type;
+			}
 		}
-		else if (tp == double_type && !ds_isshort)
+		else if (tp == double_type && ds_islong)
 		{
 			tp = lngdbl_type;
 		}
@@ -122,6 +131,11 @@ void do_decspecs(register struct decspecs *ds)
 			if (ds_isunsigned)
 				tp = ulong_type;
 		}
+		else if (tp == lnglng_type)
+		{
+			if (ds_isunsigned)
+				tp = ulnglng_type;
+		}
 		else
 		{
 			SIGN_ERROR: error("%s with illegal type",
diff --git a/lang/cem/cemcom.ansi/eval.c b/lang/cem/cemcom.ansi/eval.c
index 352d590b3..338ca51cb 100644
--- a/lang/cem/cemcom.ansi/eval.c
+++ b/lang/cem/cemcom.ansi/eval.c
@@ -133,13 +133,15 @@ void EVAL(register struct expr *expr, int val, int code, label true_label, label
 		case '+':
 			/*	We have the following possibilities :
 				int + int, pointer + int, pointer + long,
-				long + long, double + double
+				long + long, long long + long long,
+				double + double
 			*/
 			operands(expr, gencode);
 			if (gencode) {
 				switch (tp->tp_fund) {
 				case INT:
 				case LONG:
+				case LNGLNG:
 					if (tp->tp_unsigned)
 						C_adu(tp->tp_size);
 					else
@@ -165,6 +167,7 @@ void EVAL(register struct expr *expr, int val, int code, label true_label, label
 					switch (tp->tp_fund) {
 					case INT:
 					case LONG:
+					case LNGLNG:
 					case POINTER:
 						C_ngi(tp->tp_size);
 						break;
@@ -181,7 +184,8 @@ void EVAL(register struct expr *expr, int val, int code, label true_label, label
 			}
 			/*	else binary; we have the following flavours:
 				int - int, pointer - int, pointer - long,
-				pointer - pointer, long - long, double - double
+				pointer - pointer, long - long,
+				long long - long long, double - double
 			*/
 			operands(expr, gencode);
 			if (!gencode)
@@ -189,6 +193,7 @@ void EVAL(register struct expr *expr, int val, int code, label true_label, label
 			switch (tp->tp_fund) {
 			case INT:
 			case LONG:
+			case LNGLNG:
 				if (tp->tp_unsigned)
 					C_sbu(tp->tp_size);
 				else
@@ -224,6 +229,7 @@ void EVAL(register struct expr *expr, int val, int code, label true_label, label
 				switch (tp->tp_fund) {
 				case INT:
 				case LONG:
+				case LNGLNG:
 				case POINTER:
 					if (tp->tp_unsigned)
 						C_mlu(tp->tp_size);
@@ -246,6 +252,7 @@ void EVAL(register struct expr *expr, int val, int code, label true_label, label
 				switch (tp->tp_fund) {
 				case INT:
 				case LONG:
+				case LNGLNG:
 				case POINTER:
 					if (tp->tp_unsigned)
 						C_dvu(tp->tp_size);
@@ -264,7 +271,8 @@ void EVAL(register struct expr *expr, int val, int code, label true_label, label
 			break;
 		case '%':
 			operands(expr, gencode);
-			assert(tp->tp_fund==INT || tp->tp_fund==LONG);
+			assert(tp->tp_fund==INT || tp->tp_fund==LONG ||
+			       tp->tp_fund==LNGLNG);
 			if (gencode)
 				if (tp->tp_unsigned)
 					C_rmu(tp->tp_size);
@@ -301,6 +309,7 @@ void EVAL(register struct expr *expr, int val, int code, label true_label, label
 				switch (tp->tp_fund) {
 				case INT:
 				case LONG:
+				case LNGLNG:
 					if (left->ex_type->tp_unsigned)
 						C_cmu(size);
 					else
@@ -736,6 +745,7 @@ void assop(register struct type *type, int oper)
 	case SHORT:
 	case INT:
 	case LONG:
+	case LNGLNG:
 	case ENUM:
 		switch (oper) {
 		case PLUSAB:
@@ -1014,10 +1024,13 @@ void load_val(register struct expr *expr, int rlval)
 
 void load_cst(arith val, arith siz)
 {
+	/*	EM can't encode ldc with constant over 4 bytes.
+		Such a constant must go into rom.
+	*/
 	if ((int)siz <= (int)word_size)
 		C_loc(val);
 	else
-	if ((int)siz == (int)dword_size)
+	if ((int)siz == (int)dword_size && (int)dword_size <= 4)
 		C_ldc(val);
 	else {
 		label datlab;
diff --git a/lang/cem/cemcom.ansi/ival.g b/lang/cem/cemcom.ansi/ival.g
index 398343ed4..995d46248 100644
--- a/lang/cem/cemcom.ansi/ival.g
+++ b/lang/cem/cemcom.ansi/ival.g
@@ -518,6 +518,7 @@ void check_ival(struct expr **expp, register struct type *tp)
 	case SHORT:
 	case INT:
 	case LONG:
+	case LNGLNG:
 	case ENUM:
 	case POINTER:
 		ch3cast(expp, '=', tp);
diff --git a/lang/cem/cemcom.ansi/main.c b/lang/cem/cemcom.ansi/main.c
index e7988bf66..da5ced81d 100644
--- a/lang/cem/cemcom.ansi/main.c
+++ b/lang/cem/cemcom.ansi/main.c
@@ -57,6 +57,7 @@ arith
 	dword_size = (2 * SZ_WORD),
 	int_size = SZ_INT,
 	long_size = SZ_LONG,
+	lnglng_size = SZ_LNGLNG,
 	float_size = SZ_FLOAT,
 	double_size = SZ_DOUBLE,
 	lngdbl_size = SZ_LNGDBL,
@@ -67,6 +68,7 @@ int
 	word_align = AL_WORD,
 	int_align = AL_INT,
 	long_align = AL_LONG,
+	lnglng_align = AL_LNGLNG,
 	float_align = AL_FLOAT,
 	double_align = AL_DOUBLE,
 	lngdbl_align = AL_LNGDBL,
@@ -227,6 +229,10 @@ static void init(void)
 	long_type = standard_type(LONG, 0, long_align, long_size);
 	ulong_type = standard_type(LONG, UNSIGNED, long_align, long_size);
 
+	lnglng_type = standard_type(LNGLNG, 0, lnglng_align, lnglng_size);
+	ulnglng_type = standard_type(LNGLNG, UNSIGNED, lnglng_align,
+				     lnglng_size);
+
 	float_type = standard_type(FLOAT, 0, float_align, float_size);
 	double_type = standard_type(DOUBLE, 0, double_align, double_size);
 	lngdbl_type = standard_type(LNGDBL, 0, lngdbl_align, lngdbl_size);
diff --git a/lang/cem/cemcom.ansi/options.c b/lang/cem/cemcom.ansi/options.c
index 0c37c85d4..2cdfe3835 100644
--- a/lang/cem/cemcom.ansi/options.c
+++ b/lang/cem/cemcom.ansi/options.c
@@ -159,6 +159,12 @@ next_option:			/* to allow combined one-char options */
 				if (algn != 0)
 					long_align = algn;
 				break;
+			case 'q':	/* long long	*/
+				if (sz != (arith)0)
+					lnglng_size = sz;
+				if (algn != 0)
+					lnglng_align = algn;
+				break;
 			case 'f':	/* float	*/
 				if (sz != (arith)0)
 					float_size = sz;
diff --git a/lang/cem/cemcom.ansi/sizes.h b/lang/cem/cemcom.ansi/sizes.h
index 0f38c5668..f943d184b 100644
--- a/lang/cem/cemcom.ansi/sizes.h
+++ b/lang/cem/cemcom.ansi/sizes.h
@@ -10,6 +10,7 @@
 #ifndef NOCROSS
 extern arith
 	short_size, word_size, dword_size, int_size, long_size,
+	lnglng_size,
 	float_size, double_size, lngdbl_size,
 	pointer_size;
 
@@ -20,6 +21,7 @@ extern arith max_int, max_unsigned;	/* cstoper.c	*/
 #define dword_size	((arith)2*SZ_WORD)
 #define int_size	((arith)SZ_INT)
 #define long_size	((arith)SZ_LONG)
+#define lnglng_size	((arith)SZ_LNGLNG)
 #define float_size	((arith)SZ_FLOAT)
 #define double_size	((arith)SZ_DOUBLE)
 #define	lngdbl_size	((arith)SZ_LNGDBL)
diff --git a/lang/cem/cemcom.ansi/stab.c b/lang/cem/cemcom.ansi/stab.c
index a7a7928c9..5a002301f 100644
--- a/lang/cem/cemcom.ansi/stab.c
+++ b/lang/cem/cemcom.ansi/stab.c
@@ -74,9 +74,10 @@ static void adds_db_str(char *s)
 
 static void stb_type(register struct type *tp)
 {
-	char buf[128];
+	char buf[128], *range;
 	static int stb_count;
 	long l;
+	int uns;
 
 	if (tp->tp_dbindex > 0)
 	{
@@ -101,18 +102,26 @@ static void stb_type(register struct type *tp)
 		break;
 	case INT:
 	case LONG:
+	case LNGLNG:
 	case CHAR:
 	case SHORT:
-		l = full_mask[(int) tp->tp_size];
-		if (tp->tp_unsigned)
+		switch ((tp->tp_size << 3) + !tp->tp_unsigned)
 		{
-			adds_db_str(sprint(buf, "r%d;0;%ld", tp->tp_dbindex, l));
-		}
-		else
-		{
-			l &= ~(1L << ((int) tp->tp_size * 8 - 1));
-			adds_db_str(sprint(buf, "r%d;%ld;%ld", tp->tp_dbindex, -l - 1, l));
+#define R(s) range = #s; break
+		case 0010: R(0;255);
+		case 0011: R(-128;127);
+		case 0020: R(0;65535);
+		case 0021: R(-32768;32767);
+		default:   R(0;-1); /* acts as 0;4294967295 */
+		case 0041: R(-2147483648;2147483647);
+		/*	The stabs reader in gdb(1) needs an octal integer
+			when its value doesn't fit in type long.
+		*/
+		case 0100: R(0;01777777777777777777777);
+		case 0101: R(01000000000000000000000;0777777777777777777777);
+#undef R
 		}
+		adds_db_str(sprint(buf, "r%d;%s", tp->tp_dbindex, range));
 		break;
 	case FLOAT:
 	case DOUBLE:
diff --git a/lang/cem/cemcom.ansi/struct.c b/lang/cem/cemcom.ansi/struct.c
index b69acd206..d8f6ef2a6 100644
--- a/lang/cem/cemcom.ansi/struct.c
+++ b/lang/cem/cemcom.ansi/struct.c
@@ -360,6 +360,7 @@ add_field(
 	case SHORT:
 	case ENUM:
 	case LONG:
+	case LNGLNG:
 		strict("non-portable field type");
 	case INT:
 		/* right type; size OK? */
diff --git a/lang/cem/cemcom.ansi/switch.c b/lang/cem/cemcom.ansi/switch.c
index 24cb82c6f..7db1f28bd 100644
--- a/lang/cem/cemcom.ansi/switch.c
+++ b/lang/cem/cemcom.ansi/switch.c
@@ -61,7 +61,7 @@ void code_startswitch(struct expr **expp)
 	register label l_break = text_label();
 	register struct switch_hdr *sh = new_switch_hdr();
 	int fund = any2arith(expp, SWITCH);
-				    /* INT, LONG, FLOAT, DOUBLE or LNGDBL */
+			/* INT, LONG, LNGLNG, FLOAT, DOUBLE or LNGDBL */
 	
 	switch (fund) {
 	case FLOAT:
diff --git a/lang/cem/cemcom.ansi/tokenname.c b/lang/cem/cemcom.ansi/tokenname.c
index b430abbd8..fb3bae349 100644
--- a/lang/cem/cemcom.ansi/tokenname.c
+++ b/lang/cem/cemcom.ansi/tokenname.c
@@ -104,6 +104,7 @@ struct tokenname tkidf[] =	{	/* names of the identifier tokens */
 
 #ifdef	____
 struct tokenname tkfunny[] =	{	/* internal keywords */
+	{LNGLNG, "long long"},
 	{LNGDBL, "long double"},
 	{ULONG, "unsigned long"},
 
diff --git a/lang/cem/cemcom.ansi/type.c b/lang/cem/cemcom.ansi/type.c
index 750ed9a8a..57a6cd98e 100644
--- a/lang/cem/cemcom.ansi/type.c
+++ b/lang/cem/cemcom.ansi/type.c
@@ -25,6 +25,7 @@
  */
 struct type *schar_type, *uchar_type, *short_type, *ushort_type, *word_type,
 		*uword_type, *int_type, *uint_type, *long_type, *ulong_type,
+		*lnglng_type, *ulnglng_type,
 		*float_type, *double_type, *lngdbl_type, *void_type, *string_type,
 		*funint_type, *error_type;
 
@@ -291,3 +292,20 @@ void completed(struct type *tp)
 		atp = atp->next;
 	}
 }
+
+int no_long_long(void)
+{
+	static int shown = 0;
+
+	if (lnglng_size < 0)
+	{
+		if (!shown)
+		{
+			error("no long long for this machine");
+			shown = 1;
+		}
+		return 1;
+	}
+	else
+		return 0;
+}
diff --git a/lang/cem/cemcom.ansi/type.str b/lang/cem/cemcom.ansi/type.str
index b98ffbb6c..ada366231 100644
--- a/lang/cem/cemcom.ansi/type.str
+++ b/lang/cem/cemcom.ansi/type.str
@@ -71,6 +71,7 @@ extern struct type
 	*word_type, *uword_type,
 	*int_type, *uint_type,
 	*long_type, *ulong_type,
+	*lnglng_type, *ulnglng_type,
 	*float_type, *double_type, *lngdbl_type,
 	*void_type,
 	*string_type, *funint_type, *error_type;
@@ -93,6 +94,7 @@ void idf2type(struct idf *idf, struct type **tpp);
 arith align(arith pos, int al);
 struct type * standard_type(int fund, int sgn, int algn, arith sz);
 void completed(struct type *tp);
+int no_long_long(void);
 
 
 /* ALLOCDEF "type" 50 */
diff --git a/plat/linux386/descr b/plat/linux386/descr
index fd033f423..8a7dc5f47 100644
--- a/plat/linux386/descr
+++ b/plat/linux386/descr
@@ -23,7 +23,8 @@ var CPP_F=-D__unix
 var ALIGN=-a0:4 -a1:4 -a2:4 -a3:4 -b0:0x08048054
 var C_LIB={PLATFORMDIR}/libc-ansi.a
 # bitfields reversed for compatibility with (g)cc.
-var CC_ALIGN=-Vr
+# long long enabled.
+var CC_ALIGN=-Vrq8.4
 var OLD_C_LIB={C_LIB}
 var MACHOPT_F=-m10
 var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr

From 15950f9c9593aa55245ad2ca1b26786ec6ff0dff Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Wed, 4 Sep 2019 22:14:38 -0400
Subject: [PATCH 05/22] Add long long literals like 123LL to ACK C.

For now, a long long literal must have the 'LL' or 'll' suffix.  A
literal without 'LL' or 'll' acts as before: it may become unsigned
long but not long long.  (For targets where int and long have the same
size, some literals change from unsigned int to unsigned long.)

Type `arith` may be too narrow for long long values.  Add a second
type `writh` for wide arithmetic, and change some variables from arith
to writh.  This may cause bugs if I forget to use writh, or if a
conversion from writh to arith overflows.  I mark some conversions
with (arith) or (writh) casts.

 - BigPars, SmallPars: Remove SPECIAL_ARITHMETICS.  This feature
   would change arith to a different type, but can't work, because it
   would conflict with definitions of arith in both <em_arith.h> and
   <flt_arith.h>.
 - LLlex.c: Understand 'LL' or 'll' suffix.  Cut size of constant when
   it overflows writh, not only when it overflows the target machine's
   types.  (This cut might not be necessary, because we might cut it
   again later.)  When picking signed long or unsigned long, check the
   target's long type, not the compiler's arith type; the old check
   for `val >= 0` was broken where sizeof(arith) > 4.
 - LLlex.h: Change struct token's tok_ival to writh, so it can hold a
   long long literal.
 - arith.c: Adjust to VL_VALUE being writh.  Don't convert between
   float and integer at compile-time if the integer might be too wide
   for <flt_arith.h>.  Add writh2str(), because writh might be too
   wide for long2str().
 - arith.h: Remove SPECIAL_ARITHMETICS.  Declare full_mask[] here,
   not in several *.c files.  Declare writh2str().
 - ch3.c, ch3bin.c, ch3mon.c, declarator.c, statement.g: Remove
   obsolete casts.  Adjust to VL_VALUE being writh.
 - conversion.c, stab.c: Don't declare full_mask[].
 - cstoper.c: Use writh for constant operations on VL_VALUE, and for
   full_mask[].
 - declar.g, field.c, ival.g: Add casts.
 - dumpidf.c: Need to #include "parameters.h" before checking DEBUG.
   Use writh2str, because "%ld" might not work.
 - eval.c, eval.h: Add casts.  Use writh when writing a wide constant
   in EM.
 - expr.c: Add and remove casts.  In fill_int_expr(), make expression
   from long long literal.  In chk_cst_expr(), allow long long as
   constant expression, so the compiler may accept `case 123LL:` in a
   switch statement.
 - expr.str: Change struct value's vl_value and struct expr's VL_VALUE
   to writh, so an expression may have a long long value at compile
   time.
 - statement.g: Remove obsolete casts.
 - switch.c, switch.str: Use writh in case entries for switch
   statements, so `switch (ll) {...}` with long long ll works.
 - tokenname.c: Add ULNGLNG so LLlex.c can use it for literals.
---
 lang/cem/cemcom.ansi/BigPars      |   5 --
 lang/cem/cemcom.ansi/LLlex.c      | 118 ++++++++++++++++++++----------
 lang/cem/cemcom.ansi/LLlex.h      |   2 +-
 lang/cem/cemcom.ansi/SmallPars    |   5 --
 lang/cem/cemcom.ansi/arith.c      |  72 ++++++++++++++----
 lang/cem/cemcom.ansi/arith.h      |  34 ++++-----
 lang/cem/cemcom.ansi/ch3.c        |   4 +-
 lang/cem/cemcom.ansi/ch3bin.c     |   6 +-
 lang/cem/cemcom.ansi/ch3mon.c     |   3 +-
 lang/cem/cemcom.ansi/code.c       |   2 +-
 lang/cem/cemcom.ansi/conversion.c |   2 -
 lang/cem/cemcom.ansi/cstoper.c    |  61 ++++++++-------
 lang/cem/cemcom.ansi/declar.g     |   6 +-
 lang/cem/cemcom.ansi/declarator.c |   6 +-
 lang/cem/cemcom.ansi/dumpidf.c    |   6 +-
 lang/cem/cemcom.ansi/eval.c       |  20 ++---
 lang/cem/cemcom.ansi/eval.h       |   3 +-
 lang/cem/cemcom.ansi/expr.c       |  21 ++++--
 lang/cem/cemcom.ansi/expr.str     |   6 +-
 lang/cem/cemcom.ansi/field.c      |   7 +-
 lang/cem/cemcom.ansi/ival.g       |  14 ++--
 lang/cem/cemcom.ansi/stab.c       |   2 -
 lang/cem/cemcom.ansi/statement.g  |  10 +--
 lang/cem/cemcom.ansi/switch.c     |  16 ++--
 lang/cem/cemcom.ansi/switch.str   |   6 +-
 lang/cem/cemcom.ansi/tokenname.c  |   1 +
 26 files changed, 258 insertions(+), 180 deletions(-)

diff --git a/lang/cem/cemcom.ansi/BigPars b/lang/cem/cemcom.ansi/BigPars
index ad758c8a6..882841e64 100644
--- a/lang/cem/cemcom.ansi/BigPars
+++ b/lang/cem/cemcom.ansi/BigPars
@@ -117,11 +117,6 @@
 /*#define NOBITFIELD	1	*//* if NOT defined, implement bitfields	*/
 
 
-!File: spec_arith.h
-/* describes internal compiler arithmetics */
-#undef	SPECIAL_ARITHMETICS	/* something different from native long */
-
-
 !File: static.h
 #define GSTATIC			/* for large global "static" arrays */
 
diff --git a/lang/cem/cemcom.ansi/LLlex.c b/lang/cem/cemcom.ansi/LLlex.c
index d4be03449..da18e42cb 100644
--- a/lang/cem/cemcom.ansi/LLlex.c
+++ b/lang/cem/cemcom.ansi/LLlex.c
@@ -17,6 +17,7 @@
 #include "Lpars.h"
 #include "class.h"
 #include "sizes.h"
+#include "type.h"     /* no_long_long() */
 #include "error.h"
 #include "domacro.h"
 #include "specials.h" /* registration of special identifiers */
@@ -37,7 +38,6 @@ int LexSave = 0; /* last character read by GetChar	*/
 
 #define FLG_ESEEN 0x01 /* possibly a floating point number */
 #define FLG_DOTSEEN 0x02 /* certainly a floating point number */
-extern arith full_mask[];
 
 #ifdef LINT
 extern int lint_skip_comment;
@@ -594,10 +594,12 @@ static void strflt2tok(char fltbuf[], struct token* ptok)
 static void strint2tok(char intbuf[], struct token* ptok)
 {
 	register char* cp = intbuf;
-	int base = 10;
-	arith val = 0, dig, ubound;
-	int uns_flg = 0, lng_flg = 0, malformed = 0, ovfl = 0;
-	int fund;
+	int base = 10, dig;
+	unsigned writh val = 0, ubound;
+	int uns_flg = 0, lng_flg = 0, lnglng_flg = 0;
+	int malformed = 0, ovfl = 0;
+	unsigned writh uint_mask, ulng_mask, ulnglng_mask;
+	int cut, fund;
 
 	assert(*cp != '-');
 	if (*cp == '0')
@@ -611,11 +613,8 @@ static void strint2tok(char intbuf[], struct token* ptok)
 		else
 			base = 8;
 	}
-	/* The upperbound will be the same as when computed with
-	 * max_unsigned_arith / base (since base is even). The problem here
-	 * is that unsigned arith is not accepted by all compilers.
-	 */
-	ubound = max_arith / (base / 2);
+	/* The upperbound checks if val * base would overflow. */
+	ubound = ~(unsigned writh)0 / base;
 
 	while (is_hex(*cp))
 	{
@@ -626,10 +625,10 @@ static void strint2tok(char intbuf[], struct token* ptok)
 		}
 		else
 		{
-			if (val < 0 || val > ubound)
+			if (val > ubound)
 				ovfl++;
 			val *= base;
-			if (val < 0 && val + dig >= 0)
+			if (val > val + dig)
 				ovfl++;
 			val += dig;
 		}
@@ -639,7 +638,16 @@ static void strint2tok(char intbuf[], struct token* ptok)
 	while (*cp)
 	{
 		if (*cp == 'l' || *cp == 'L')
-			lng_flg++;
+		{
+			if (*cp == *(cp + 1))
+			{
+				/* 'll' or 'LL' */
+				lnglng_flg++;
+				cp++;
+			}
+			else
+				lng_flg++;
+		}
 		else if (*cp == 'u' || *cp == 'U')
 			uns_flg++;
 		else
@@ -658,51 +666,83 @@ static void strint2tok(char intbuf[], struct token* ptok)
 	}
 	else
 	{
-		if (lng_flg > 1)
+		if (lng_flg + lnglng_flg > 1)
 			lexerror("only one long suffix allowed");
 		if (uns_flg > 1)
 			lexerror("only one unsigned suffix allowed");
 	}
+
+	/* Get masks like 0XFFFF, 0XFFFFFFFF as unsigned values. */
+	uint_mask = (unsigned writh)full_mask[(int)int_size];
+	ulng_mask = (unsigned writh)full_mask[(int)long_size];
+	if (lnglng_size < 0)
+		ulnglng_mask = 0;
+	else
+		ulnglng_mask = (unsigned writh)full_mask[(int)lnglng_size];
+
+	/*	If a decimal literal with no suffix is too big for int
+	    and long, then C89 tries unsigned long, but C99 tries
+	    long long (WG14, Rationale for C99, C99RationaleV5.10.pdf,
+	    6.4.4.1 Integer constants).
+		This compiler follows C89 when the literal has no
+	    long long suffix.
+	*/
+	cut = 0;
 	if (ovfl)
 	{
 		lexwarning("overflow in constant");
-		fund = ULONG;
+		cut = 1; /* cut the size of the constant */
 	}
-	else if (!lng_flg && (val & full_mask[(int)int_size]) == val)
+	else if (!lng_flg && !lnglng_flg && (val & uint_mask) == val)
 	{
-		if (val >= 0 && val <= max_int)
-		{
+		if ((val & (uint_mask >> 1)) == val)
 			fund = INT;
-		}
-		else if (int_size == long_size)
-		{
-			fund = UNSIGNED;
-		}
 		else if (base == 10 && !uns_flg)
-			fund = LONG;
+		{
+			if ((val & (ulng_mask >> 1)) == val)
+				fund = LONG;
+			else
+				fund = ULONG;
+		}
 		else
 			fund = UNSIGNED;
 	}
-	else if ((val & full_mask[(int)long_size]) == val)
+	else if (!lnglng_flg && (val & ulng_mask) == val)
 	{
-		if (val >= 0)
+		if ((val & (ulng_mask >> 1)) == val)
 			fund = LONG;
 		else
 			fund = ULONG;
 	}
-	else
-	{ /* sizeof(arith) is greater than long_size */
-		assert(arith_size > long_size);
-		lexwarning("constant too large for target machine");
-		/* cut the size to prevent further complaints */
-		val &= full_mask[(int)long_size];
-		fund = ULONG;
-	}
-	if (lng_flg)
+	else if (lnglng_flg && (val & ulnglng_mask) == val)
 	{
-		/* fund can't be INT */
-		if (fund == UNSIGNED)
+		if ((val & (ulnglng_mask >> 1)) == val)
+			fund = LNGLNG;
+		else
+			fund = ULNGLNG;
+	}
+	else if (lnglng_flg && no_long_long())
+		fund = ERRONEOUS;
+	else
+	{
+		assert(sizeof(val) > long_size ||
+		       (lnglng_size >= 0 && sizeof(val) > lnglng_size));
+		lexwarning("constant too large for target machine");
+		cut = 1;
+	}
+	if (cut)
+	{
+		/* cut the size to prevent further complaints */
+		if (lnglng_flg)
+		{
+			fund = ULNGLNG;
+			val &= ulnglng_mask;
+		}
+		else
+		{
 			fund = ULONG;
+			val &= ulng_mask;
+		}
 	}
 	if (uns_flg)
 	{
@@ -710,7 +750,9 @@ static void strint2tok(char intbuf[], struct token* ptok)
 			fund = UNSIGNED;
 		else if (fund == LONG)
 			fund = ULONG;
+		else if (fund == LNGLNG)
+			fund = ULNGLNG;
 	}
 	ptok->tk_fund = fund;
-	ptok->tk_ival = val;
+	ptok->tk_ival = (writh)val;
 }
diff --git a/lang/cem/cemcom.ansi/LLlex.h b/lang/cem/cemcom.ansi/LLlex.h
index a52f5ed78..921c255dd 100644
--- a/lang/cem/cemcom.ansi/LLlex.h
+++ b/lang/cem/cemcom.ansi/LLlex.h
@@ -26,7 +26,7 @@ struct token	{
 			char *tok_bts;	/* row of bytes */
 			int tok_len;	/* length of row of bytes */
 		} tok_string;
-		arith tok_ival;		/* for INTEGER */
+		writh tok_ival;		/* for INTEGER */
 		char *tok_fval;		/* for FLOATING */
 	} tok_data;
 };
diff --git a/lang/cem/cemcom.ansi/SmallPars b/lang/cem/cemcom.ansi/SmallPars
index 01f7073b1..fa3147ea9 100644
--- a/lang/cem/cemcom.ansi/SmallPars
+++ b/lang/cem/cemcom.ansi/SmallPars
@@ -117,11 +117,6 @@
 /*#define NOBITFIELD	1	/* if NOT defined, implement bitfields	*/
 
 
-!File: spec_arith.h
-/* describes internal compiler arithmetics */
-#undef	SPECIAL_ARITHMETICS	/* something different from native long */
-
-
 !File: static.h
 #define GSTATIC			/* for large global "static" arrays */
 
diff --git a/lang/cem/cemcom.ansi/arith.c b/lang/cem/cemcom.ansi/arith.c
index e1c3311c3..21a86faf3 100644
--- a/lang/cem/cemcom.ansi/arith.c
+++ b/lang/cem/cemcom.ansi/arith.c
@@ -371,8 +371,7 @@ int int2int(struct expr **expp, register struct type *tp)
 					unsigned int x = ~0;
 					unsigned int y = -1;
 			*/
-			extern long full_mask[];
-			long remainder = exp->VL_VALUE &
+			writh remainder = exp->VL_VALUE &
 						~full_mask[(int)(tp->tp_size)];
 
 			if (remainder == 0 ||
@@ -389,6 +388,16 @@ int int2int(struct expr **expp, register struct type *tp)
 	return exp->ex_type->tp_fund;
 }
 
+static int fit4(writh val, int uns)
+{
+	/* Does this value fit in 4 bytes? */
+	unsigned writh u = (unsigned writh)val;
+
+	if (!uns)
+		u += 0x80000000UL;
+	return (u & full_mask[4]) == u;
+}
+
 /* With compile-time constants, we don't set fp_used, since this is done
  * only when necessary in eval.c.
  */
@@ -400,10 +409,10 @@ void int2float(register struct expr **expp, struct type *tp)
 	register struct expr *exp = *expp;
 	int uns = exp->ex_type->tp_unsigned;
 	
-	if (is_cp_cst(exp)) {
+	if (is_cp_cst(exp) && fit4(exp->VL_VALUE, uns)) {
 		exp->ex_type = tp;
 		exp->ex_class = Float;
-		flt_arith2flt(exp->VL_VALUE, &(exp->FL_ARITH), uns);
+		flt_arith2flt((arith)exp->VL_VALUE, &(exp->FL_ARITH), uns);
 	}
 	else	{
 		fp_used = 1;
@@ -417,24 +426,35 @@ void float2int(struct expr **expp, struct type *tp)
 		converted to the integral type tp.
 	*/
 	register struct expr *ex = *expp;
-	
+
 	if (is_fp_cst(ex)) {
 		arith ar = flt_flt2arith(&ex->FL_ARITH, tp->tp_unsigned);
-
+#ifdef NOTDEF
+		/*	Historically, we always did the conversion at
+		    compile time.   This is now wrong if type arith is
+		    too narrow for an 8-byte integer.
+		*/
 		if (flt_status == FLT_OVFL)
 			expr_warning(ex,"overflow in float to int conversion");
 		else if (flt_status == FLT_UNFL)
 			expr_warning(ex,"underflow in float to unsigned conversion");
-		ex->ex_type = tp;
-		/* The following lines are copied from fill_int_expr */
-		ex->ex_class = Value;
-		ex->VL_CLASS = Const;
-		ex->VL_VALUE = ar;
-		cut_size(ex);
-	} else {
-		fp_used = 1;
-		*expp = arith2arith(tp, FLOAT2INT, ex);
+#endif /* NOTDEF */
+		/*	Now, we defer the conversion until run time
+		    unless it fits in 4 bytes.
+		*/
+		if (flt_status != FLT_OVFL && flt_status != FLT_UNFL &&
+		    fit4((writh)ar, tp->tp_unsigned)) {
+			ex->ex_type = tp;
+			/* The following lines are copied from fill_int_expr */
+			ex->ex_class = Value;
+			ex->VL_CLASS = Const;
+			ex->VL_VALUE = (writh)ar;
+			cut_size(ex);
+			return;
+		}
 	}
+	fp_used = 1;
+	*expp = arith2arith(tp, FLOAT2INT, ex);
 }
 
 void float2float(register struct expr **expp, struct type *tp)
@@ -640,3 +660,25 @@ void switch_sign_fp(register struct expr *expr)
 {
 	flt_umin(&(expr->FL_ARITH));
 }
+
+char *writh2str(writh val, int uns)
+{
+	/*	Converts val to a decimal string, like
+		long2str(val, 10), but allows wider values.
+	*/
+	static char buf[NUMSIZE + 1];
+	char *cp = &buf[NUMSIZE + 1];
+	int negative = (!uns && val < 0);
+	unsigned writh u = (unsigned writh)val;
+
+	if (negative)
+		u = -u;
+	*--cp = '\0';
+	do {
+		*--cp = '0' + (u % 10);
+		u /= 10;
+	} while (u != 0);
+	if (negative)
+		*--cp = '-';
+	return cp;
+}
diff --git a/lang/cem/cemcom.ansi/arith.h b/lang/cem/cemcom.ansi/arith.h
index 65b0003de..a0b7148a7 100644
--- a/lang/cem/cemcom.ansi/arith.h
+++ b/lang/cem/cemcom.ansi/arith.h
@@ -5,30 +5,25 @@
 /* $Id$ */
 /* COMPILER ARITHMETIC */
 
-/*	Normally the compiler does its internal arithmetics in longs
-	native to the source machine, which is always good for local
-	compilations, and generally OK too for cross compilations
-	downwards and sidewards.  For upwards cross compilation and
-	to save storage on small machines, SPECIAL_ARITHMETICS will
-	be handy.
+/*	The compiler uses 2 types, arith and writh, for its internal
+	arithmetic.  Type arith is normally long, and may be too
+	narrow for long long values.  We can't change arith to a wider
+	type, because both <em_arith.h> (pulled by <em.h>) and
+	<flt_arith.h> define arith.
+
+	Type writh (wide arithmetic) is for values that might not fit
+	in arith.  Normally writh is the long long native to the
+	source machine, which is always good for local compilations,
+	and generally OK too for cross compilations downwards and
+	sidewards.
 */
 #ifndef ARITH_H_
 #define ARITH_H_
 
-#include	"parameters.h"
-
-#ifndef	SPECIAL_ARITHMETICS
-
 #include    <em_arith.h>		/* obtain definition of "arith"	*/
-#include	<flt_arith.h>
 
-#else	/* SPECIAL_ARITHMETICS */
-
-/*	All preprocessor arithmetic should be done in longs.
-*/
-#define	arith	long				/* dummy */
-
-#endif	/* SPECIAL_ARITHMETICS */
+#define	writh		long long
+/* The compiler also uses "unsigned writh". */
 
 struct expr;
 struct type;
@@ -37,6 +32,8 @@ struct type;
 #define	arith_sign	((arith) 1 << (arith_size * 8 - 1))
 #define	max_arith	(~arith_sign)
 
+extern writh full_mask[];		/* cstoper.c */
+
 void arithbalance(register struct expr **e1p, int oper, register struct expr **e2p);
 void relbalance(register struct expr **e1p, int oper, register struct expr **e2p);
 void ch3pointer(struct expr **expp, int oper, register struct type *tp);
@@ -57,5 +54,6 @@ void any2opnd(register struct expr **expp, int oper);
 void any2parameter(register struct expr **expp);
 void field2arith(register struct expr **expp);
 void switch_sign_fp(register struct expr *expr);
+char *writh2str(writh val, int uns);
 
 #endif /* ARITH_H_ */
diff --git a/lang/cem/cemcom.ansi/ch3.c b/lang/cem/cemcom.ansi/ch3.c
index 69c75fed6..2f70c0c38 100644
--- a/lang/cem/cemcom.ansi/ch3.c
+++ b/lang/cem/cemcom.ansi/ch3.c
@@ -320,7 +320,7 @@ void ch3cast(register struct expr **expp, int oper, register struct type *tp)
 			    }
 			    if (oldtp->tp_up->tp_fund == VOID
 				&& is_cp_cst(exp)
-				&& exp->VL_VALUE == (arith)0)
+				&& exp->VL_VALUE == 0)
 				break;	/* switch */
 		    }
 		    /* falltrough */
@@ -366,7 +366,7 @@ void ch3cast(register struct expr **expp, int oper, register struct type *tp)
 		case NOTEQUAL:
 		case '=':
 		case RETURN:
-			if (is_cp_cst(exp) && exp->VL_VALUE == (arith)0)
+			if (is_cp_cst(exp) && exp->VL_VALUE == 0)
 				break;
 		default:
 			expr_strict(exp,
diff --git a/lang/cem/cemcom.ansi/ch3bin.c b/lang/cem/cemcom.ansi/ch3bin.c
index e692e98fa..d95f5b739 100644
--- a/lang/cem/cemcom.ansi/ch3bin.c
+++ b/lang/cem/cemcom.ansi/ch3bin.c
@@ -206,7 +206,7 @@ void ch3bin(register struct expr **expp, int oper, struct expr *expr)
 				where o1 == (*expp)->VL_VALUE;
 				and ((oper == AND) || (oper == OR))
 			*/
-			if ((oper == AND) == (ex->VL_VALUE != (arith)0)) {
+			if ((oper == AND) == (ex->VL_VALUE != 0)) {
 				*expp = expr;
 			}
 			else {
@@ -224,13 +224,13 @@ void ch3bin(register struct expr **expp, int oper, struct expr *expr)
 				where o2 == expr->VL_VALUE
 				and ((oper == AND) || (oper == OR))
 			*/
-			if ((oper == AND) == (expr->VL_VALUE != (arith)0)) {
+			if ((oper == AND) == (expr->VL_VALUE != 0)) {
 				(*expp)->ex_flags |= expr->ex_flags | EX_ILVALUE;
 				free_expression(expr);
 			}
 			else {
 				if (oper == OR)
-					expr->VL_VALUE = (arith)1;
+					expr->VL_VALUE = 1;
 				ch3bin(expp, ',', expr);
 			}
 		}
diff --git a/lang/cem/cemcom.ansi/ch3mon.c b/lang/cem/cemcom.ansi/ch3mon.c
index a84a141f2..456f25f8a 100644
--- a/lang/cem/cemcom.ansi/ch3mon.c
+++ b/lang/cem/cemcom.ansi/ch3mon.c
@@ -22,7 +22,6 @@
 
 
 extern char options[];
-extern arith full_mask[/*MAXSIZE + 1*/];	/* cstoper.c */
 char *symbol2str();
 
 void ch3mon(int oper, register struct expr **expp)
@@ -122,7 +121,7 @@ void ch3mon(int oper, register struct expr **expp)
 	case '-':
 		any2arith(expp, oper);
 		if (is_cp_cst(*expp))	{
-			arith o1 = (*expp)->VL_VALUE;
+			writh o1 = (*expp)->VL_VALUE;
 
 			(*expp)->VL_VALUE = (oper == '-') ? -o1 :
 			  ((*expp)->ex_type->tp_unsigned ?
diff --git a/lang/cem/cemcom.ansi/code.c b/lang/cem/cemcom.ansi/code.c
index cef708cbf..762124338 100644
--- a/lang/cem/cemcom.ansi/code.c
+++ b/lang/cem/cemcom.ansi/code.c
@@ -602,7 +602,7 @@ void loc_init(struct expr *expr, struct idf *id)
 			EVAL(expr, RVAL, TRUE, NO_LABEL, NO_LABEL);
 			vl.vl_class = Name;
 			vl.vl_data.vl_idf = id;
-			vl.vl_value = (arith)0;
+			vl.vl_value = 0;
 			store_val(&vl, tp);
 		}
 #else	/* LINT */
diff --git a/lang/cem/cemcom.ansi/conversion.c b/lang/cem/cemcom.ansi/conversion.c
index 13af3cef8..c2a1071b7 100644
--- a/lang/cem/cemcom.ansi/conversion.c
+++ b/lang/cem/cemcom.ansi/conversion.c
@@ -112,8 +112,6 @@ void conversion(register struct type *from_type, register struct type *to_type)
 	if ((int)(to_type->tp_size) < (int)word_size
 	    && to_cnvtype != T_FLOATING
 	    ) {
-		extern arith full_mask[];
-
 		if (to_cnvtype == T_SIGNED) {
 			C_loc(to_type->tp_size);
 			C_loc(word_size);
diff --git a/lang/cem/cemcom.ansi/cstoper.c b/lang/cem/cemcom.ansi/cstoper.c
index 61c3fd6e3..769c6940b 100644
--- a/lang/cem/cemcom.ansi/cstoper.c
+++ b/lang/cem/cemcom.ansi/cstoper.c
@@ -18,7 +18,7 @@
 #include    "error.h"
 
 /* full_mask[1] == 0XFF, full_mask[2] == 0XFFFF, .. */
-arith full_mask[MAXSIZE + 1];
+writh full_mask[MAXSIZE + 1];
 #ifndef NOCROSS
 arith max_int;		/* maximum integer on target machine	*/
 arith max_unsigned;	/* maximum unsigned on target machine	*/
@@ -31,8 +31,8 @@ void cstbin(register struct expr **expp, int oper, register struct expr *expr)
 		expressions *expp(ld) and expr(ct), and the result restored in
 		*expp.
 	*/
-	register arith o1 = (*expp)->VL_VALUE;
-	register arith o2 = expr->VL_VALUE;
+	unsigned writh o1 = (unsigned writh)(*expp)->VL_VALUE;
+	unsigned writh o2 = (unsigned writh)expr->VL_VALUE;
 	int uns = (*expp)->ex_type->tp_unsigned;
 
 	assert(is_ld_cst(*expp) && is_cp_cst(expr));
@@ -49,9 +49,9 @@ void cstbin(register struct expr **expp, int oper, register struct expr *expr)
 			break;
 		}
 		if (uns)
-			o1 /= (unsigned arith) o2;
-		else
 			o1 /= o2;
+		else
+			o1 = (unsigned writh)((writh)o1 / (writh)o2);
 		break;
 	case '%':
 		if (o2 == 0)	{
@@ -62,9 +62,9 @@ void cstbin(register struct expr **expp, int oper, register struct expr *expr)
 			break;
 		}
 		if (uns)
-			o1 %= (unsigned arith) o2;
-		else
 			o1 %= o2;
+		else
+			o1 = (unsigned writh)((writh)o1 % (writh)o2);
 		break;
 	case '+':
 		o1 += o2;
@@ -78,15 +78,14 @@ void cstbin(register struct expr **expp, int oper, register struct expr *expr)
 	case RIGHT:
 		if (o2 == 0)
 			break;
-		if (uns)	{
-			o1 = (o1 >> 1) & ~arith_sign;
-			o1 >>= (o2 - 1);
-		}
-		else	o1 >>= o2;
+		if (uns)
+			o1 >>= o2;
+		else
+			o1 = (unsigned writh)((writh)o1 >> (writh)o2);
 		break;
 	case '<':
 		{
-			arith tmp = o1;
+			writh tmp = o1;
 
 			o1 = o2;
 			o2 = tmp;
@@ -94,13 +93,13 @@ void cstbin(register struct expr **expp, int oper, register struct expr *expr)
 		/* Fall through */
 	case '>':
 		if (uns)
-			o1 = (unsigned arith) o1 > (unsigned arith) o2;
-		else
 			o1 = o1 > o2;
+		else
+			o1 = (writh)o1 > (writh)o2;
 		break;
 	case LESSEQ:
 		{
-			arith tmp = o1;
+			writh tmp = o1;
 
 			o1 = o2;
 			o2 = tmp;
@@ -108,9 +107,9 @@ void cstbin(register struct expr **expp, int oper, register struct expr *expr)
 		/* Fall through */
 	case GREATEREQ:
 		if (uns)
-			o1 = (unsigned arith) o1 >= (unsigned arith) o2;
-		else
 			o1 = o1 >= o2;
+		else
+			o1 = (writh)o1 >= (writh)o2;
 		break;
 	case EQUAL:
 		o1 = o1 == o2;
@@ -128,7 +127,7 @@ void cstbin(register struct expr **expp, int oper, register struct expr *expr)
 		o1 ^= o2;
 		break;
 	}
-	(*expp)->VL_VALUE = o1;
+	(*expp)->VL_VALUE = (writh)o1;
 	cut_size(*expp);
 	(*expp)->ex_flags |= expr->ex_flags;
 	(*expp)->ex_flags &= ~EX_PARENS;
@@ -140,7 +139,7 @@ void cut_size(register struct expr *expr)
 	/*	The constant value of the expression expr is made to
 		conform to the size of the type of the expression.
 	*/
-	register arith o1 = expr->VL_VALUE;
+	writh o1 = expr->VL_VALUE;
 	int uns = expr->ex_type->tp_unsigned;
 	int size = (int) expr->ex_type->tp_size;
 
@@ -159,8 +158,8 @@ void cut_size(register struct expr *expr)
 		o1 &= full_mask[size];
 	}
 	else {
-		int nbits = (int) (arith_size - size) * 8;
-		arith remainder = o1 & ~full_mask[size];
+		int nbits = (int) (sizeof(o1) - size) * 8;
+		writh remainder = o1 & ~full_mask[size];
 
 		if (remainder != 0 && remainder != ~full_mask[size])
 		    if (!ResultKnown)
@@ -173,22 +172,20 @@ void cut_size(register struct expr *expr)
 void init_cst(void)
 {
 	register int i = 0;
-	register arith bt = (arith)0;
+	unsigned writh bt = 0;
 
-	/*	FIXME arith is insufficient for long long.  We ignore
-		this problem and write masks up to full_mask[8], but
-		masks are wrong after bt < 0.
-	*/
-	while (!(bt < 0) || i < 8) {
+	while (!((writh)bt < 0)) {
 		bt = (bt << 8) + 0377, i++;
 		if (i > MAXSIZE)
 			fatal("array full_mask too small for this machine");
 		full_mask[i] = bt;
 	}
-	if ((int)long_size > arith_size)
-		fatal("sizeof (arith) insufficient on this machine");
+	/* signed comparison; lnglng_size might be -1 */
+	if (long_size > (arith)sizeof(writh) ||
+	    lnglng_size > (arith)sizeof(writh))
+		fatal("sizeof(writh) insufficient on this machine");
 #ifndef NOCROSS
-	max_int = full_mask[(int)int_size] & ~(1L << ((int)int_size * 8 - 1));
-	max_unsigned = full_mask[(int)int_size];
+	max_int = (arith)((unsigned writh)full_mask[(int)int_size] >> 1);
+	max_unsigned = (arith)full_mask[(int)int_size];
 #endif /* NOCROSS */
 }
diff --git a/lang/cem/cemcom.ansi/declar.g b/lang/cem/cemcom.ansi/declar.g
index 8bec8e72d..abd7263d4 100644
--- a/lang/cem/cemcom.ansi/declar.g
+++ b/lang/cem/cemcom.ansi/declar.g
@@ -343,7 +343,7 @@ arrayer(arith *sizep;)
 			constant_expression(&expr)
 			{
 				check_array_subscript(expr);
-				*sizep = expr->VL_VALUE;
+				*sizep = (arith)expr->VL_VALUE;
 				free_expression(expr);
 			}
 		]?
@@ -430,7 +430,7 @@ enumerator(struct type *tp; arith *lp;)
 		'='
 		constant_expression(&expr)
 		{
-			*lp = expr->VL_VALUE;
+			*lp = (arith)expr->VL_VALUE;
 			free_expression(expr);
 		}
 	]?
@@ -548,7 +548,7 @@ bit_expression(struct field **fd;)
 	':'
 	constant_expression(&expr)
 	{
-		(*fd)->fd_width = expr->VL_VALUE;
+		(*fd)->fd_width = (arith)expr->VL_VALUE;
 		free_expression(expr);
 #ifdef NOBITFIELD
 		error("bitfields are not implemented");
diff --git a/lang/cem/cemcom.ansi/declarator.c b/lang/cem/cemcom.ansi/declarator.c
index 2795bda86..c1ce2d88f 100644
--- a/lang/cem/cemcom.ansi/declarator.c
+++ b/lang/cem/cemcom.ansi/declarator.c
@@ -118,11 +118,11 @@ void reject_params(register struct declarator *dc)
 
 void check_array_subscript(register struct expr *expr)
 {
-	arith size = expr->VL_VALUE;
+	writh size = expr->VL_VALUE;
 
 	if (size < 0)	{
 		error("array size is negative");
-		expr->VL_VALUE = (arith)1;
+		expr->VL_VALUE = 1;
 	}
 	else
 	if (size == 0) {
@@ -131,6 +131,6 @@ void check_array_subscript(register struct expr *expr)
 	else
 	if (size & ~max_unsigned) {	/* absolutely ridiculous */
 		expr_error(expr, "overflow in array size");
-		expr->VL_VALUE = (arith)1;
+		expr->VL_VALUE = 1;
 	}
 }
diff --git a/lang/cem/cemcom.ansi/dumpidf.c b/lang/cem/cemcom.ansi/dumpidf.c
index 3c866b36b..c8f4e1a60 100644
--- a/lang/cem/cemcom.ansi/dumpidf.c
+++ b/lang/cem/cemcom.ansi/dumpidf.c
@@ -6,8 +6,8 @@
 /*	DUMP ROUTINES	*/
 
 
-#ifdef	DEBUG
 #include	"parameters.h"
+#ifdef	DEBUG
 #include	<ack_string.h>
 #include	<alloc.h>
 #include	<flt_arith.h>
@@ -442,8 +442,8 @@ static void p1_expr(int lvl, register struct expr *expr)
 			print("(Unknown) ");
 			break;
 		}
-		print(expr->ex_type->tp_unsigned ? "%lu\n" : "%ld\n",
-			expr->VL_VALUE);
+		print("%s\n", writh2str(expr->VL_VALUE,
+					expr->ex_type->tp_unsigned));
 		break;
 	case String:
 	{
diff --git a/lang/cem/cemcom.ansi/eval.c b/lang/cem/cemcom.ansi/eval.c
index 338ca51cb..efe40bad6 100644
--- a/lang/cem/cemcom.ansi/eval.c
+++ b/lang/cem/cemcom.ansi/eval.c
@@ -106,7 +106,7 @@ void EVAL(register struct expr *expr, int val, int code, label true_label, label
 	case String:	/* a string constant	*/
 		if (gencode) {
 			string2pointer(expr);
-			C_lae_dlb(expr->VL_LBL, expr->VL_VALUE);
+			C_lae_dlb(expr->VL_LBL, (arith)expr->VL_VALUE);
 		}
 		break;
 	case Float:	/* a floating constant	*/
@@ -575,7 +575,7 @@ void EVAL(register struct expr *expr, int val, int code, label true_label, label
 				NO_LABEL, NO_LABEL);
 			assert(is_cp_cst(right));
 			if (gencode) {
-				C_adp(right->VL_VALUE);
+				C_adp((arith)right->VL_VALUE);
 			}
 			break;
 		case ',':
@@ -858,10 +858,11 @@ void store_val(register struct value *vl, register struct type *tp)
 {
 	register int inword = 0;
 	register int indword = 0;
-	arith val = vl->vl_value;
+	writh wval = vl->vl_value;
+	arith val = (arith)wval;
 
 	if (vl->vl_class == Const) {	/* absolute addressing */
-		load_cst(val, pointer_size);
+		load_cst(wval, pointer_size);
 		store_block(tp->tp_size, tp->tp_align);
 		return;
 	}
@@ -930,7 +931,8 @@ void load_val(register struct expr *expr, int rlval)
 	register struct type *tp = expr->ex_type;
 	int rvalue = (rlval == RVAL && expr->ex_lvalue != 0);
 	register int inword = 0, indword = 0;
-	register arith val = expr->VL_VALUE;
+	writh wval = expr->VL_VALUE;
+	arith val = (arith)wval;
 
 	if (expr->ex_type->tp_fund == FLOAT
 	    || expr->ex_type->tp_fund == DOUBLE
@@ -938,11 +940,11 @@ void load_val(register struct expr *expr, int rlval)
 		fp_used = 1;
 	if (expr->VL_CLASS == Const) {
 		if (rvalue) { /* absolute addressing */
-			load_cst(val, pointer_size);
+			load_cst(wval, pointer_size);
 			load_block(tp->tp_size, tp->tp_align);
 		}
 		else	/* integer, unsigned, long, enum etc	*/
-			load_cst(val, tp->tp_size);
+			load_cst(wval, tp->tp_size);
 		return;
 	}
 	if (rvalue) {
@@ -1022,7 +1024,7 @@ void load_val(register struct expr *expr, int rlval)
 	}
 }
 
-void load_cst(arith val, arith siz)
+void load_cst(writh val, arith siz)
 {
 	/*	EM can't encode ldc with constant over 4 bytes.
 		Such a constant must go into rom.
@@ -1036,7 +1038,7 @@ void load_cst(arith val, arith siz)
 		label datlab;
 
 		C_df_dlb(datlab = data_label());
-		C_rom_icon(long2str((long)val, 10), siz);
+		C_rom_icon(writh2str(val, 0), siz);
 		C_lae_dlb(datlab, (arith)0);
 		C_loi(siz);
 	}
diff --git a/lang/cem/cemcom.ansi/eval.h b/lang/cem/cemcom.ansi/eval.h
index 337b5ac57..53a7f8d12 100644
--- a/lang/cem/cemcom.ansi/eval.h
+++ b/lang/cem/cemcom.ansi/eval.h
@@ -12,6 +12,7 @@
 
 
 #include <em.h>
+#include "arith.h" /* writh */
 
 struct expr;
 struct value;
@@ -30,7 +31,7 @@ void assop(register struct type *type, int oper);
 */
 void store_val(register struct value *vl, register struct type *tp);
 void load_val(register struct expr *expr, int rlval);
-void load_cst(arith val, arith siz);
+void load_cst(writh val, arith siz);
 
 #endif /* LINT */
 
diff --git a/lang/cem/cemcom.ansi/expr.c b/lang/cem/cemcom.ansi/expr.c
index b24c408d0..3dfbc5da5 100644
--- a/lang/cem/cemcom.ansi/expr.c
+++ b/lang/cem/cemcom.ansi/expr.c
@@ -184,21 +184,21 @@ void idf2expr(register struct expr *expr)
 	if (def->df_sc == ENUM)
 	{
 		expr->VL_CLASS = Const;
-		expr->VL_VALUE = def->df_address;
+		expr->VL_VALUE = (writh)def->df_address;
 	}
 #ifndef	LINT
 	else if (def->df_sc == STATIC && def->df_level >= L_LOCAL)
 	{
 		expr->VL_CLASS = Label;
 		expr->VL_LBL = def->df_address;
-		expr->VL_VALUE = (arith) 0;
+		expr->VL_VALUE = 0;
 	}
 #endif	/* LINT */
 	else
 	{
 		expr->VL_CLASS = Name;
 		expr->VL_IDF = idf;
-		expr->VL_VALUE = (arith) 0;
+		expr->VL_VALUE = 0;
 	}
 }
 
@@ -270,12 +270,12 @@ arith ivalue, int fund)
 
 	expr->ex_file = dot.tk_file;
 	expr->ex_line = dot.tk_line;
-	fill_int_expr(expr, ivalue, fund);
+	fill_int_expr(expr, (writh)ivalue, fund);
 	return expr;
 }
 
 void fill_int_expr(register struct expr *ex,
-arith ivalue, int fund)
+writh ivalue, int fund)
 {
 	/*	Details derived from ivalue and fund are put into the
 	 constant integer expression ex.
@@ -294,6 +294,16 @@ arith ivalue, int fund)
 	case ULONG:
 		ex->ex_type = ulong_type;
 		break;
+	case LNGLNG:
+		ex->ex_type = lnglng_type;
+		break;
+	case ULNGLNG:
+		ex->ex_type = ulnglng_type;
+		break;
+	case ERRONEOUS:		/* 123LL when no_long_long() */
+		ex->ex_type = error_type;
+		ex->ex_flags |= EX_ERROR;
+		break;
 	default:
 		crash("(fill_int_expr) bad fund %s\n", symbol2str(fund));
 		/*NOTREACHED*/
@@ -406,6 +416,7 @@ void chk_cst_expr(struct expr **expp)
 	case INT:
 	case ENUM:
 	case LONG:
+	case LNGLNG:
 		if (is_ld_cst(expr))
 		{
 			return;
diff --git a/lang/cem/cemcom.ansi/expr.str b/lang/cem/cemcom.ansi/expr.str
index 97ac5b1f9..51718ace6 100644
--- a/lang/cem/cemcom.ansi/expr.str
+++ b/lang/cem/cemcom.ansi/expr.str
@@ -23,7 +23,7 @@
 
 struct value	{
 	int vl_class;		/* Const, Name or Label	*/
-	arith vl_value;		/* constant value or offset */
+	writh vl_value;		/* constant value or offset */
 	union {
 		struct idf *vl_idf;	/* external name */
 		label vl_lbl;		/* compiler-generated label */
@@ -100,8 +100,6 @@ struct expr	{
 #define	ISNAME(e)	((e)->ex_class == Value && (e)->VL_CLASS == Name)
 #define	ISCOMMA(e)	((e)->ex_class == Oper && (e)->OP_OPER == INITCOMMA)
 
-extern struct expr *intexpr(), *new_oper();
-
 /* ALLOCDEF "expr" 20 */
 
 
@@ -113,7 +111,7 @@ void string2expr(register struct expr **expp, char *str, int len);
 void int2expr(struct expr *expr);
 void float2expr(register struct expr *expr);
 struct expr* intexpr(arith ivalue, int fund);
-void fill_int_expr(register struct expr *ex,arith ivalue, int fund);
+void fill_int_expr(register struct expr *ex, writh ivalue, int fund);
 struct expr *new_oper(struct type *tp, register struct expr *e1, int oper,
 		register struct expr *e2);
 void chk_cst_expr(struct expr **expp);
diff --git a/lang/cem/cemcom.ansi/field.c b/lang/cem/cemcom.ansi/field.c
index d1b014c01..7d7f27726 100644
--- a/lang/cem/cemcom.ansi/field.c
+++ b/lang/cem/cemcom.ansi/field.c
@@ -28,8 +28,6 @@
 #include    "eval.h"
 
 
-extern arith full_mask[];	/* cstoper.c	*/
-
 /*	Eval_field() evaluates expressions involving bit fields.
 	The various instructions are not yet optimised in the expression
 	tree and are therefore dealt with in this function.
@@ -136,6 +134,8 @@ void store_field(
 	register struct expr *leftop,
 	arith tmpvar)
 {
+	arith high_mask;
+
 	C_loc(fd->fd_mask);
 	C_and(word_size);
 	if (code == TRUE)
@@ -145,7 +145,8 @@ void store_field(
 		C_slu(word_size);
 	else
 		C_sli(word_size);
-	C_loc(~((fd->fd_mask << fd->fd_shift) | ~full_mask[(int)word_size]));
+	high_mask = (arith)~full_mask[(int)word_size];
+	C_loc(~((fd->fd_mask << fd->fd_shift) | high_mask));
 	if (leftop->ex_depth == 0)	{	/* simple case	*/
 		load_val(leftop, RVAL);
 		C_and(word_size);
diff --git a/lang/cem/cemcom.ansi/ival.g b/lang/cem/cemcom.ansi/ival.g
index 995d46248..05fcc4881 100644
--- a/lang/cem/cemcom.ansi/ival.g
+++ b/lang/cem/cemcom.ansi/ival.g
@@ -544,11 +544,11 @@ void check_ival(struct expr **expp, register struct type *tp)
 			if (idf->id_def->df_type->tp_fund == FUNCTION)
 				C_con_pnam(idf->id_text);
 			else	/* e.g., int a; int *p = &a; */
-				C_con_dnam(idf->id_text, expr->VL_VALUE);
+				C_con_dnam(idf->id_text, (arith)expr->VL_VALUE);
 		}
 		else {
 			assert(expr->VL_CLASS == Label);
-			C_con_dlb(expr->VL_LBL, expr->VL_VALUE);
+			C_con_dlb(expr->VL_LBL, (arith)expr->VL_VALUE);
 		}
 		break;
 	case FLOAT:
@@ -595,7 +595,7 @@ and also to prevent runtime coercions for compile-time constants.
 		print_expr("init-expr after cast", expr);
 #endif /* DEBUG */
 		if (is_cp_cst(expr))
-			put_bf(tp, expr->VL_VALUE);
+			put_bf(tp, (arith)expr->VL_VALUE);
 		else
 			illegal_init_cst(expr);
 		break;
@@ -701,7 +701,7 @@ void put_bf(struct type *tp, arith val)
 		field |= (val & fd->fd_mask) << fd->fd_shift;
 	if (sd->sd_sdef == 0 || sd->sd_sdef->sd_offset != offset) {
 		/* the selector was the last stored at this address	*/
-		exp.VL_VALUE = field;
+		exp.VL_VALUE = (writh)field;
 		con_int(&exp);
 		field = (arith)0;
 		offset = (arith)-1;
@@ -739,11 +739,11 @@ void con_int(register struct expr *ex)
 
 	assert(is_cp_cst(ex));
 	if (tp->tp_unsigned)
-		C_con_ucon(long2str((long)ex->VL_VALUE, -10), tp->tp_size);
+		C_con_ucon(writh2str(ex->VL_VALUE, 1), tp->tp_size);
 	else if (tp->tp_size == word_size)
-		C_con_cst(ex->VL_VALUE);
+		C_con_cst((arith)ex->VL_VALUE);
 	else
-		C_con_icon(long2str((long)ex->VL_VALUE, 10), tp->tp_size);
+		C_con_icon(writh2str(ex->VL_VALUE, 0), tp->tp_size);
 }
 
 void illegal_init_cst(struct expr *ex)
diff --git a/lang/cem/cemcom.ansi/stab.c b/lang/cem/cemcom.ansi/stab.c
index 5a002301f..864201aa1 100644
--- a/lang/cem/cemcom.ansi/stab.c
+++ b/lang/cem/cemcom.ansi/stab.c
@@ -32,8 +32,6 @@
 #include	"level.h"
 #include    "print.h"
 
-extern long full_mask[];
-
 #define INCR_SIZE	64
 
 static struct db_str
diff --git a/lang/cem/cemcom.ansi/statement.g b/lang/cem/cemcom.ansi/statement.g
index 3d1dcd4f2..d58ec4b6a 100644
--- a/lang/cem/cemcom.ansi/statement.g
+++ b/lang/cem/cemcom.ansi/statement.g
@@ -147,7 +147,7 @@ if_statement
 				/*	The comparison has been optimized
 					to a 0 or 1.
 				*/
-				if (expr->VL_VALUE == (arith)0)	{
+				if (expr->VL_VALUE == 0)	{
 					C_bra(l_false);
 				}
 				/* else fall through */
@@ -210,7 +210,7 @@ while_statement
 		{
 			opnd2test(&expr, WHILE);
 			if (is_cp_cst(expr))	{
-				if (expr->VL_VALUE == (arith)0)	{
+				if (expr->VL_VALUE == 0)	{
 					C_bra(l_break);
 				}
 			}
@@ -264,11 +264,11 @@ do_statement
 		{
 			opnd2test(&expr, WHILE);
 			if (is_cp_cst(expr))	{
-				if (expr->VL_VALUE == (arith)1)	{
+				if (expr->VL_VALUE == 1)	{
 					C_bra(l_body);
 				}
 #ifdef	LINT
-				end_do_stmt(1, expr->VL_VALUE != (arith)0);
+				end_do_stmt(1, expr->VL_VALUE != 0);
 #endif	/* LINT */
 			}
 			else	{
@@ -313,7 +313,7 @@ for_statement
 		{
 			opnd2test(&e_test, FOR);
 			if (is_cp_cst(e_test))	{
-				if (e_test->VL_VALUE == (arith)0)	{
+				if (e_test->VL_VALUE == 0)	{
 					C_bra(l_break);
 				}
 			}
diff --git a/lang/cem/cemcom.ansi/switch.c b/lang/cem/cemcom.ansi/switch.c
index 7db1f28bd..95da9aabf 100644
--- a/lang/cem/cemcom.ansi/switch.c
+++ b/lang/cem/cemcom.ansi/switch.c
@@ -32,13 +32,13 @@ extern char options[];
 
 int	density = DENSITY;
 
-static int compact(int nr, arith low, arith up)
+static int compact(int nr, writh low, writh up)
 {
 	/*	Careful! up - low might not fit in an arith. And then,
 		the test "up-low < 0" might also not work to detect this
 		situation! Or is this just a bug in the M68020/M68000?
 	*/
-	arith diff = up - low;
+	writh diff = up - low;
 
 	return (nr == 0 || (diff >= 0 && diff / nr <= (density - 1)));
 }
@@ -117,10 +117,10 @@ void code_endswitch(void)
 	    C_rom_ilb(sh->sh_default);
 	    if (compact(sh->sh_nrofentries, sh->sh_lowerbd, sh->sh_upperbd)) {
 		/* CSA */
-		register arith val;
+		writh val;
 
-		C_rom_icon(long2str((long)sh->sh_lowerbd,10), size);
-		C_rom_icon(long2str((long)(sh->sh_upperbd - sh->sh_lowerbd),10),
+		C_rom_icon(writh2str(sh->sh_lowerbd, 0), size);
+		C_rom_icon(writh2str(sh->sh_upperbd - sh->sh_lowerbd, 0),
 				size);
 		ce = sh->sh_entries;
 		for (val = sh->sh_lowerbd; val <= sh->sh_upperbd; val++) {
@@ -136,10 +136,10 @@ void code_endswitch(void)
 		C_csa(size);
 	    }
 	    else { /* CSB */
-		C_rom_icon(long2str((long)sh->sh_nrofentries,10),size);
+		C_rom_icon(writh2str(sh->sh_nrofentries, 0), size);
 		for (ce = sh->sh_entries; ce; ce = ce->next) {
 			/* generate the entries: value + prog.label	*/
-			C_rom_icon(long2str((long)ce->ce_value,10),size);
+			C_rom_icon(writh2str(ce->ce_value, 0), size);
 			C_rom_ilb(ce->ce_label);
 		}
 		C_lae_dlb(tablabel, (arith)0); /* perform the switch	*/
@@ -162,7 +162,7 @@ void code_endswitch(void)
 
 void code_case(struct expr *expr)
 {
-	register arith val;
+	writh val;
 	register struct case_entry *ce;
 	register struct switch_hdr *sh = switch_stack;
 	
diff --git a/lang/cem/cemcom.ansi/switch.str b/lang/cem/cemcom.ansi/switch.str
index 94a45ca42..3ba693498 100644
--- a/lang/cem/cemcom.ansi/switch.str
+++ b/lang/cem/cemcom.ansi/switch.str
@@ -22,8 +22,8 @@ struct switch_hdr	{
 	int sh_nrofentries;
 	struct type *sh_type;
 	struct expr *sh_expr;
-	arith sh_lowerbd;
-	arith sh_upperbd;
+	writh sh_lowerbd;
+	writh sh_upperbd;
 	struct case_entry *sh_entries;
 };
 
@@ -32,7 +32,7 @@ struct switch_hdr	{
 struct case_entry	{
 	struct case_entry *next;
 	label ce_label;
-	arith ce_value;
+	writh ce_value;
 };
 
 /* ALLOCDEF "case_entry" 20 */
diff --git a/lang/cem/cemcom.ansi/tokenname.c b/lang/cem/cemcom.ansi/tokenname.c
index fb3bae349..0a86f480e 100644
--- a/lang/cem/cemcom.ansi/tokenname.c
+++ b/lang/cem/cemcom.ansi/tokenname.c
@@ -107,6 +107,7 @@ struct tokenname tkfunny[] =	{	/* internal keywords */
 	{LNGLNG, "long long"},
 	{LNGDBL, "long double"},
 	{ULONG, "unsigned long"},
+	{ULNGLNG, "unsigned long long"},
 
 	{ARRAY, "array"},
 	{FUNCTION, "function"},

From 20a4d401d0fd5ac98cf33bafecb63ce65127b2db Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Thu, 5 Sep 2019 13:13:02 -0400
Subject: [PATCH 06/22] Add first long-long test for linux386.

Skip the long-long test set on other platforms, because they don't
have long long.  Each platform would need to implement 8-byte
operations like `adi 8` in its code generator, and set long long to
8 bytes in its descr file.

The first test is for negation, addition, and subtraction.  It also
requires comparison for equality.
---
 plat/cpm/tests/build.lua          |  3 +-
 plat/linux68k/tests/build.lua     |  5 ++-
 plat/linuxmips/tests/build.lua    |  4 ++-
 plat/linuxppc/tests/build.lua     |  5 ++-
 plat/pc86/tests/build.lua         |  5 ++-
 tests/plat/build.lua              |  2 +-
 tests/plat/long-long/lladdsub_e.c | 59 +++++++++++++++++++++++++++++++
 7 files changed, 77 insertions(+), 6 deletions(-)
 create mode 100644 tests/plat/long-long/lladdsub_e.c

diff --git a/plat/cpm/tests/build.lua b/plat/cpm/tests/build.lua
index 8795262ad..81d17d4dc 100644
--- a/plat/cpm/tests/build.lua
+++ b/plat/cpm/tests/build.lua
@@ -6,7 +6,8 @@ plat_testsuite {
     method = "plat/cpm/emu+emu",
     skipsets = {
         "b",      -- B is broken on i80
-        "floats"  -- floats aren't supported
+        "floats", -- floats aren't supported
+        "long-long",
     },
     tests = { "./*.c" },
 }
diff --git a/plat/linux68k/tests/build.lua b/plat/linux68k/tests/build.lua
index 6a4b847c8..221abc8d6 100644
--- a/plat/linux68k/tests/build.lua
+++ b/plat/linux68k/tests/build.lua
@@ -4,5 +4,8 @@ plat_testsuite {
     name = "tests",
     plat = "linux68k",
     method = "plat/linux68k/emu+emu68k",
-	skipsets = {"floats"}, -- FPU instructions not supported by emulator
+    skipsets = {
+        "floats", -- FPU instructions not supported by emulator
+        "long-long",
+    },
 }
diff --git a/plat/linuxmips/tests/build.lua b/plat/linuxmips/tests/build.lua
index 529277fd9..b2b660568 100644
--- a/plat/linuxmips/tests/build.lua
+++ b/plat/linuxmips/tests/build.lua
@@ -4,5 +4,7 @@ plat_testsuite {
     name = "tests",
     plat = "linuxmips",
     method = "qemu-mipsel",
---	skipsets = {"m2"},
+    skipsets = {
+        "long-long",
+    },
 }
diff --git a/plat/linuxppc/tests/build.lua b/plat/linuxppc/tests/build.lua
index 7ea6e5bb9..fe89558a6 100644
--- a/plat/linuxppc/tests/build.lua
+++ b/plat/linuxppc/tests/build.lua
@@ -3,5 +3,8 @@ include("tests/plat/build.lua")
 plat_testsuite {
     name = "tests",
     plat = "linuxppc",
-    method = "plat/linuxppc/emu+emuppc"
+    method = "plat/linuxppc/emu+emuppc",
+    skipsets = {
+        "long-long",
+    },
 }
diff --git a/plat/pc86/tests/build.lua b/plat/pc86/tests/build.lua
index 84b70a171..e4d8286fe 100644
--- a/plat/pc86/tests/build.lua
+++ b/plat/pc86/tests/build.lua
@@ -4,5 +4,8 @@ plat_testsuite {
     name = "tests",
     plat = "pc86",
     method = "plat/pc86/emu+pc86emu",
-	skipsets = {"floats"}, -- FPU instructions not supported by emulator
+    skipsets = {
+        "floats", -- FPU instructions not supported by emulator
+        "long-long",
+    },
 }
diff --git a/tests/plat/build.lua b/tests/plat/build.lua
index f7eecc0dd..72fd6afad 100644
--- a/tests/plat/build.lua
+++ b/tests/plat/build.lua
@@ -4,7 +4,7 @@ definerule("plat_testsuite",
 	{
 		plat = { type="string" },
 		method = { type="string" },
-		sets = { type="table", default={"core", "b", "bugs", "m2", "floats"}},
+		sets = { type="table", default={"core", "b", "bugs", "m2", "floats", "long-long"}},
 		skipsets = { type="table", default={}},
 		tests = { type="targets", default={} },
 	},
diff --git a/tests/plat/long-long/lladdsub_e.c b/tests/plat/long-long/lladdsub_e.c
new file mode 100644
index 000000000..a1b248520
--- /dev/null
+++ b/tests/plat/long-long/lladdsub_e.c
@@ -0,0 +1,59 @@
+#include "test.h"
+
+struct neg { /* one negation test vector */
+	long long a; /* operand */
+	long long neg_a; /* expected value of -a */
+} negations[] = {
+	{0LL, 0LL},
+	{2LL, -2LL},
+	{-446020022096LL, 446020022096LL}, /* magnitude does not fit in 32 bits */
+};
+
+struct s_addsub { /* one signed add/subtract test vector */
+	long long a;
+	long long b;
+	long long a_add_b; /* expected a + b */
+	long long a_sub_b; /* expected a - b */
+} s_cases[] = {
+	{2LL, 1LL, 3LL, 1LL},
+	{2LL, -1LL, 1LL, 3LL},
+	/* a + b overflows 31 bits: it exceeds the signed 32-bit range */
+	{1930610480LL, 842500503LL, 2773110983LL, 1088109977LL},
+	{-446020022096LL, 1037107331549LL, 591087309453LL, -1483127353645LL},
+	{-737537585551LL, -847060446507LL, -1584598032058LL, 109522860956LL},
+};
+
+struct u_addsub { /* one unsigned add/subtract test vector */
+	unsigned long long a;
+	unsigned long long b;
+	unsigned long long a_add_b; /* expected a + b */
+	unsigned long long a_sub_b; /* expected a - b */
+} u_cases[] = {
+	{2ULL, 1ULL, 3ULL, 1ULL},
+	/* a + b overflows 63 bits: the sum only fits as an unsigned 64-bit value */
+	{6092994517831567942ULL, 3716888886436146324ULL,
+	 9809883404267714266ULL, 2376105631395421618ULL},
+};
+
+#define LEN(ary) (sizeof(ary) / sizeof(ary[0]))
+
+void _m_a_i_n(void) { /* checks every table row; ASSERT and finished() are presumably supplied by test.h */
+	int i;
+
+	for (i = 0; i < LEN(negations); i++) { /* negation, checked in both directions */
+		struct neg *n = &negations[i];
+		ASSERT(n->a == -n->neg_a);
+		ASSERT(-n->a == n->neg_a);
+	}
+	for (i = 0; i < LEN(s_cases); i++) { /* signed addition and subtraction */
+		struct s_addsub *s = &s_cases[i];
+		ASSERT(s->a + s->b == s->a_add_b);
+		ASSERT(s->a - s->b == s->a_sub_b);
+	}
+	for (i = 0; i < LEN(u_cases); i++) { /* unsigned addition and subtraction */
+		struct u_addsub *u = &u_cases[i];
+		ASSERT(u->a + u->b == u->a_add_b);
+		ASSERT(u->a - u->b == u->a_sub_b);
+	}
+	finished();
+}

From fb9f5387b54507108f48f2617b4496e6f018e940 Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Sat, 7 Sep 2019 16:20:33 -0400
Subject: [PATCH 07/22] Add long long comparisons, shifts for linux386.

Add tests for comparisons and shifts.  Also add enough integer
conversions to compile the shift test (llshift_e.c), and disable
some wrong rules for ldc and conversions.
---
 mach/i386/ncg/table              | 162 ++++++++++++++++++++++++++++++-
 tests/plat/build.lua             |   1 +
 tests/plat/long-long/llcmp_e.c   | 124 +++++++++++++++++++++++
 tests/plat/long-long/llshift_e.c |  75 ++++++++++++++
 4 files changed, 361 insertions(+), 1 deletion(-)
 create mode 100644 tests/plat/long-long/llcmp_e.c
 create mode 100644 tests/plat/long-long/llshift_e.c

diff --git a/mach/i386/ncg/table b/mach/i386/ncg/table
index 42a39d15b..10fd02809 100644
--- a/mach/i386/ncg/table
+++ b/mach/i386/ncg/table
@@ -234,8 +234,12 @@ jmp label cost(1,4).
 proccall "call" label+rm cost(1,8).
 jxx "syntax error" label cost(1,4).
 setxx "syntax error" REG1:rw cost(2,4).
+seta REG1:rw cost(2,4).
+setb REG1:rw cost(2,4).
+setl REG1:rw cost(2,4).
 setle REG1:rw cost(2,4).
 setg REG1:rw cost(2,4).
+setne REG1:rw cost(2,4).
 lea anyreg:rw, halfindir:ro.
 lea LOCAL:rw, halfindir:ro.	/* only for register variables, UNSAFE!!! */
 leave cost(1,4).
@@ -255,12 +259,14 @@ movzxb anyreg:wo, REG+rm1:ro.
 movzx anyreg:wo, REG+rm2:ro.
 mul rmorconst:ro kills :cc eax edx cost(2,41).
 neg rmorconst:rw:cc.
+negb rm1:rw:cc.
 not rmorconst:rw.
 #ifdef REGVARS
 or LOCAL:rw:cc, rmorconst:ro.	/* only for register variables; UNSAFE !!! */
 #endif
 or rm:rw:cc, regorconst:ro.
 or anyreg:rw:cc, rmorconst:ro.
+orb REG1:rw, REG1:ro.
 pop anyreg:wo cost(1,4).
 pop rm:wo.
 push anyreg:ro cost(1,2).
@@ -276,7 +282,9 @@ sar rm:rw, ANYCON+SHIFT_CREG:ro kills :cc.
 sbb rm:rw:cc, regorconst:ro.
 sbb anyreg:rw:cc, rmorconst:ro.
 shl rm:rw, ANYCON+SHIFT_CREG:ro kills :cc.
+shld rm:rw, anyreg:ro, ANYCON+SHIFT_CREG:ro kills :cc cost(2,3).
 shr rm:rw, ANYCON+SHIFT_CREG:ro kills :cc.
+shrd rm:rw, anyreg:ro, ANYCON+SHIFT_CREG:ro kills :cc cost(2,3).
 #ifdef REGVARS
 sub LOCAL:rw:cc, rmorconst:ro.	/* only for register variables; UNSAFE !!! */
 #endif
@@ -506,7 +514,10 @@ PATTERNS
 
 pat loc					yields {ANYCON,$1}
 
+#if 0
+/* wrong because .Xtrp assumes trap < 16 */
 pat ldc					leaving loc 18 trp
+#endif
 
 pat lol					yields {LOCAL,$1,4}
 
@@ -1076,6 +1087,27 @@ with ANYCON REG
 with SHIFT_CREG REG
   gen sal %2,cl			yields %2
 
+pat sli $1==8
+with SHIFT_CREG REG REG
+  gen testb cl,{ANYCON,32}
+      jne {label,1f}
+      shld %3,%2,cl
+      sal %2,cl
+      jmp {label,2f}
+      1:
+      mov %3,%2
+      sal %3,cl
+      xor %2,%2
+      2:			yields %3 %2
+
+pat loc sli ($1&32)==0 && $2==8
+with REG REG
+  gen shld %2,%1,{ANYCON,$1&31}
+      sal %1,{ANYCON,$1&31}	yields %2 %1
+pat loc sli ($1&32)!=0 && $2==8
+with REG REG
+  gen sal %1,{ANYCON,$1&31}	yields %1 {ANYCON,0}
+
 /*
 pat sli !defined($1)
 with ACC
@@ -1089,6 +1121,29 @@ with SHIFT_CREG REG
 with ANYCON REG
   gen sar %2,%1			yields %2
 
+pat sri $1==8
+with SHIFT_CREG REG REG
+  gen testb cl,{ANYCON,32}
+      jne {label,1f}
+      shrd %2,%3,cl
+      sar %3,cl
+      jmp {label,2f}
+      1:
+      mov %2,%3
+      sar %2,cl
+      sar %3,{ANYCON,31}
+      2:			yields %3 %2
+
+pat loc sri ($1&32)==0 && $2==8
+with REG REG
+  gen shrd %1,%2,{ANYCON,$1&31}
+      sar %2,{ANYCON,$1&31}	yields %2 %1
+pat loc sri ($1&32)!=0 && $2==8
+with REG REG
+  gen mov %1,%2
+      sar %1,{ANYCON,$1&31}
+      sar %2,{ANYCON,31}	yields %2 %1
+
 /*
 pat sri !defined($1)
 with ACC
@@ -1181,6 +1236,27 @@ gen shr %2,cl			yields %2
 with ANYCON REG
 gen shr %2,%1			yields %2
 
+pat sru $1==8
+with SHIFT_CREG REG REG
+  gen testb cl,{ANYCON,32}
+      jne {label,1f}
+      shrd %2,%3,cl
+      shr %3,cl
+      jmp {label,2f}
+      1:
+      mov %2,%3
+      shr %2,cl
+      xor %3,%3
+      2:			yields %3 %2
+
+pat loc sru ($1&32)==0 && $2==8
+with REG REG		/* %1 = low word (top of stack), %2 = high word */
+  gen shrd %1,%2,{ANYCON,$1&31}
+      shr %2,{ANYCON,$1&31}	yields %2 %1
+pat loc sru ($1&32)!=0 && $2==8
+with REG REG
+  gen shr %2,{ANYCON,$1&31}	yields {ANYCON,0} %2
+
 /*
 pat sru !defined($1)
 with ACC STACK
@@ -2085,10 +2161,13 @@ with CXREG DXREG ACC
 kills ALL
 gen proccall {label,".cii"}		yields %3
 
+#if 0
+/* wrong when integer size > 4 */
 pat ciu					leaving cuu
 pat cui					leaving cuu
 
 pat cuu
+#endif
 
 pat loc loc cii zeq $1==1
 with GENREG STACK
@@ -2123,10 +2202,25 @@ with exact rm2
 uses reusing %1,GENREG
 gen movsx %a,%1			yields %a
 
+pat loc loc cii $1==4 && $2==8
+with ACC
+  gen cdq.			yields edx eax
+
+pat loc loc cii $1<4 && $2==8	leaving loc $1 loc 4 cii loc 4 loc $2 cii
+
+pat loc loc cii $1==8 && $2<=4
+with a_word a_word		yields %1
+
 pat loc loc ciu				leaving loc $1 loc $2 cuu
 pat loc loc cui				leaving loc $1 loc $2 cuu
 
-pat loc loc cuu $1==$2
+pat loc loc cuu $1==$2 || ($1<=4 && $2<=4)
+
+pat loc loc cuu $1<=4 && $2==8
+with a_word			yields {ANYCON,0} %1
+
+pat loc loc cuu $1==8 && $2<=4
+with a_word a_word		yields %1
 
 pat loc loc cif $1==4 && $2==4		leaving loc 4 cal ".cif4" asp 4
 pat loc loc cif $1==4 && $2==8		leaving loc 4 cal ".cif8"
@@ -2432,8 +2526,31 @@ with rmorconst register
       dec %a
       2:			yields %a
 
+pat cmi $1==8
+with rmorconst rmorconst GENREG GENREG
+  /* Let dx = 0x100 or 0x101 if a < b, 0 if a == b, 1 if a > b.
+     Shift left so 0x100 becomes the sign bit of edx. */
+  /* can't use 5th REG */
+  gen sub %3,%1
+      setne %3.1
+      sbb %4,%2
+      setl %4.2
+      setg %4.1
+      orb %4.1,%3.1
+      shl %4,{ANYCON,23}	yields %4
+
 pat cmu $1==4				 leaving cmp
 
+pat cmu $1==8
+with rmorconst rmorconst GENREG GENREG
+  gen sub %3,%1
+      setne %3.1
+      sbb %4,%2
+      setb %4.2
+      seta %4.1
+      orb %4.1,%3.1
+      shl %4,{ANYCON,23}	yields %4
+
 pat cms $1==4
 with REG rmorconst
   gen sub %1,%2			yields %1
@@ -2594,6 +2711,49 @@ pat cmp zgt	    call cmxzxx("ja","jb")
 pat cms zeq $1==4   call cmxzxx("je","je")
 pat cms zne $1==4   call cmxzxx("jne","jne")
 
+proc cmx8txxn example cmi tgt
+with GENREG REG rmorconst rmorconst
+  /* can't use 5th REG */
+  gen sub %1,%3
+      sbb %2,%4
+      setxx* %2.1
+      movzxb %2,%2.1		yields %2
+proc cmx8txxy example cmi tlt
+with rmorconst rmorconst GENREG REG
+  gen sub %3,%1
+      sbb %4,%2
+      setxx* %4.1
+      movzxb %4,%4.1		yields %4
+
+pat cmi tlt $1==8	call cmx8txxy("setl")
+pat cmi tle $1==8	call cmx8txxn("setge")
+pat cmi tge $1==8	call cmx8txxy("setge")
+pat cmi tgt $1==8	call cmx8txxn("setl")
+pat cmu tlt $1==8	call cmx8txxy("setb")
+pat cmu tle $1==8	call cmx8txxn("setae")
+pat cmu tge $1==8	call cmx8txxy("setae")
+pat cmu tgt $1==8	call cmx8txxn("setb")
+
+proc cmx8zxxn example cmi zgt
+with REG REG rmorconst rmorconst STACK
+  gen sub %1,%3
+      sbb %2,%4
+      jxx* {label,$2}
+proc cmx8zxxy example cmi zlt
+with rmorconst rmorconst REG REG STACK
+  gen sub %3,%1
+      sbb %4,%2
+      jxx* {label,$2}
+
+pat cmi zlt $1==8	call cmx8zxxy("jl")
+pat cmi zle $1==8	call cmx8zxxn("jge")
+pat cmi zge $1==8	call cmx8zxxy("jge")
+pat cmi zgt $1==8	call cmx8zxxn("jl")
+pat cmu zlt $1==8	call cmx8zxxy("jb")
+pat cmu zle $1==8	call cmx8zxxn("jae")
+pat cmu zge $1==8	call cmx8zxxy("jae")
+pat cmu zgt $1==8	call cmx8zxxn("jb")
+
 pat cms zne $1==8
 with regorconst regorconst rm rm STACK
   gen cmp %3,%1
diff --git a/tests/plat/build.lua b/tests/plat/build.lua
index 72fd6afad..7adac3134 100644
--- a/tests/plat/build.lua
+++ b/tests/plat/build.lua
@@ -4,6 +4,7 @@ definerule("plat_testsuite",
 	{
 		plat = { type="string" },
 		method = { type="string" },
+		-- added long-long/llshift_e.c
 		sets = { type="table", default={"core", "b", "bugs", "m2", "floats", "long-long"}},
 		skipsets = { type="table", default={}},
 		tests = { type="targets", default={} },
diff --git a/tests/plat/long-long/llcmp_e.c b/tests/plat/long-long/llcmp_e.c
new file mode 100644
index 000000000..8edcf727d
--- /dev/null
+++ b/tests/plat/long-long/llcmp_e.c
@@ -0,0 +1,124 @@
+#include "test.h"
+
+struct s_cmp {
+	long long a;
+	long long b;
+	int a_cmp_b; /* -1 if a < b, 0 if a == b, 1 if a > b */
+} s_cases[] = {
+	{-1LL, -1LL,  0},
+	{-1LL,  0LL, -1},
+	{-1LL,  1LL, -1},
+	{ 0LL, -1LL,  1},
+	{ 0LL,  0LL,  0},
+	{ 0LL,  1LL, -1},
+	{ 1LL, -1LL,  1},
+	{ 1LL,  0LL,  1},
+	{ 1LL,  1LL,  0},
+};
+
+struct u_cmp {
+	unsigned long long a;
+	unsigned long long b;
+	int a_cmp_b;
+} u_cases[] = {
+	{ 0ULL,  0ULL,  0},
+	{ 0ULL,  1ULL, -1},
+	{ 1ULL,  0ULL,  1},
+	{ 1ULL,  1ULL,  0},
+};
+
+#define LEN(ary) (sizeof(ary) / sizeof(ary[0]))
+
+/* Compiler should not optimize !t[a < b] as a >= b. */
+int t[] = {0, 1};
+
+void _m_a_i_n(void) {
+	int i;
+#define A c->a
+#define B c->b
+
+	for (i = 0; i < LEN(s_cases); i++) {
+		struct s_cmp *c = &s_cases[i];
+		switch (c->a_cmp_b) {
+		case -1:
+			ASSERT(A < B);
+			ASSERT(A <= B);
+			ASSERT(A != B);
+			ASSERT(t[A < B]);
+			ASSERT(t[A <= B]);
+			ASSERT(!t[A == B]);
+			ASSERT(t[A != B]);
+			ASSERT(!t[A >= B]);
+			ASSERT(!t[A > B]);
+			break;
+		case 0:
+			ASSERT(A <= B);
+			ASSERT(A == B);
+			ASSERT(A >= B);
+			ASSERT(!t[A < B]);
+			ASSERT(t[A <= B]);
+			ASSERT(t[A == B]);
+			ASSERT(!t[A != B]);
+			ASSERT(t[A >= B]);
+			ASSERT(!t[A > B]);
+			break;
+		case 1:
+			ASSERT(A != B);
+			ASSERT(A >= B);
+			ASSERT(A > B);
+			ASSERT(!t[A < B]);
+			ASSERT(!t[A <= B]);
+			ASSERT(!t[A == B]);
+			ASSERT(t[A != B]);
+			ASSERT(t[A >= B]);
+			ASSERT(t[A > B]);
+			break;
+		default:
+			ASSERT(0);
+			break;
+		}
+	}
+	for (i = 0; i < LEN(u_cases); i++) {
+		struct u_cmp *c = &u_cases[i];
+		switch (c->a_cmp_b) {
+		case -1:
+			ASSERT(A < B);
+			ASSERT(A <= B);
+			ASSERT(A != B);
+			ASSERT(t[A < B]);
+			ASSERT(t[A <= B]);
+			ASSERT(!t[A == B]);
+			ASSERT(t[A != B]);
+			ASSERT(!t[A >= B]);
+			ASSERT(!t[A > B]);
+			break;
+		case 0:
+			ASSERT(A <= B);
+			ASSERT(A == B);
+			ASSERT(A >= B);
+			ASSERT(!t[A < B]);
+			ASSERT(t[A <= B]);
+			ASSERT(t[A == B]);
+			ASSERT(!t[A != B]);
+			ASSERT(t[A >= B]);
+			ASSERT(!t[A > B]);
+			break;
+		case 1:
+			ASSERT(A != B);
+			ASSERT(A >= B);
+			ASSERT(A > B);
+			ASSERT(!t[A < B]);
+			ASSERT(!t[A <= B]);
+			ASSERT(!t[A == B]);
+			ASSERT(t[A != B]);
+			ASSERT(t[A >= B]);
+			ASSERT(t[A > B]);
+			break;
+		default:
+			ASSERT(0);
+			break;
+		}
+	}
+	finished();
+}
+
diff --git a/tests/plat/long-long/llshift_e.c b/tests/plat/long-long/llshift_e.c
new file mode 100644
index 000000000..b5652ebb3
--- /dev/null
+++ b/tests/plat/long-long/llshift_e.c
@@ -0,0 +1,75 @@
+#include "test.h"
+
+/*
+ * i << 1 is a constant shift.  i << (1 + zero) is a variable shift,
+ * and may use a different rule in some code generators.
+ */
+int zero = 0;
+
+long long i = 121LL;
+long long j = 224690292230LL;
+unsigned long long u = 12022195707510591570ULL;
+
+void _m_a_i_n(void) {
+	ASSERT(i << 0 == 121LL);
+	ASSERT(i << (0 + zero) == 121LL);
+	ASSERT(i << 1 == 242LL);
+	ASSERT(i << (1 + zero) == 242LL);
+	ASSERT(i << 26 == 8120172544LL);
+	ASSERT(i << (26 + zero) == 8120172544LL);
+	ASSERT(i << 56 == 8718968878589280256LL);
+	ASSERT(i << (56 + zero) == 8718968878589280256LL);
+
+	ASSERT(i >> 0 == 121LL);
+	ASSERT(i >> (0 + zero) == 121LL);
+	ASSERT(i >> 1 == 60LL);
+	ASSERT(i >> (1 + zero) == 60LL);
+	ASSERT(i >> 7 == 0LL);
+	ASSERT(i >> (7 + zero) == 0LL);
+	ASSERT(i >> 37 == 0LL);
+	ASSERT(i >> (37 + zero) == 0LL);
+
+	ASSERT(-i >> 0 == -121LL);
+	ASSERT(-i >> (0 + zero) == -121LL);
+	ASSERT(-i >> 1 == -61LL);
+	ASSERT(-i >> (1 + zero) == -61LL);
+	ASSERT(-i >> 7 == -1LL);
+	ASSERT(-i >> (7 + zero) == -1LL);
+	ASSERT(-i >> 37 == -1LL);
+	ASSERT(-i >> (37 + zero) == -1LL);
+
+	ASSERT(j << 0 == 224690292230LL);
+	ASSERT(j << (0 + zero) == 224690292230LL);
+	ASSERT(j << 10 == 230082859243520LL);
+	ASSERT(j << (10 + zero) == 230082859243520LL);
+	ASSERT(j << 25 == 7539355131691663360LL);
+	ASSERT(j << (25 + zero) == 7539355131691663360LL);
+
+	ASSERT(j >> 0 == 224690292230LL);
+	ASSERT(j >> (0 + zero) == 224690292230LL);
+	ASSERT(j >> 6 == 3510785816LL);
+	ASSERT(j >> (6 + zero) == 3510785816LL);
+	ASSERT(j >> 32 == 52LL);
+	ASSERT(j >> (32 + zero) == 52LL);
+	ASSERT(j >> 38 == 0LL);
+	ASSERT(j >> (38 + zero) == 0LL);
+
+	ASSERT(-j >> 0 == -224690292230LL);
+	ASSERT(-j >> (0 + zero) == -224690292230LL);
+	ASSERT(-j >> 6 == -3510785817LL);
+	ASSERT(-j >> (6 + zero) == -3510785817LL);
+	ASSERT(-j >> 32 == -53LL);
+	ASSERT(-j >> (32 + zero) == -53LL);
+	ASSERT(-j >> 38 == -1LL);
+	ASSERT(-j >> (38 + zero) == -1LL);
+
+	ASSERT(u >> 0 == 12022195707510591570ULL);
+	ASSERT(u >> (0 + zero) == 12022195707510591570ULL);
+	ASSERT(u >> 1 == 6011097853755295785ULL);
+	ASSERT(u >> (1 + zero) == 6011097853755295785ULL);
+	ASSERT(u >> 41 == 5467061ULL);
+	ASSERT(u >> (41 + zero) == 5467061ULL);
+
+	finished();
+}
+

From eb0d5e1d6c4cab26a033eef8d7439f81eced3fb0 Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Thu, 12 Sep 2019 12:26:14 -0400
Subject: [PATCH 08/22] Switch rotate_e.e from EM_WSIZE to _EM_WSIZE.

Also change EM_PSIZE to _EM_PSIZE.  I will add _EM_LLSIZE to this
test, then all 3 macros will have the leading underscore.
---
 tests/plat/core/rotate_e.e | 60 +++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/tests/plat/core/rotate_e.e b/tests/plat/core/rotate_e.e
index 0698c58a0..4fddbfd9f 100644
--- a/tests/plat/core/rotate_e.e
+++ b/tests/plat/core/rotate_e.e
@@ -1,5 +1,5 @@
 #
-    mes 2, EM_WSIZE, EM_PSIZE
+    mes 2, _EM_WSIZE, _EM_PSIZE
 
 /*
  * Tests _rol_ (rotate left) and _ror_ (rotate right).  Several back
@@ -14,7 +14,7 @@
  * You can cheat this test if _cmu_ always pushes zero.
  */
 
-#if EM_WSIZE == 2
+#if _EM_WSIZE == 2
 #define LEN2  4
     exa table2
     exa left2
@@ -55,10 +55,10 @@ val4right11
     con 2298473143U4
 
     exp $_m_a_i_n
-    pro $_m_a_i_n, EM_WSIZE
-#define i -EM_WSIZE
+    pro $_m_a_i_n, _EM_WSIZE
+#define i -_EM_WSIZE
 
-#if EM_WSIZE == 2
+#if _EM_WSIZE == 2
     /*
      * Loop for LEN2 items in table2.
      */
@@ -70,20 +70,20 @@ val4right11
     lae left2
     lol i
     loc 1
-    sli EM_WSIZE
-    ads EM_WSIZE
+    sli _EM_WSIZE
+    ads _EM_WSIZE
     loi 2         /* left distance */
     rol 2         /* rotate left */
     lae table2
     lol i
     loc 1
-    sli EM_WSIZE
-    ads EM_WSIZE
+    sli _EM_WSIZE
+    ads _EM_WSIZE
     loi 2         /* expected result */
     cmu 2
     zeq *2
     loc __LINE__
-    loc EM_WSIZE
+    loc _EM_WSIZE
     loc 4
     cuu
     cal $fail
@@ -94,20 +94,20 @@ val4right11
     lae right2
     lol i
     loc 1
-    sli EM_WSIZE
-    ads EM_WSIZE
+    sli _EM_WSIZE
+    ads _EM_WSIZE
     loi 2         /* right distance */
     ror 2         /* rotate right */
     lae table2
     lol i
     loc 1
-    sli EM_WSIZE
-    ads EM_WSIZE
+    sli _EM_WSIZE
+    ads _EM_WSIZE
     loi 2         /* expected result */
     cmu 2
     zeq *3
     loc __LINE__
-    loc EM_WSIZE
+    loc _EM_WSIZE
     loc 4
     cuu
     cal $fail
@@ -117,7 +117,7 @@ val4right11
     lol i
     loc LEN2
     blt *1
-#endif /* EM_WSIZE == 2 */
+#endif /* _EM_WSIZE == 2 */
 
     /*
      * Loop for LEN4 items in table4.
@@ -130,23 +130,23 @@ val4right11
     lae left4
     lol i
     loc 1
-    sli EM_WSIZE
-    ads EM_WSIZE
+    sli _EM_WSIZE
+    ads _EM_WSIZE
     loi 2         /* left distance */
     loc 2
-    loc EM_WSIZE
+    loc _EM_WSIZE
     cii
     rol 4         /* rotate left */
     lae table4
     lol i
     loc 2
-    sli EM_WSIZE
-    ads EM_WSIZE
+    sli _EM_WSIZE
+    ads _EM_WSIZE
     loi 4         /* expected result */
     cmu 4
     zeq *5
     loc __LINE__
-    loc EM_WSIZE
+    loc _EM_WSIZE
     loc 4
     cuu
     cal $fail
@@ -157,23 +157,23 @@ val4right11
     lae right4
     lol i
     loc 1
-    sli EM_WSIZE
-    ads EM_WSIZE
+    sli _EM_WSIZE
+    ads _EM_WSIZE
     loi 2         /* right distance */
     loc 2
-    loc EM_WSIZE
+    loc _EM_WSIZE
     cii
     ror 4         /* rotate right */
     lae table4
     lol i
     loc 2
-    sli EM_WSIZE
-    ads EM_WSIZE
+    sli _EM_WSIZE
+    ads _EM_WSIZE
     loi 4         /* expected result */
     cmu 4
     zeq *6
     loc __LINE__
-    loc EM_WSIZE
+    loc _EM_WSIZE
     loc 4
     cuu
     cal $fail
@@ -197,7 +197,7 @@ val4right11
     cmu 4
     zeq *7
     loc __LINE__
-    loc EM_WSIZE
+    loc _EM_WSIZE
     loc 4
     cuu
     cal $fail
@@ -212,7 +212,7 @@ val4right11
     cmu 4
     zeq *8
     loc __LINE__
-    loc EM_WSIZE
+    loc _EM_WSIZE
     loc 4
     cuu
     cal $fail

From aeb8ed53e41af8a030c03963ecc9489a6814a605 Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Thu, 12 Sep 2019 13:40:07 -0400
Subject: [PATCH 09/22] Define _EM_LLSIZE, int64_t, uint64_t for linux386.

Also change UINT32_MAX in <stdint.h> from 4294967295 to 4294967295U.
The U suffix avoids a promotion to long or unsigned long if it would
fit in unsigned int.

Define _EM_LLSIZE but not EM_LLSIZE.  The leading underscore is a
convention for such macros.  If code always uses _EM_LLSIZE, we will
never need to add EM_LLSIZE.  The flag -D_EM_LLSIZE={q} is in
plat/linux386/descr, not lib/descr/fe, so platforms without long long
don't define _EM_LLSIZE.

<stdint.h> doesn't keep the old code for _EM_LSIZE == 8, because I
change it to _EM_LLSIZE == 8.  No platform had _EM_LSIZE == 8, and the
old limits like INT64_MAX were wrong.
---
 lang/cem/libcc.ansi/headers/stdint.h | 16 ++++++++--------
 plat/linux386/descr                  |  6 ++++--
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/lang/cem/libcc.ansi/headers/stdint.h b/lang/cem/libcc.ansi/headers/stdint.h
index e2eb4c201..e21b48d7a 100644
--- a/lang/cem/libcc.ansi/headers/stdint.h
+++ b/lang/cem/libcc.ansi/headers/stdint.h
@@ -33,16 +33,16 @@ typedef unsigned long   uint32_t;
 #endif
 #define INT32_MAX       2147483647
 #define INT32_MIN       (-2147483648)
-#define UINT32_MAX      4294967295
+#define UINT32_MAX      4294967295U
 
-/* We only get int64_t if longs are 8 bytes. */
+/* We only get int64_t if long longs are 8 bytes. */
 
-#if _EM_LSIZE == 8
-typedef signed long     int64_t;
-typedef unsigned long   uint64_t;
-#define INT64_MAX       2147483647LL
-#define INT64_MIN       (-2147483648LL)
-#define UINT64_MAX      4294967295ULL
+#if _EM_LLSIZE == 8
+typedef signed long long    int64_t;
+typedef unsigned long long  uint64_t;
+#define INT64_MAX       9223372036854775807LL
+#define INT64_MIN       (-9223372036854775807LL-1) /* 9223372036854775808 overflows long long */
+#define UINT64_MAX      18446744073709551615ULL
 
 typedef int64_t         intmax_t;
 typedef uint64_t        uintmax_t;
diff --git a/plat/linux386/descr b/plat/linux386/descr
index 8a7dc5f47..9ae17a548 100644
--- a/plat/linux386/descr
+++ b/plat/linux386/descr
@@ -10,6 +10,8 @@ var s=2
 var sa={s}
 var l={w}
 var la={w}
+var q=8
+var qa=4
 var f={w}
 var fa={w}
 var d=8
@@ -19,12 +21,12 @@ var xa={x}
 var ARCH=i386
 var PLATFORM=linux386
 var PLATFORMDIR={EM}/share/ack/{PLATFORM}
-var CPP_F=-D__unix
+var CPP_F=-D__unix -D_EM_LLSIZE={q}
 var ALIGN=-a0:4 -a1:4 -a2:4 -a3:4 -b0:0x08048054
 var C_LIB={PLATFORMDIR}/libc-ansi.a
 # bitfields reversed for compatibility with (g)cc.
 # long long enabled.
-var CC_ALIGN=-Vrq8.4
+var CC_ALIGN=-Vrq{q}.{qa}
 var OLD_C_LIB={C_LIB}
 var MACHOPT_F=-m10
 var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr

From 04427e65dc1cf3721898e219f76f913870097987 Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Thu, 12 Sep 2019 19:47:51 -0400
Subject: [PATCH 10/22] Add and test rol 8, ror 8 for linux386.

These tests are in core/rotate_e.e with the other rotation tests, and
only run on platforms where _EM_LLSIZE == 8.
---
 mach/i386/ncg/table        |  44 +++++++++++
 tests/plat/core/rotate_e.e | 146 +++++++++++++++++++++++++++++++++++++
 2 files changed, 190 insertions(+)

diff --git a/mach/i386/ncg/table b/mach/i386/ncg/table
index 10fd02809..184662a15 100644
--- a/mach/i386/ncg/table
+++ b/mach/i386/ncg/table
@@ -2339,12 +2339,56 @@ with SHIFT_CREG REG
 with ANYCON REG
   gen rol %2,%1				yields %2
 
+pat rol $1==8
+with SHIFT_CREG REG REG
+  uses REG
+  gen testb cl,{ANYCON,32}
+      je {label,1f}
+      xchg %2,%3
+      1:
+      mov %a,%3
+      shld %3,%2,cl
+      shld %2,%a,cl			yields %3 %2
+
+pat loc rol ($1&32)==0 && $2==8
+with REG REG
+  uses REG
+  gen mov %a,%2
+      shld %2,%1,{ANYCON,$1&31}
+      shld %1,%a,{ANYCON,$1&31}		yields %2 %1
+pat loc rol ($1&63)==32 && $2==8
+  leaving exg 4
+pat loc rol ($1&32)!=0 && $2==8
+  leaving loc (0-$1)&31 ror 8
+
 pat ror $1==4
 with SHIFT_CREG REG
   gen ror %2,cl				yields %2
 with ANYCON REG
   gen ror %2,%1				yields %2
 
+pat ror $1==8
+with SHIFT_CREG REG REG
+  uses REG
+  gen testb cl,{ANYCON,32}
+      je {label,1f}
+      xchg %2,%3
+      1:
+      mov %a,%2
+      shrd %2,%3,cl
+      shrd %3,%a,cl			yields %3 %2
+
+pat loc ror ($1&32)==0 && $2==8
+with REG REG
+  uses REG
+  gen mov %a,%1
+      shrd %1,%2,{ANYCON,$1&31}
+      shrd %2,%a,{ANYCON,$1&31}		yields %2 %1
+pat loc ror ($1&63)==32 && $2==8
+  leaving exg 4
+pat loc ror ($1&32)!=0 && $2==8
+  leaving loc (0-$1)&31 rol 8
+
 /*******************************************************************
  *  Group 10 : Set Instructions					   *
  *******************************************************************/
diff --git a/tests/plat/core/rotate_e.e b/tests/plat/core/rotate_e.e
index 4fddbfd9f..499d834ed 100644
--- a/tests/plat/core/rotate_e.e
+++ b/tests/plat/core/rotate_e.e
@@ -10,6 +10,7 @@
  * word size, or longer than 4 bytes.
  *  - If word size is 2, then try rotating 2-byte and 4-byte values.
  *  - If word size is 4, then try rotating 4-byte values.
+ *  - If long long size is 8, then also try 8-byte rotations.
  *
  * You can cheat this test if _cmu_ always pushes zero.
  */
@@ -54,6 +55,35 @@ val4left7
 val4right11
     con 2298473143U4
 
+#if _EM_LLSIZE == 8
+#define LEN8  4
+    exa table8
+    exa left8
+    exa right8
+table8                        /* left, right */
+    con 14079773792309488728U8  /*  0, 0  */
+    con  9712803510909425841U8  /*  1, 63 */
+    con 10409556348460427178U8  /* 32, 32 */
+    con  7039886896154744364U8  /* 63, 1  */
+left8
+    con 0I2, 1I2, 32I2, 63I2
+right8
+    con 0I2, 63I2, 32I2, 1I2
+
+    exa val8
+    exa val8left13
+    exa val8right20
+    exa val8right32
+val8
+    con 15129222862059184558U8
+val8left13
+    con 13366998808072149566U8
+val8right20
+    con  1881076513336495948U8
+val8right32
+    con 17636555387978501128U8
+#endif
+
     exp $_m_a_i_n
     pro $_m_a_i_n, _EM_WSIZE
 #define i -_EM_WSIZE
@@ -219,5 +249,121 @@ val4right11
     asp 4
 8
 
+#if _EM_LLSIZE == 8
+    /*
+     * Loop for LEN8 items in table8.
+     */
+    loc 0
+    stl i
+9
+    lae table8
+    loi 8         /* value to rotate */
+    lae left8
+    lol i
+    loc 1
+    sli _EM_WSIZE
+    ads _EM_WSIZE
+    loi 2         /* left distance */
+    loc 2
+    loc _EM_WSIZE
+    cii
+    rol 8         /* rotate left */
+    lae table8
+    lol i
+    loc 3
+    sli _EM_WSIZE
+    ads _EM_WSIZE
+    loi 8         /* expected result */
+    cmu 8
+    zeq *10
+    loc __LINE__
+    loc _EM_WSIZE
+    loc 4
+    cuu
+    cal $fail
+    asp 4
+10
+    lae table8
+    loi 8         /* value to rotate */
+    lae right8
+    lol i
+    loc 1
+    sli _EM_WSIZE
+    ads _EM_WSIZE
+    loi 2         /* right distance */
+    loc 2
+    loc _EM_WSIZE
+    cii
+    ror 8         /* rotate right */
+    lae table8
+    lol i
+    loc 3
+    sli _EM_WSIZE
+    ads _EM_WSIZE
+    loi 8         /* expected result */
+    cmu 8
+    zeq *11
+    loc __LINE__
+    loc _EM_WSIZE
+    loc 4
+    cuu
+    cal $fail
+    asp 4
+11
+    inl i         /* loop LEN8 times */
+    lol i
+    loc LEN8
+    blt *9
+
+    /*
+     * Rotate 8-byte value by constant distance.
+     */
+    lae val8
+    loi 8
+    loc 13
+    rol 8         /* rotate left by 13 bits */
+    lae val8left13
+    loi 8
+    cmu 8
+    zeq *12
+    loc __LINE__
+    loc _EM_WSIZE
+    loc 4
+    cuu
+    cal $fail
+    asp 4
+12
+    lae val8
+    loi 8
+    loc 20
+    ror 8         /* rotate right by 20 bits */
+    lae val8right20
+    loi 8
+    cmu 8
+    zeq *13
+    loc __LINE__
+    loc _EM_WSIZE
+    loc 4
+    cuu
+    cal $fail
+    asp 4
+13
+    lae val8
+    loi 8
+    loc 32
+    ror 8         /* rotate right by 32 bits */
+    lae val8right32
+    loi 8
+    cmu 8
+    zeq *14
+    loc __LINE__
+    loc _EM_WSIZE
+    loc 4
+    cuu
+    cal $fail
+    asp 4
+14
+#endif /* _EM_LLSIZE == 8 */
+
     cal $finished
     end

From 12457f63851dbbf897ad7926cd013f93d83e525e Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Mon, 16 Sep 2019 11:44:25 -0400
Subject: [PATCH 11/22] Add long long tests for bitset, convert, multiply.

---
 tests/plat/build.lua               |   2 +-
 tests/plat/long-long/llbitset_e.c  |  40 +++++++
 tests/plat/long-long/llconvert_e.c | 183 +++++++++++++++++++++++++++++
 tests/plat/long-long/llmul_e.c     |  23 ++++
 4 files changed, 247 insertions(+), 1 deletion(-)
 create mode 100644 tests/plat/long-long/llbitset_e.c
 create mode 100644 tests/plat/long-long/llconvert_e.c
 create mode 100644 tests/plat/long-long/llmul_e.c

diff --git a/tests/plat/build.lua b/tests/plat/build.lua
index 7adac3134..117e6a735 100644
--- a/tests/plat/build.lua
+++ b/tests/plat/build.lua
@@ -4,7 +4,7 @@ definerule("plat_testsuite",
 	{
 		plat = { type="string" },
 		method = { type="string" },
-		-- added long-long/llshift_e.c
+		-- added long-long/llbitset_e.c
 		sets = { type="table", default={"core", "b", "bugs", "m2", "floats", "long-long"}},
 		skipsets = { type="table", default={}},
 		tests = { type="targets", default={} },
diff --git a/tests/plat/long-long/llbitset_e.c b/tests/plat/long-long/llbitset_e.c
new file mode 100644
index 000000000..df6075389
--- /dev/null
+++ b/tests/plat/long-long/llbitset_e.c
@@ -0,0 +1,40 @@
+#include "test.h"
+
+typedef unsigned long long set;
+
+set a       = 0xfaab001bd86b595aLL;
+set b       = 0x3abe6373562dec1cLL;
+set not_a   = 0x0554ffe42794a6a5LL;
+set a_and_b = 0x3aaa001350294818LL;
+set a_or_b  = 0xfabf637bde6ffd5eLL;
+set a_xor_b = 0xc01563688e46b546LL;
+
+/* ACK C evaluates H(constant) at compile time. */
+#define H(x) ((set)(x) << 32)
+
+void _m_a_i_n(void) {
+	ASSERT((~a & 0xffffffffffffffffLL) == not_a);
+	ASSERT((a & b) == a_and_b);
+	ASSERT((a | b) == a_or_b);
+	ASSERT((a ^ b) == a_xor_b);
+
+	ASSERT((a & 1)     == 0);
+	ASSERT((2 & a)     == 2);
+	ASSERT((a & ~8)    == 0xfaab001bd86b5952LL);
+	ASSERT((a & H(1))  == H(1));
+	ASSERT((H(4) & a)  == 0);
+	ASSERT((a & ~H(2)) == 0xfaab0019d86b595aLL);
+
+	ASSERT((a | 1)     == 0xfaab001bd86b595bLL);
+	ASSERT((2 | a)     == a);
+	ASSERT((a | H(4))  == 0xfaab001fd86b595aLL);
+	ASSERT((H(8) | a)  == a);
+
+	ASSERT((a ^ 1)     == 0xfaab001bd86b595bLL);
+	ASSERT((2 ^ a)     == 0xfaab001bd86b5958LL);
+	ASSERT((a ^ H(4))  == 0xfaab001fd86b595aLL);
+	ASSERT((H(8) ^ a)  == 0xfaab0013d86b595aLL);
+
+	finished();
+}
+
diff --git a/tests/plat/long-long/llconvert_e.c b/tests/plat/long-long/llconvert_e.c
new file mode 100644
index 000000000..24784e450
--- /dev/null
+++ b/tests/plat/long-long/llconvert_e.c
@@ -0,0 +1,183 @@
+#include <unistd.h>
+#include "test.h"
+
+char coal = 12;
+short stop = 3456;
+int erest = 7890;
+long way = 123456789L;
+
+signed char ter = -1;
+short sale = -9876;
+int ern = -5432;
+long itude = -1000000L;
+
+unsigned char ming = 200;
+unsigned short age = 40000U;
+unsigned int othe = 50000U;
+unsigned long shore = 3000000000UL;
+
+long long ago;
+unsigned long long ull;
+
+/*
+ * BAR may modify global variables (though it really doesn't).  The
+ * compiler should not assume that "ago" has the same value before and
+ * after BAR, but should generate code to read "ago" again.
+ */
+#define BAR write(1, "", 0)
+
+void _m_a_i_n(void) {
+	ago = coal;
+	BAR;
+	ASSERT(ago == coal);
+	ASSERT(ago == 12LL);
+
+	ago = stop;
+	BAR;
+	ASSERT(ago == stop);
+	ASSERT(ago == 3456LL);
+
+	ago = erest;
+	BAR;
+	ASSERT(ago == erest);
+	ASSERT(ago == 7890LL);
+
+	ago = way;
+	BAR;
+	ASSERT(ago == way);
+	ASSERT(ago == 123456789LL);
+
+	ull = coal;
+	BAR;
+	ASSERT(ull == coal);
+	ASSERT(ull == 12ULL);
+
+	ull = stop;
+	BAR;
+	ASSERT(ull == stop);
+	ASSERT(ull == 3456ULL);
+
+	ull = erest;
+	BAR;
+	ASSERT(ull == erest);
+	ASSERT(ull == 7890ULL);
+
+	ull = way;
+	BAR;
+	ASSERT(ull == way);
+	ASSERT(ull == 123456789ULL);
+
+	ago = ter;
+	BAR;
+	ASSERT(ago == ter);
+	ASSERT(ago == -1LL);
+
+	ago = sale;
+	BAR;
+	ASSERT(ago == sale);
+	ASSERT(ago == -9876LL);
+
+	ago = ern;
+	BAR;
+	ASSERT(ago == ern);
+	ASSERT(ago == -5432LL);
+
+	ago = itude;
+	BAR;
+	ASSERT(ago == itude);
+	ASSERT(ago == -1000000LL);
+
+	ago = ming;
+	BAR;
+	ASSERT(ago == ming);
+	ASSERT(ago == 200LL);
+
+	ago = age;
+	BAR;
+	ASSERT(ago == age);
+	ASSERT(ago == 40000LL);
+
+	ago = othe;
+	BAR;
+	ASSERT(ago == othe);
+	ASSERT(ago == 50000LL);
+
+	ago = shore;
+	BAR;
+	ASSERT(ago == shore);
+	ASSERT(ago == 3000000000LL);
+
+	ull = ming;
+	BAR;
+	ASSERT(ull == ming);
+	ASSERT(ull == 200ULL);
+
+	ull = age;
+	BAR;
+	ASSERT(ull == age);
+	ASSERT(ull == 40000ULL);
+
+	ull = othe;
+	BAR;
+	ASSERT(ull == othe);
+	ASSERT(ull == 50000ULL);
+
+	ull = shore;
+	BAR;
+	ASSERT(ull == shore);
+	ASSERT(ull == 3000000000ULL);
+
+	ago = 95;
+	BAR;
+	ter = ago;
+	sale = ago;
+	ern = ago;
+	itude = ago;
+	ming = ago;
+	age = ago;
+	othe = ago;
+	shore = ago;
+	BAR;
+	ASSERT(ter == 95);
+	ASSERT(sale == 95);
+	ASSERT(ern == 95);
+	ASSERT(itude == 95L);
+	ASSERT(ming == 95);
+	ASSERT(age == 95U);
+	ASSERT(othe == 95U);
+	ASSERT(shore == 95UL);
+
+	ago = -59;
+	BAR;
+	ter = ago;
+	sale = ago;
+	ern = ago;
+	itude = ago;
+	BAR;
+	ASSERT(ter == -59);
+	ASSERT(sale == -59);
+	ASSERT(ern == -59);
+	ASSERT(itude == -59L);
+
+	ull = 42;
+	BAR;
+	ter = ull;
+	sale = ull;
+	ern = ull;
+	itude = ull;
+	ming = ull;
+	age = ull;
+	othe = ull;
+	shore = ull;
+	BAR;
+	ASSERT(ter == 42);
+	ASSERT(sale == 42);
+	ASSERT(ern == 42);
+	ASSERT(itude == 42L);
+	ASSERT(ming == 42);
+	ASSERT(age == 42U);
+	ASSERT(othe == 42U);
+	ASSERT(shore == 42UL);
+
+	finished();
+}
diff --git a/tests/plat/long-long/llmul_e.c b/tests/plat/long-long/llmul_e.c
new file mode 100644
index 000000000..3636843f0
--- /dev/null
+++ b/tests/plat/long-long/llmul_e.c
@@ -0,0 +1,23 @@
+#include "test.h"
+
+long long a = 40000LL;
+long long b = 3000000000LL;
+long long c = 200000000000000LL;
+unsigned long long d = 60000ULL;
+
+/* products a * b, a * c, c * d */
+long long ab = 120000000000000LL;
+long long ac = 8000000000000000000LL;
+unsigned long long cd = 12000000000000000000ULL;
+
+void _m_a_i_n(void) {
+	ASSERT(a * b == ab);
+	ASSERT(-b * a == -ab);
+	ASSERT(b * -40000LL == -ab);
+	ASSERT(c * a == ac);
+	ASSERT(a * -c == -ac);
+	ASSERT(40000LL * -c == -ac);
+	ASSERT(c * d == cd);
+	ASSERT(d * c == cd);
+	finished();
+}

From f6a1e08218e2dea11505da7456ba0601c40c209d Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Mon, 16 Sep 2019 20:19:36 -0400
Subject: [PATCH 12/22] Test long long division and remainder; fix i386.

My i386 code from 893df4b gave the wrong sign to some 8-byte
remainders.  Fix by splitting .dvi8 and .rmi8 so each has its own code
to pick the sign.  They and .dvu8 and .rmu8 share a private sub
.divrem8 for unsigned division.

Improve the i386 code by using instructions like _bsr_ and _shrd_.
Change the helpers to yield a quotient in ebx:eax or a remainder in
ecx:edx; this seems more convenient, because _div_ puts its quotient
in eax and remainder in edx.
---
 mach/i386/libem/build.lua         |   2 +-
 mach/i386/libem/divrem8.s         |  63 ++++++++++++++
 mach/i386/libem/dvi8.s            | 132 ++++++------------------------
 mach/i386/libem/dvu8.s            |  20 +++++
 mach/i386/libem/rmi8.s            |  36 ++++++++
 mach/i386/ncg/table               |   8 +-
 tests/plat/build.lua              |   2 +-
 tests/plat/long-long/lldivrem_e.c |  71 ++++++++++++++++
 8 files changed, 223 insertions(+), 111 deletions(-)
 create mode 100644 mach/i386/libem/divrem8.s
 create mode 100644 mach/i386/libem/dvu8.s
 create mode 100644 mach/i386/libem/rmi8.s
 create mode 100644 tests/plat/long-long/lldivrem_e.c

diff --git a/mach/i386/libem/build.lua b/mach/i386/libem/build.lua
index b92254d96..37e05a45c 100644
--- a/mach/i386/libem/build.lua
+++ b/mach/i386/libem/build.lua
@@ -1,7 +1,7 @@
 for _, plat in ipairs(vars.plats) do
 	acklibrary {
 		name = "lib_"..plat,
-		srcs = { "./*.s" }, -- dvi8.s
+		srcs = { "./*.s" }, -- divrem8.s
 		vars = { plat = plat },
 	}
 end
diff --git a/mach/i386/libem/divrem8.s b/mach/i386/libem/divrem8.s
new file mode 100644
index 000000000..d3334d72d
--- /dev/null
+++ b/mach/i386/libem/divrem8.s
@@ -0,0 +1,69 @@
+.sect .text; .sect .rom; .sect .data; .sect .bss
+.sect .text
+.define .divrem8
+
+yl=12
+yh=16
+xl=20
+xh=24
+	! This private sub for .dvi8, .dvu8, .rmi8, .rmu8
+	! does unsigned division of x = xh:xl by y = yh:yl,
+	! yields ebx:eax = quotient, ecx:edx = remainder.
+
+.divrem8:
+	! Caller must set eax, edx, flag z like so:
+	! mov	edx,yh(esp)
+	! test	edx,edx
+	! mov	eax,xh(esp)
+	jnz	1f		! jump if y >= 2**32
+
+	! y = yl, so x / y = xh:xl / yl = qh:0 + (xl + rh) / yl
+	! where qh, rh are quotient, remainder from xh / yl.
+	mov	ecx,yl(esp)
+	xor	edx,edx		! edx:eax = xh
+	div	ecx		! eax = qh, edx = rh
+	mov	ebx,eax
+	mov	eax,xl(esp)	! edx:eax = rh:xl
+	div	ecx		! ebx:eax = qh:ql = quotient
+	xor	ecx,ecx		! ecx:edx =  0:rl = remainder
+	ret
+
+1:	! Here y >= 2**32.  Find y >> cl in [2**31, 2**32).
+	mov	ebx,yl(esp)	! edx:ebx = y
+	bsr	ecx,edx		! scan yh for highest set bit
+	incb	cl		! cl bits from cl-1 to 0
+	testb	cl,32
+	jz	2f		! jump unless y >= 2**63
+	! Here cl = 32, but shrd takes its count modulo 32 and would
+	! not shift.  Because y >= 2**63, x / y is 0 or 1: estimate 1.
+	mov	eax,1		! eax = q
+	jmp	3f
+2:	shrd	ebx,edx,cl	! ebx = y >> cl
+
+	! Estimate x / y as q = (x / (y >> cl)) >> cl.
+	xor	edx,edx		! edx:eax = xh
+	div	ebx		! eax = xh / (y >> cl)
+	push	eax
+	mov	eax,xl+4(esp)	! push moved xl to xl+4
+	div	ebx
+	pop	edx		! edx:eax = x / (y >> cl)
+	shrd	eax,edx,cl	! eax = q
+
+	! Calculate the remainder x - y * q.  If the subtraction
+	! overflows, then the correct quotient is q - 1, else it is q.
+3:	mov	ebx,yh(esp)
+	imul	ebx,eax		! ebx = yh * q
+	push	eax
+	mul	yl+4(esp)	! edx:eax = yl * q
+	add	ebx,edx		! ebx:eax = y * q
+	mov	edx,xl+4(esp)
+	mov	ecx,xh+4(esp)
+	sub	edx,eax
+	sbb	ecx,ebx		! ecx:edx = remainder
+	pop	eax		! eax = q
+	jnc	1f		! jump unless subtraction overflowed
+	dec	eax		! fix quotient
+	add	edx,yl(esp)
+	adc	ecx,yh(esp)	! fix remainder
+1:	xor	ebx,ebx		! ebx:eax = quotient
+	ret
diff --git a/mach/i386/libem/dvi8.s b/mach/i386/libem/dvi8.s
index 060f85cf1..986572525 100644
--- a/mach/i386/libem/dvi8.s
+++ b/mach/i386/libem/dvi8.s
@@ -1,115 +1,37 @@
 .sect .text; .sect .rom; .sect .data; .sect .bss
 .sect .text
-.define .dvi8, .dvu8
+.define .dvi8
 
-yl=8
-yh=12
-xl=16
-xh=20
-	! .dvi8 and .dvu8 divide x = xh:xl by y = yh:yl,
-	! yield edx:eax = quotient, ecx:ebx = remainder.
-
-.dvu8:
-	! Unsigned division: set di = 0 for non-negative quotient.
-	push	edi
-	xor	di,di
-	mov	eax,xh(esp)
-	mov	edx,yh(esp)
-	and	edx,edx
-	jmp	7f
+yl=4
+yh=8
+xl=12
+xh=16
+	! .dvi8 yields ebx:eax = quotient from x / y
 
 .dvi8:
-	! Signed division: replace x and y with their absolute values.
-	! Set di = 1 for negative quotient, 0 for non-negative.
-	push	edi
-	xor	di,di		! di = 0
-	mov	eax,xh(esp)
-	and	eax,eax
-	jns	1f
-	inc	di		! di = 1
+	xorb	cl,cl		! cl = 0, non-negative result
+	mov	eax,xh(esp)	! eax for .divrem8
+	test	eax,eax
+	jge	1f		! jump unless x < 0
+	incb	cl		! cl = 1, negative result
 	neg	eax
 	neg	xl(esp)
-	sbb	eax,0		! eax:xl = absolute value of x
-1:	mov	edx,yh(esp)
-	and	edx,edx
-	jns	7f
-	xor	di,1		! flip di
+	sbb	eax,0
+	mov	xh(esp),eax	! x = absolute value
+1:	mov	edx,yh(esp)	! edx for .divrem8
+	test	edx,edx		! flag z for .divrem8 when y >= 0
+	jge	1f		! jump unless y < 0
+	xorb	cl,1		! flip sign of result
 	neg	edx
 	neg	yl(esp)
-	sbb	edx,0		! edx:yl = absolute value of y
-
-7:	! Here .dvu8 joins .dvi8, eax = xh, edx = yh, flags test edx,
-	! the values in xh(esp) and yh(esp) are garbage.
-	jnz	8f		! jump if y >= 2**32
-
-	! x / y = x / yl = xh / yl + xl / yl = qh + (xl + rh) / yl
-	! where qh and rh are quotient, remainder from xh / yl.
-	mov	ebx,yl(esp)
-	xor	edx,edx		! edx:eax = xh
-	div	ebx		! eax = qh, edx = rh
-	mov	ecx,eax
-	mov	eax,xl(esp)
-	div	ebx		! eax = ql, edx = remainder
-	mov	ebx,edx
-	mov	edx,ecx		! edx:eax = quotient qh:ql
-	xor	ecx,ecx		! ecx:ebx = remainder
-
-9:	! Finally, if di != 0 then negate quotient, remainder.
-	and	di,di
-	jz	1f
-	neg	edx
-	neg	eax
-	sbb	edx,0		! negate quotient edx:eax
-	neg	ecx
+	sbb	edx,0		! flag z for .divrem8 when y < 0
+	mov	yh(esp),edx	! y = absolute value
+1:	push	ecx
+	call	.divrem8
+	pop	ecx
+	testb	cl,cl
+	jz	1f		! jump unless result < 0
 	neg	ebx
-	sbb	ecx,0		! negate remainder ecx:ebx
-1:	pop	edi		! caller's edi
-	ret	16
-
-8:	! We come here if y >= 2**32.
-	mov	xh(esp),eax
-	mov	yh(esp),edx
-	mov	ebx,yl(esp)	! edx:ebx = y
-
-	! Estimate x / y as q = (x / (y >> cl)) >> cl,
-	! where 2**31 <= (y >> cl) < 2**32.
-	xor	cx,cx
-1:	inc	cx
-	shr	edx,1
-	rcr	ebx,1		! edx:ebx = y >> cl
-	and	edx,edx
-	jnz	1b		! loop until y >> cl fits in ebx
-
-	! x / (y >> cl) = qh + (x + rh) / (y >> cl)
-	push	edi
-	xor	edx,edx		! edx:eax = xh
-	div	ebx		! eax = qh, edx = rh
-	mov	edi,eax
-	mov	eax,xl+4(esp)	! push edi moved xl to xl+4
-	div	ebx		! edi:eax = x / (y >> cl)
-
-	! q = (x / (y >> cl)) >> cl = esi:eax >> cl
-	shr	eax,cl
-	neg	cx		! cl = (32 - cl) modulo 32
-	shl	edi,cl
-	or	eax,edi		! eax = q
-
-	! Calculate the remainder x - q * y.  If the subtraction
-	! overflows, then the correct quotient is q - 1, else it is q.
-	mov	ecx,yh+4(esp)
-	imul	ecx,eax		! ecx = q * yh
-	mov	edi,eax
-	mul	yl+4(esp)	! edx:eax = q * yl
-	add	edx,ecx		! edx:eax = q * y
-	mov	ebx,xl+4(esp)
-	mov	ecx,xh+4(esp)	! ecx:ebx = x
-	sub	ebx,eax
-	sbb	ecx,edx		! ecx:ebx = remainder
-	jnc	1f
-	dec	edi		! fix quotient
-	add	ebx,yl+4(esp)
-	adc	ebx,yh+4(esp)	! fix remainder
-1:	mov	eax,edi
-	xor	edx,edx		! edx:eax = quotient
-	pop	edi		! negative flag
-	jmp	9b
+	neg	eax
+	sbb	ebx,0		! negate quotient ebx:eax
+1:	ret	16
diff --git a/mach/i386/libem/dvu8.s b/mach/i386/libem/dvu8.s
new file mode 100644
index 000000000..8a1f00203
--- /dev/null
+++ b/mach/i386/libem/dvu8.s
@@ -0,0 +1,20 @@
+.sect .text; .sect .rom; .sect .data; .sect .bss
+.sect .text
+.define .dvu8, .rmu8
+
+yl=4
+yh=8
+xl=12
+xh=16
+	! .dvu8 yields ebx:eax = quotient from x / y
+	! .rmu8 yields ecx:edx = remainder from x / y
+
+.dvu8:
+.rmu8:		! same entry: .divrem8 yields quotient and remainder
+	mov	edx,yh(esp)
+	test	edx,edx		! flag z for .divrem8
+	mov	eax,xh(esp)	! prepare for .divrem8
+	push	ebp		! dummy push, so x, y are at .divrem8's offsets
+	call	.divrem8
+	pop	ebp		! undo dummy push, ebp is unchanged
+	ret	16
diff --git a/mach/i386/libem/rmi8.s b/mach/i386/libem/rmi8.s
new file mode 100644
index 000000000..a52c282b6
--- /dev/null
+++ b/mach/i386/libem/rmi8.s
@@ -0,0 +1,36 @@
+.sect .text; .sect .rom; .sect .data; .sect .bss
+.sect .text
+.define .rmi8
+
+yl=4
+yh=8
+xl=12
+xh=16
+	! .rmi8 yields ecx:edx = remainder from x / y
+
+.rmi8:
+	xorb	cl,cl		! cl = 0, non-negative result
+	mov	eax,xh(esp)	! eax for .divrem8
+	test	eax,eax
+	jge	1f		! jump unless x < 0
+	incb	cl		! cl = 1, negative result
+	neg	eax
+	neg	xl(esp)
+	sbb	eax,0
+	mov	xh(esp),eax	! x = absolute value
+1:	mov	edx,yh(esp)	! edx for .divrem8
+	test	edx,edx		! flag z for .divrem8 when y >= 0
+	jge	1f		! jump unless y < 0
+	neg	edx
+	neg	yl(esp)
+	sbb	edx,0		! flag z for .divrem8 when y < 0
+	mov	yh(esp),edx	! y = absolute value
+1:	push	ecx
+	call	.divrem8
+	pop	eax
+	testb	al,al
+	jz	1f		! jump unless result < 0
+	neg	ecx
+	neg	edx
+	sbb	ecx,0		! negate remainder ecx:edx
+1:	ret	16
diff --git a/mach/i386/ncg/table b/mach/i386/ncg/table
index 184662a15..789fa5c9a 100644
--- a/mach/i386/ncg/table
+++ b/mach/i386/ncg/table
@@ -1038,7 +1038,7 @@ with noacc ACC
 
 pat dvi $1==8
   kills ALL
-  gen proccall {label,".dvi8"}	yields edx eax
+  gen proccall {label,".dvi8"}	yields ebx eax
 
 /*
 pat dvi !defined($1)
@@ -1055,7 +1055,7 @@ with noacc ACC
 
 pat rmi $1==8
   kills ALL
-  gen proccall {label,".dvi8"}	yields ecx ebx
+  gen proccall {label,".rmi8"}	yields ecx edx
 
 /*
 pat rmi !defined($1)
@@ -1202,7 +1202,7 @@ gen div %1			yields eax
 
 pat dvu $1==8
   kills ALL
-  gen proccall {label,".dvu8"}	yields edx eax
+  gen proccall {label,".dvu8"}	yields ebx eax
 
 /*
 pat dvu !defined($1)
@@ -1218,7 +1218,7 @@ gen div %1			yields edx
 
 pat rmu $1==8
   kills ALL
-  gen proccall {label,".dvu8"}	yields ecx ebx
+  gen proccall {label,".rmu8"}	yields ecx edx
 
 /*
 pat rmu !defined($1)
diff --git a/tests/plat/build.lua b/tests/plat/build.lua
index 117e6a735..1613255be 100644
--- a/tests/plat/build.lua
+++ b/tests/plat/build.lua
@@ -4,7 +4,7 @@ definerule("plat_testsuite",
 	{
 		plat = { type="string" },
 		method = { type="string" },
-		-- added long-long/llbitset_e.c
+		-- added long-long/lldivrem_e.c
 		sets = { type="table", default={"core", "b", "bugs", "m2", "floats", "long-long"}},
 		skipsets = { type="table", default={}},
 		tests = { type="targets", default={} },
diff --git a/tests/plat/long-long/lldivrem_e.c b/tests/plat/long-long/lldivrem_e.c
new file mode 100644
index 000000000..7e75125a6
--- /dev/null
+++ b/tests/plat/long-long/lldivrem_e.c
@@ -0,0 +1,71 @@
+#include "test.h"
+
+/*
+ * Test division and remainder.  Failure code will look like
+ *  - 0x3d = id 0x3, 'd' for division
+ *  - 0x3e = id 0x3, 'e' for remainder
+ */
+
+struct s_divrem {
+	unsigned int id;
+	long long a;
+	long long b;
+	long long a_div_b; /* a / b */
+	long long a_rem_b; /* a % b */
+} s_cases[] = {
+	{0x1,  310LL,  100LL,  3LL,  10LL},
+	{0x2,  310LL, -100LL, -3LL,  10LL},
+	{0x3, -310LL,  100LL, -3LL, -10LL},
+	{0x4, -310LL, -100LL,  3LL, -10LL},
+	{0x5,  3000000000000010LL,  100LL,  30000000000000LL,  10LL},
+	{0x6,  3000000000000010LL, -100LL, -30000000000000LL,  10LL},
+	{0x7, -3000000000000010LL,  100LL, -30000000000000LL, -10LL},
+	{0x8, -3000000000000010LL, -100LL,  30000000000000LL, -10LL},
+	{0x9,  3000000000000010LL,  1000000000000LL,  3000LL,  10LL},
+	{0xa,  3000000000000010LL, -1000000000000LL, -3000LL,  10LL},
+	{0xb, -3000000000000010LL,  1000000000000LL, -3000LL, -10LL},
+	{0xc, -3000000000000010LL, -1000000000000LL,  3000LL, -10LL},
+	/*
+	 * In next 3 cases, i386 tries (a / (b >> 13)) >> 13 = 8,
+	 * may need to correct the quotient from 8 to 7.
+	 */
+	{0x11, 0x864200000000LL, 0x10c840000000LL, 8LL, 0LL},
+	{0x12, 0x864200000000LL, 0x10c840000001LL, 7LL, 0x10c83ffffff9LL},
+	{0x13, 0x864200000000LL, 0x10c840001fffLL, 7LL, 0x10c83fff2007LL},
+};
+
+struct u_divrem {
+	unsigned int id;
+	unsigned long long a;
+	unsigned long long b;
+	unsigned long long a_div_b;
+	unsigned long long a_rem_b;
+} u_cases[] = {
+	{0x81, 310ULL, 100ULL, 3ULL, 10ULL},
+	{0x82, 3000000000000010ULL, 100ULL, 30000000000000ULL, 10ULL},
+	{0x83, 3000000000000010ULL, 1000000000000ULL, 3000ULL, 10ULL},
+	{0x91, 0x8000000000000000ULL, 3ULL, 0x2aaaaaaaaaaaaaaaULL, 2ULL},
+	{0x92, 0xffffffffffffffffULL, 3ULL, 0x5555555555555555ULL, 0ULL},
+};
+
+#define LEN(ary) (sizeof(ary) / sizeof(ary[0]))
+
+void _m_a_i_n(void) {
+	int i;
+
+	for (i = 0; i < LEN(s_cases); i++) {
+		struct s_divrem *s = &s_cases[i];
+		if (s->a / s->b != s->a_div_b)
+			fail((s->id << 4) | 0xd);
+		if (s->a % s->b != s->a_rem_b)
+			fail((s->id << 4) | 0xe);
+	}
+	for (i = 0; i < LEN(u_cases); i++) {
+		struct u_divrem *u = &u_cases[i];
+		if (u->a / u->b != u->a_div_b)
+			fail((u->id << 4) | 0xd);
+		if (u->a % u->b != u->a_rem_b)
+			fail((u->id << 4) | 0xe);
+	}
+	finished();
+}

From 485faa29449884a1e756d0e6e7edbc3a650b6e08 Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Mon, 16 Sep 2019 21:35:38 -0400
Subject: [PATCH 13/22] Remove extra conversion of shift count in ACK C.

Given `long long o1` and `unsigned int o2`, then `o1 << o2` was
converting o2 to long long and then to int.  Remove the first
conversion and just convert o2 to int.
---
 lang/cem/cemcom.ansi/arith.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lang/cem/cemcom.ansi/arith.c b/lang/cem/cemcom.ansi/arith.c
index 21a86faf3..654ea6dc2 100644
--- a/lang/cem/cemcom.ansi/arith.c
+++ b/lang/cem/cemcom.ansi/arith.c
@@ -143,7 +143,11 @@ void arithbalance(register struct expr **e1p, int oper, register struct expr **e
 		converted to the signed type, else both operands are
 		converted to an unsigned type.
 	*/
-	if (t1 == LNGLNG && u1 && (t2 != LNGLNG || !u2))
+	if (shifting) {
+		/*	In shifts like o1 << o2, never convert o1,
+			and let ch3bin() convert o2 to int.
+		*/
+	} else if (t1 == LNGLNG && u1 && (t2 != LNGLNG || !u2))
 		convert2 = ulnglng_type;
 	else if (t2 == LNGLNG && u2 && (t1 != LNGLNG || !u1))
 		convert1 = ulnglng_type;
@@ -180,7 +184,7 @@ void arithbalance(register struct expr **e1p, int oper, register struct expr **e
 	else if (t2 == LONG && t1 != LONG)
 		convert1 = long_type;
 
-	if (convert1 && !shifting)	/* ??? */
+	if (convert1)
 		t1 = int2int(e1p, convert1);
 	if (convert2)
 		t2 = int2int(e2p, convert2);

From 6f84bc1dcfa8ae871a1bf18c3092109dbe3bfadb Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Wed, 18 Sep 2019 12:46:11 -0400
Subject: [PATCH 14/22] Add more cases to lladdsub_e.c

Add cases with long long a, b in hexadecimal, where it is more obvious
whether or not a + b or a - b carries to or borrows from bit 32.  Add
failure codes to identify each case.
---
 tests/plat/long-long/lladdsub_e.c | 60 ++++++++++++++++++++++---------
 1 file changed, 44 insertions(+), 16 deletions(-)

diff --git a/tests/plat/long-long/lladdsub_e.c b/tests/plat/long-long/lladdsub_e.c
index a1b248520..b25c0861c 100644
--- a/tests/plat/long-long/lladdsub_e.c
+++ b/tests/plat/long-long/lladdsub_e.c
@@ -1,37 +1,59 @@
 #include "test.h"
 
+/*
+ * Failure code will look like
+ *  -  0x3e =  id 0x3, 'e' or 'f' for negation
+ *  - 0x43a = id 0x43, 'a' for addition
+ *  - 0x43b = id 0x43, 'b' for subtraction
+ */
+
 struct neg {
+	unsigned int id;
 	long long a;
 	long long neg_a; /* -a */
 } negations[] = {
-	{0LL, 0LL},
-	{2LL, -2LL},
-	{-446020022096LL, 446020022096LL},
+	{0x1, 0LL, 0LL},
+	{0x2, 2LL, -2LL},
+	{0x3, -446020022096LL, 446020022096LL},
 };
 
 struct s_addsub {
+	unsigned int id;
 	long long a;
 	long long b;
 	long long a_add_b; /* a + b */
 	long long a_sub_b; /* a - b */
 } s_cases[] = {
-	{2LL, 1LL, 3LL, 1LL},
-	{2LL, -1LL, 1LL, 3LL},
+	{0x41, 2LL, 1LL, 3LL, 1LL},
+	{0x42, 2LL, -1LL, 1LL, 3LL},
+	{0x43, 1LL, 2LL, 3LL, -1LL},
 	/* a + b overflows 32 bits */
-	{1930610480LL, 842500503LL, 2773110983LL, 1088109977LL},
-	{-446020022096LL, 1037107331549LL, 591087309453LL, -1483127353645LL},
-	{-737537585551LL, -847060446507LL, -1584598032058LL, 109522860956LL},
+	{0x44, 0xa0000000LL, 0x60000000LL, 0x100000000LL, 0x40000000LL},
+	{0x45, 1930610480LL, 842500503LL, 2773110983LL, 1088109977LL},
+	/* a + b doesn't carry to bit 32; a - b does borrow from bit 32 */
+	{0x51, 0x100000000LL, 0x50000000LL, 0x150000000LL, 0xb0000000LL},
+	{0x52, -446020022096LL, 1037107331549LL,
+	 591087309453LL, -1483127353645LL},
+	/* a + b does carry to bit 32; a - b doesn't borrow from bit 32 */
+	{0x53, 0x3e0000000LL, 0x20000000LL, 0x400000000LL, 0x3c0000000LL},
+	{0x54, -180587215220LL, 249361198573LL,
+	 68773983353LL, -429948413793LL},
+	/* a + b does carry to bit 32; a - b does borrow from bit 32 */
+	{0x55, 0x370000000LL, 0x90000000LL, 0x400000000LL, 0x2e0000000LL},
+	{0x56, -737537585551LL, -847060446507LL,
+	 -1584598032058LL, 109522860956LL},
 };
 
 struct u_addsub {
+	unsigned int id;
 	unsigned long long a;
 	unsigned long long b;
 	unsigned long long a_add_b;
 	unsigned long long a_sub_b;
 } u_cases[] = {
-	{2ULL, 1ULL, 3ULL, 1ULL},
+	{0x81, 2ULL, 1ULL, 3ULL, 1ULL},
 	/* a + b overflows 63 bits */
-	{6092994517831567942ULL, 3716888886436146324ULL,
+	{0x82, 6092994517831567942ULL, 3716888886436146324ULL,
 	 9809883404267714266ULL, 2376105631395421618ULL},
 };
 
@@ -42,18 +64,24 @@ void _m_a_i_n(void) {
 
 	for (i = 0; i < LEN(negations); i++) {
 		struct neg *n = &negations[i];
-		ASSERT(n->a == -n->neg_a);
-		ASSERT(-n->a == n->neg_a);
+		if (n->a != -n->neg_a)
+			fail((n->id << 4) | 0xe);
+		if (-n->a != n->neg_a)
+			fail((n->id << 4) | 0xf);
 	}
 	for (i = 0; i < LEN(s_cases); i++) {
 		struct s_addsub *s = &s_cases[i];
-		ASSERT(s->a + s->b == s->a_add_b);
-		ASSERT(s->a - s->b == s->a_sub_b);
+		if (s->a + s->b != s->a_add_b)
+			fail((s->id << 4) | 0xa);
+		if (s->a - s->b != s->a_sub_b)
+			fail((s->id << 4) | 0xb);
 	}
 	for (i = 0; i < LEN(u_cases); i++) {
 		struct u_addsub *u = &u_cases[i];
-		ASSERT(u->a + u->b == u->a_add_b);
-		ASSERT(u->a - u->b == u->a_sub_b);
+		if (u->a + u->b != u->a_add_b)
+			fail((u->id << 4) | 0xa);
+		if (u->a - u->b != u->a_sub_b)
+			fail((u->id << 4) | 0xb);
 	}
 	finished();
 }

From f0a2c84d9358353d97bc92bb47ec702652033461 Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Wed, 18 Sep 2019 14:09:51 -0400
Subject: [PATCH 15/22] Tweak i386 rules for adi 8, sli 8, sru 8.

Add EXACT to the rule for adi 8, in the same way that the old rules
for and 8, ior 8, xor 8 have EXACT.

Add rules for sli 8 and sru 8 when shifting 32 bits, and add
assertions in llshift_e.c to test these rules.
---
 mach/i386/ncg/table              | 6 +++++-
 tests/plat/long-long/llshift_e.c | 5 ++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/mach/i386/ncg/table b/mach/i386/ncg/table
index 789fa5c9a..f18bc3e7d 100644
--- a/mach/i386/ncg/table
+++ b/mach/i386/ncg/table
@@ -973,7 +973,7 @@ with EXACT rmorconst const
   gen add %a,%2			yields %a
 
 pat adi $1==8
-with REG REG rmorconst rmorconst
+with EXACT REG REG rmorconst rmorconst
   gen add %1,%3
       adc %2,%4			yields %2 %1
 with rmorconst rmorconst REG REG
@@ -1104,6 +1104,8 @@ pat loc sli ($1&32)==0 && $2==8
 with REG REG
   gen shld %2,%1,{ANYCON,$1&31}
       sal %1,{ANYCON,$1&31}	yields %2 %1
+pat loc sli ($1&63)==32 && $2==8
+with a_word a_word		yields %1 {ANYCON,0}
 pat loc sli ($1&32)!=0 && $2==8
 with REG REG
   gen sal %1,{ANYCON,$1&31}	yields %1 {ANYCON,0}
@@ -1253,6 +1255,8 @@ pat loc sru ($1&32)==0 && $2==8
 with REG REG
   gen shrd %2,%1,{ANYCON,$1&31}
       shr %1,{ANYCON,$1&31}	yields %2 %1
+pat loc sru ($1&63)==32 && $2==8
+with a_word a_word		yields {ANYCON,0} %2
 pat loc sru ($1&32)!=0 && $2==8
 with REG REG
   gen shr %2,{ANYCON,$1&31}	yields {ANYCON,0} %2
diff --git a/tests/plat/long-long/llshift_e.c b/tests/plat/long-long/llshift_e.c
index b5652ebb3..07790ad36 100644
--- a/tests/plat/long-long/llshift_e.c
+++ b/tests/plat/long-long/llshift_e.c
@@ -17,6 +17,8 @@ void _m_a_i_n(void) {
 	ASSERT(i << (1 + zero) == 242LL);
 	ASSERT(i << 26 == 8120172544LL);
 	ASSERT(i << (26 + zero) == 8120172544LL);
+	ASSERT(i << 32 == 519691042816LL);
+	ASSERT(i << (32 + zero) == 519691042816LL);
 	ASSERT(i << 56 == 8718968878589280256LL);
 	ASSERT(i << (56 + zero) == 8718968878589280256LL);
 
@@ -67,9 +69,10 @@ void _m_a_i_n(void) {
 	ASSERT(u >> (0 + zero) == 12022195707510591570ULL);
 	ASSERT(u >> 1 == 6011097853755295785ULL);
 	ASSERT(u >> (1 + zero) == 6011097853755295785ULL);
+	ASSERT(u >> 32 == 2799135564ULL);
+	ASSERT(u >> (32 + zero) == 2799135564ULL);
 	ASSERT(u >> 41 == 5467061ULL);
 	ASSERT(u >> (41 + zero) == 5467061ULL);
 
 	finished();
 }
-

From fd27acb487980e715c1adb8298f85f58a0a478b0 Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Tue, 24 Sep 2019 10:44:48 -0400
Subject: [PATCH 16/22] Fix assembly of bfffo d1[0:32], d3

The assembler wrongly defined _bfexts_ and _bfffo_ with the same bits
as _bfextu_; this turned all bfexts and bfffo instructions into
bfextu.  Motorola's 68k Programmer's Reference Manual (1992) gives
different bits for bfexts, but still has wrong bits for bfffo.  Change
bfexts and bfffo to match the 68k emulators musashi, aranym, syn68k.

The bitfield width is from 1 to 32, not 0 to 31, so move the warning
from 32 to 0.  This doesn't change the warning message, so it will say
that 0 is "too big", when 0 is really too small.
---
 mach/m68020/as/mach2.c |  2 +-
 mach/m68020/as/mach3.c |  4 ++--
 mach/m68020/as/mach4.c | 11 ++++++++---
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/mach/m68020/as/mach2.c b/mach/m68020/as/mach2.c
index 4e658ffcc..9a8b4fb44 100644
--- a/mach/m68020/as/mach2.c
+++ b/mach/m68020/as/mach2.c
@@ -83,6 +83,6 @@
 
 %type <y_word> bcdx op_ea regs rrange 
 %type <y_word> reg sizedef sizenon creg
-%type <y_word> off_width abs31 bd_areg_index
+%type <y_word> off_width off31 wid31 bd_areg_index
 %type <y_word> areg_index areg scale cp_cond fc mask
 %type <y_word> fsize fregs fcregs frlist frrange
diff --git a/mach/m68020/as/mach3.c b/mach/m68020/as/mach3.c
index e77745707..95311d911 100644
--- a/mach/m68020/as/mach3.c
+++ b/mach/m68020/as/mach3.c
@@ -68,8 +68,8 @@
 {0,	BITFIELD,	0166300,	"bfclr"},
 {0,	BITFIELD,	0167300,	"bfset"},
 {0,	BF_TO_D,	0164700,	"bfextu"},
-{0,	BF_TO_D,	0164700,	"bfexts"},
-{0,	BF_TO_D,	0164700,	"bfffo"},
+{0,	BF_TO_D,	0165700,	"bfexts"},
+{0,	BF_TO_D,	0166700,	"bfffo"},  /* not 0164700 */
 {0,	BFINS,		0167700,	"bfins"},
 
 {0,	SHIFT,		0160340,	"asr"},
diff --git a/mach/m68020/as/mach4.c b/mach/m68020/as/mach4.c
index 7638a680e..3103ad5e1 100644
--- a/mach/m68020/as/mach4.c
+++ b/mach/m68020/as/mach4.c
@@ -225,12 +225,17 @@ creg	:	CREG
 off_width		/* note: these should be curly brackets, but that would
 			 * leave us without brackets for expressions.
 			 */
-	:	'[' abs31 ':' abs31 ']'
+	:	'[' off31 ':' wid31 ']'
 			{	$$ = ($2<<6) | $4;
 			}
 	;
-abs31	:	DREG	{	$$ = 040 | $1;}
-	|	absexp	{	fit(fit5($1));
+off31	:	DREG	{	$$ = 040 | $1;}
+	|	absexp	{	fit(fit5($1));     /* 0 to 31 */
+				$$ = low5($1);
+			}
+	;
+wid31	:	DREG	{	$$ = 040 | $1;}
+	|	absexp	{	fit(fit5($1 - 1)); /* warn unless 1 to 32 */
+				$$ = low5($1);
+			}
 	;

From e867861f6d84947514a90396d426a6472ffc3131 Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Tue, 24 Sep 2019 13:32:17 -0400
Subject: [PATCH 17/22] Add 8-byte long long for linux68k.

Add rules for 8-byte integers to m68020 ncg.  Add 8-byte long long to
ACK C on linux68k.  Enable long-long tests for linux68k.  The tests
pass in our emulator using musashi; I don't have a real 68k processor
and haven't tried other emulators.

Still missing are conversions between 8-byte integers and any size of
floats.  The long-long tests don't cover these conversions, and our
emulator can't do floating-point.

Our build always enables TBL68020 and uses word size 4.  Without
TBL68020, 8-byte multiply and divide are missing.  With word size 2,
some conversions between 2-byte and 8-byte integers are missing.

Fix .cii in libem, which didn't work when converting from 1-byte or
2-byte integers.  Now .cii and .cuu work, but also add some rules to
skip .cii and .cuu when converting 8-byte integers.  The new rule for
loc 4 loc 8 cii `with test_set4` exposes a bug: the table may believe
that the condition codes test a 4-byte register when they only test a
word or byte, and this incorrect test may describe an unsigned word or
byte as negative.  Another rule `with exact test_set1+test_set2` works
around the bug by ignoring the negative flag, because a zero-extended
word or byte is never negative.

The old rules for comparison and logic do work with 8-byte integers
and bitsets, but add some specific 8-byte rules to skip libem calls or
loops.  There were no rules for 8-byte arithmetic, shift, or rotate;
so add some.  There is a register shortage, because the table requires
preserving d3 to d7, leaving only 3 data registers (d0, d1, d2) for
8-byte operations.  Because of the shortage, the code may move data to
an address register, or read a memory location more than once.

The multiplication and division code are translations of the i386
code.  They pass the tests, but might not give the best performance on
a real 68k processor.
---
 mach/m68020/libem/build.lua   |   2 +-
 mach/m68020/libem/cii.s       |  19 +-
 mach/m68020/libem/divrem8.s   |  76 +++++++
 mach/m68020/libem/dvi8.s      |  34 +++
 mach/m68020/libem/dvu8.s      |  20 ++
 mach/m68020/libem/rmi8.s      |  35 +++
 mach/m68020/libem/rmu8.s      |  22 ++
 mach/m68020/ncg/table         | 404 +++++++++++++++++++++++++++++++++-
 plat/linux68k/descr           |   7 +-
 plat/linux68k/tests/build.lua |   1 -
 10 files changed, 608 insertions(+), 12 deletions(-)
 create mode 100644 mach/m68020/libem/divrem8.s
 create mode 100644 mach/m68020/libem/dvi8.s
 create mode 100644 mach/m68020/libem/dvu8.s
 create mode 100644 mach/m68020/libem/rmi8.s
 create mode 100644 mach/m68020/libem/rmu8.s

diff --git a/mach/m68020/libem/build.lua b/mach/m68020/libem/build.lua
index d17adcd92..d5c9af8ad 100644
--- a/mach/m68020/libem/build.lua
+++ b/mach/m68020/libem/build.lua
@@ -2,7 +2,7 @@ for _, plat in ipairs(vars.plats) do
 	acklibrary {
 		name = "lib_"..plat,
 		srcs = {
-			"./*.s",
+			"./*.s", -- added divrem8.s
 			"./*.c"
 		},
 		vars = { plat = plat },
diff --git a/mach/m68020/libem/cii.s b/mach/m68020/libem/cii.s
index 01757dfc5..b3dd8c0be 100644
--- a/mach/m68020/libem/cii.s
+++ b/mach/m68020/libem/cii.s
@@ -13,12 +13,19 @@
 	sub.l	d0, sp		! pop extra bytes
 	jmp	(a0)
 1:
-	clr.l	d1
-	tst.l	(sp)
-	bne	4f
-	not.l	d1		! d1 contains sign of source
-4:
-	asr.l	#2, d0
+	move.l	(sp), d1
+	lsr.l	#1, d0
+	bcs	1f		! branch if source size == 1
+	lsr.l	#1, d0
+	bcs	2f		! branch if source size == 2
+	tst.l	d1
+	bra	4f
+1:	lsr.l	#1, d0		! size difference / 4
+	ext.w	d1
+2:	ext.l	d1
+	move.l	d1, (sp)
+4:	slt	d1
+	extb.l	d1		! d1 contains sign of source
 	sub.l	#1, d0
 2:
 	move.l	d1, -(sp)
diff --git a/mach/m68020/libem/divrem8.s b/mach/m68020/libem/divrem8.s
new file mode 100644
index 000000000..557924098
--- /dev/null
+++ b/mach/m68020/libem/divrem8.s
@@ -0,0 +1,76 @@
+.define .divrem8
+.sect .text
+.sect .rom
+.sect .data
+.sect .bss
+
+yh=16
+yl=20
+xh=24
+xl=28
+	! This private sub for .dvi8, .dvu8, .rmi8, .rmu8
+	! does unsigned division of x = xh:xl by y = yh:yl,
+	! yields d0:d1 = quotient, d2:d3 = remainder.
+
+.sect .text
+.divrem8:
+	! Caller must set d0, d1 like so:
+	! move.l (xh, sp), d0
+	! move.l (yh, sp), d1
+	tst.l	d1
+	bne	1f		! branch if y >= 2**32
+
+	! y = yl, so x / y = xh:xl / yl = qh:0 + rh:xl / yl
+	! where qh, rh are quotient, remainder from xh / yl.
+	move.l	(xl, sp), d1
+	move.l	(yl, sp), d2
+	clr.l	d3		! d3:d0 = xh
+	divu.l	d2, d3:d0	! d0 =  0:xh / yl, d3 = rh
+	divu.l	d2, d3:d1	! d1 = rh:xl / yl, so d0:d1 = x / y
+	clr.l	d2		! remainder in d2:d3
+	rts
+
+1:	! Here y >= 2**32.
+	move.l	d0, a0		! save xh
+	move.l	d1, a1		! save yh
+	move.l	d7, a2		! save caller's d7
+
+	! Find y >> right in [2**31, 2**32).
+	move.l	(yl, sp), d2
+	bfffo	d1[0:32], d3	! find highest set bit in yh
+	lsl.l	d3, d1		! shift yh left
+	bset	#5, d3
+	neg.l	d3		! right = (32 - left) modulo 64
+	lsr.l	d3, d2		! shift yl right
+	or.l	d1, d2		! d2 = y >> right
+
+	! Estimate x / y as q = (x / (y >> right)) >> right.
+	move.l	(xl, sp), d1
+	clr.l	d7
+	divu.l	d2, d7:d0
+	divu.l	d2, d7:d1	! d0:d1 = x / (y >> right)
+	lsr.l	d3, d1
+	bchg	#5, d3		! not bset: bit 5 is already set if right = 32
+	neg.l	d3		! left = (32 - right) modulo 64
+	lsl.l	d3, d0
+	or.l	d0, d1		! d1 = q
+
+	! Calculate the remainder x - y * q.  If the subtraction
+	! overflows, then the correct quotient is q - 1, else it is q.
+	move.l	a1, d3		! yh
+	mulu.l	d1, d3		! yh * q
+	move.l	(yl, sp), d7
+	mulu.l	d1, d0:d7	! yl * q
+	add.l	d3, d0		! d0:d7 = y * q
+	move.l	(xl, sp), d3
+	move.l	a0, d2		! d2:d3 = x
+	sub.l	d7, d3
+	subx.l	d0, d2		! d2:d3 = x - y * q
+	bcc	1f		! branch unless subtraction overflowed
+	sub.l	#1, d1		! fix quotient
+	move.l	a1, d7		! yh
+	add.l	(yl, sp), d3
+	addx.l	d7, d2		! fix remainder
+1:	clr.l	d0		! d0:d1 = quotient
+	move.l	a2, d7		! restore caller's d7
+	rts
diff --git a/mach/m68020/libem/dvi8.s b/mach/m68020/libem/dvi8.s
new file mode 100644
index 000000000..03fc3e985
--- /dev/null
+++ b/mach/m68020/libem/dvi8.s
@@ -0,0 +1,34 @@
+.define .dvi8
+.sect .text
+.sect .rom
+.sect .data
+.sect .bss
+
+yh=8
+yl=12
+xh=16
+xl=20
+	! .dvi8 yields d0:d1 = quotient from x / y
+
+.sect .text
+.dvi8:
+	move.l	d3, -(sp)	! preserve caller's d3
+	clr.l	d2		! d2 = 0, non-negative result
+	move.l	(xh, sp), d0	! d0 for .divrem8
+	bge	1f
+	move.l	#1, d2		! d2 = 1, negative result
+	neg.l	(xl, sp)
+	negx.l	d0		! x = absolute value
+1:	move.l	(yh, sp), d1	! d1 for .divrem8
+	bge	1f
+	bchg	#0, d2		! flip sign of result
+	neg.l	(yl, sp)
+	negx.l	d1		! y = absolute value
+1:	move.l	d2, -(sp)
+	jsr	(.divrem8)
+	move.l	(sp)+, d2
+	beq	1f		! branch unless result < 0
+	neg.l	d1
+	negx.l	d0		! negate quotient d0:d1
+1:	move.l	(sp)+, d3
+	rtd	#16
diff --git a/mach/m68020/libem/dvu8.s b/mach/m68020/libem/dvu8.s
new file mode 100644
index 000000000..00ec6b552
--- /dev/null
+++ b/mach/m68020/libem/dvu8.s
@@ -0,0 +1,20 @@
+.define .dvu8
+.sect .text
+.sect .rom
+.sect .data
+.sect .bss
+
+yh=8
+xh=16
+	! .dvu8 yields d0:d1 = quotient from x / y
+
+.sect .text
+.dvu8:
+	move.l	d3, -(sp)	! preserve caller's d3
+	move.l	(xh, sp), d0
+	move.l	(yh, sp), d1
+	sub.l	#4, sp
+	jsr	(.divrem8)
+	add.l	#4, sp
+	move.l	(sp)+, d3
+	rtd	#16
diff --git a/mach/m68020/libem/rmi8.s b/mach/m68020/libem/rmi8.s
new file mode 100644
index 000000000..ffb672b2c
--- /dev/null
+++ b/mach/m68020/libem/rmi8.s
@@ -0,0 +1,35 @@
+.define .rmi8
+.sect .text
+.sect .rom
+.sect .data
+.sect .bss
+
+yh=8
+yl=12
+xh=16
+xl=20
+	! .rmi8 yields d0:d1 = remainder from x / y
+
+.sect .text
+.rmi8:
+	move.l	d3, -(sp)	! preserve caller's d3
+	clr.l	d2		! d2 = 0, non-negative result
+	move.l	(xh, sp), d0	! d0 for .divrem8
+	bge	1f		! branch unless x < 0
+	move.l	#1, d2		! d2 = 1, negative result
+	neg.l	(xl, sp)
+	negx.l	d0		! x = absolute value
+1:	move.l	(yh, sp), d1	! d1 for .divrem8
+	bge	1f		! branch unless y < 0; y never flips d2
+	neg.l	(yl, sp)
+	negx.l	d1		! y = absolute value
+1:	move.l	d2, -(sp)	! save sign flag across the call
+	jsr	(.divrem8)
+	move.l	(sp)+, d0	! pop sign flag
+	beq	1f		! branch unless result < 0
+	neg.l	d3
+	negx.l	d2		! negate remainder d2:d3
+1:	move.l	d3, d1
+	move.l	d2, d0		! d0:d1 = remainder
+	move.l	(sp)+, d3	! restore caller's d3
+	rtd	#16
diff --git a/mach/m68020/libem/rmu8.s b/mach/m68020/libem/rmu8.s
new file mode 100644
index 000000000..823a2778c
--- /dev/null
+++ b/mach/m68020/libem/rmu8.s
@@ -0,0 +1,22 @@
+.define .rmu8
+.sect .text
+.sect .rom
+.sect .data
+.sect .bss
+
+yh=8
+xh=16
+	! .rmu8 yields d0:d1 = remainder from x / y
+
+.sect .text
+.rmu8:
+	move.l	d3, -(sp)	! preserve caller's d3
+	move.l	(xh, sp), d0
+	move.l	(yh, sp), d1
+	sub.l	#4, sp
+	jsr	(.divrem8)
+	add.l	#4, sp
+	move.l	d3, d1
+	move.l	d2, d0
+	move.l	(sp)+, d3
+	rtd	#16
diff --git a/mach/m68020/ncg/table b/mach/m68020/ncg/table
index 9aede9929..fe1099078 100644
--- a/mach/m68020/ncg/table
+++ b/mach/m68020/ncg/table
@@ -612,6 +612,7 @@ add_l  "add.l"	conreg4:ro,	alterable4:rw:cc	cost(2,6).
 add_w  "add.w"	any2:ro,	D_REG+LOCAL:rw:cc	cost(2,3).
 add_w  "add.w"	conreg2:ro,	alterable2:rw:cc	cost(2,6).
 #endif
+addx_l "addx.l" D_REG4:ro,	D_REG4:rw kills :cc	cost(2,3).
 and_l  "and.l"	data4:ro,	D_REG4:rw:cc		cost(2,3).
 and_l  "and.l"	D_REG4:ro,	memalt4:rw:cc		cost(2,6).
 and_l  "and.l"	consts4:ro,	datalt4:rw:cc		cost(2,6).
@@ -628,6 +629,7 @@ asr   "asr #1,"	memalt2:rw:cc				cost(2,4).
 asl_w  "asl.w"	shconreg:ro,	D_REG:rw:cc		cost(2,5).
 asr_w  "asr.w"	shconreg:ro,	D_REG:rw:cc		cost(2,4).
 #endif
+bchg		const:ro,	D_REG:rw kills:cc	cost(2,4).
 bclr		const:ro,	D_REG:rw kills:cc	cost(2,4).
 bra		label					cost(2,5).
 bcc		label					cost(2,5).
@@ -671,14 +673,15 @@ eor_l  "eor.l"	conreg4:ro,	datalt4:rw:cc		cost(2,6).
 #if WORD_SIZE==2
 eor_w  "eor.w"	conreg2:ro,	datalt2:rw:cc		cost(2,4).
 #endif
+exg		genreg4:rw,	genreg4:rw		cost(2,3).
 /* in the next two instructions: LOCAL only allowed if register var */
 ext_l  "ext.l"	D_REG+LOCAL+D_REG4:rw:cc		cost(2,2).
 ext_w  "ext.w"	D_REG+LOCAL+D_REG4:rw:cc		cost(2,2).
 jmp		address+control4			cost(2,0).
 jsr		address+control4 kills :cc d0 d1 d2 a0 a1 cost(2,3).
 lea		address+control4:ro, A_REG+areg:wo	cost(2,0).
+lsl_l  "lsl.l"	shconreg:ro,	D_REG4:rw:cc		cost(2,4).
 /*
-lsl_l  "lsl.l"	shconreg:ro,	D_REG:rw:cc		cost(2,4).
 lsl   "lsl #1,"	memalt2:rw:cc				cost(2,4).
 */
 lsr_l  "lsr.l"	shconreg:ro,	D_REG4:rw:cc		cost(2,4).
@@ -709,6 +712,8 @@ neg_l  "neg.l"	memory4:rw:cc				cost(2,6).
 #if WORD_SIZE==2
 neg_w  "neg.w"	memory2:rw:cc				cost(2,6).
 #endif
+negx_l "negx.l" D_REG4:rw:cc				cost(2,3).
+negx_l "negx.l" memory4:rw:cc				cost(2,6).
 not_l  "not.l"	D_REG4:rw:cc				cost(2,3).
 not_l  "not.l"	memory4:rw:cc				cost(2,6).
 #if WORD_SIZE==2
@@ -733,6 +738,7 @@ ror_w  "ror.w"	shconreg:ro,	D_REG:rw:cc		cost(2,4).
 #endif
 roxl "roxl #1,"	memalt2:rw:cc				cost(2,4).
 roxr "roxr #1,"	memalt2:rw:cc				cost(2,4).
+slt		datalt1:rw				cost(2,3).
 sne		datalt1:rw				cost(2,3).
 sub_l  "sub.l"	any4:ro,	D_REG4:rw:cc		cost(2,3).
 sub_l  "sub.l"	any4+areg:ro,	A_REG+areg:rw		cost(2,3).
@@ -740,6 +746,9 @@ sub_l  "sub.l"	conreg4:ro,	alterable4:rw:cc	cost(2,6).
 #if WORD_SIZE==2
 sub_w  "sub.w"	any2:ro,	D_REG+LOCAL:rw:cc	cost(2,3).
 sub_w  "sub.w"	conreg2:ro,	alterable2:rw:cc	cost(2,6).
+#endif
+subx_l "subx.l" D_REG4:ro,	D_REG4:rw kills :cc	cost(2,3).
+#if WORD_SIZE==2
 /* On a swap, we only want the lower part of D_REG, so don't set cc */
 swap		D_REG:rw kills :cc			cost(2,2).
 #endif
@@ -773,6 +782,7 @@ divs_l "divs.l" data4:ro,	D_REG4:rw:cc		cost(2,90).
 divu_l "divu.l" data4:ro,	D_REG4:rw:cc		cost(2,78).
 divsl_l "divsl.l" data4:ro,	DREG_pair:rw kills :cc	cost(2,90).
 divul_l "divul.l" data4:ro,	DREG_pair:rw kills :cc	cost(2,78).
+mulu_l "mulu.l" data4:ro,	DREG_pair:rw kills :cc	cost(2,44).
 pea		address+control4+regX			cost(2,4).
 #if WORD_SIZE==2
 cmp2_w "cmp2.w" address+control2:ro, genreg2:ro kills :cc cost(2,18).
@@ -3796,6 +3806,18 @@ with exact any4 STACK
     gen add_l {post_inc4, sp}, %a
 			yields	%a
 
+pat adi $1==8
+with exact any4 any4 DD_REG4 DD_REG4
+    uses reusing %1, DD_REG4 = %1
+    gen add_l %2, %4
+	addx_l %a, %3	yields	%4 %3
+with DD_REG4 DD_REG4 D_REG4 any4
+    gen add_l %4, %2
+	addx_l %3, %1	yields	%2 %1
+with DD_REG4 DD_REG4 D_REG4 STACK
+    gen add_l {post_inc4, sp}, %2
+	addx_l %3, %1	yields	%2 %1
+
 #if WORD_SIZE==2
 pat sbi $1==2
 with any2-bconst DD_REG
@@ -3822,6 +3844,12 @@ with exact any4 STACK
 with any4-bconst4 AA_REG
     gen sub_l %1, %2	yields	%2
 
+pat sbi $1==8
+with D_REG4 any4-D_REG4 DD_REG4 DD_REG4
+    /* only 3 of DD_REG4; may unstack %2 into AA_REG */
+    gen sub_l %2, %4
+        subx_l %1, %3	yields	%4 %3
+
 #if WORD_SIZE==2
 pat loc loc cii ldc mli $1==2 && $2==4 && highw($4)==0 && loww($4)>0 && $5==4
 with any2-pre_post
@@ -3847,6 +3875,34 @@ with STACK
 			yields	dl1
 #endif
 
+#ifdef TBL68020
+pat mli $1==8
+with exact data4 data4 DD_REG4 DD_REG4	/* yh yl xh xl */
+    uses DD_REG4 = %4
+    gen mulu_l %1, %a			/* xl * yh */
+	mulu_l %2, %3			/* xh * yl */
+	add_l %3, %a
+	mulu_l %2, {DREG_pair, %3, %4}	/* xl * yl */
+	add_l %a, %3
+			yields	%4 %3
+with DD_REG4 DD_REG4 data4 data4	/* yh yl xh xl */
+    uses DD_REG = %2
+    gen mulu_l %3, %a			/* yl * xh */
+	mulu_l %4, %1			/* yh * xl */
+	add_l %1, %a
+	mulu_l %4, {DREG_pair, %1, %2}	/* yl * xl */
+	add_l %a, %1
+			yields	%2 %1
+with DD_REG4 DD_REG4 STACK		/* yh yl xh xl */
+    uses DD_REG4 = %2
+    gen mulu_l {post_inc4, sp}, %a	/* yl * xh */
+	mulu_l {indirect4, sp}, %1	/* yh * xl */
+	add_l %1, %a
+	mulu_l {post_inc4, sp}, {DREG_pair, %1, %2} /* yl * xl */
+	add_l %a, %1
+			yields	%2 %1
+#endif /* TBL68020 */
+
 #if WORD_SIZE==2
 pat dvi $1==2
 with data2-sconsts DD_REG
@@ -3866,6 +3922,14 @@ with STACK
 			yields	dl1
 #endif /* TBL68020 */
 
+#ifdef TBL68020
+pat dvi $1==8
+with STACK
+    kills ALL
+    gen jsr {absolute4, ".dvi8"}
+			yields	dl1 dl0
+#endif /* TBL68020 */
+
 #if WORD_SIZE==2
 pat rmi $1==2
 with data2-sconsts DD_REG
@@ -3891,6 +3955,14 @@ with STACK
 			yields	dl2
 #endif /* TBL68020 */
 
+#ifdef TBL68020
+pat rmi $1==8
+with STACK
+    kills ALL
+    gen jsr {absolute4, ".rmi8"}
+			yields	dl1 dl0
+#endif /* TBL68020 */
+
 #if WORD_SIZE==2
 pat ngi $1==2
 with DD_REG
@@ -3901,6 +3973,11 @@ pat ngi $1==4
 with DD_REG4
     gen neg_l %1	yields	%1
 
+pat ngi $1==8
+with DD_REG4 DD_REG4
+    gen neg_l %2
+	negx_l %1	yields	%2 %1
+
 #if WORD_SIZE==2
 pat sli $1==2
 with shconreg DD_REG
@@ -3911,6 +3988,43 @@ pat sli $1==4
 with shconreg DD_REG4
     gen asl_l %1, %2	yields	%2
 
+pat sli $1==8
+with DD_REG4 DD_REG4 DD_REG4
+    uses AA_REG = %3		/* no 4th DD_REG */
+    gen lsl_l %1, %3
+	lsl_l %1, %2		/* shift by %1 modulo 64 */
+	bchg {const, 5}, %1
+	bne {slabel, 1f}	/* jump if shift >= 32 */
+	neg_l %1
+	exg %a, %3
+	lsr_l %1, %3		/* (32 - shift) modulo 64 */
+	or_l %3, %2		/* shift bits from %3 to %2 */
+	move %a, %3
+	bra {slabel, 2f}
+	1:
+	move %a, %2
+	lsl_l %1, %2		/* (shift - 32) modulo 64 */
+	2:		yields	%3 %2
+
+pat loc sli ($1&32)==0 && $2==8
+with DD_REG4 DD_REG4
+    uses AA_REG = %2, DD_REG = {bconst, $1&31}
+    gen lsl_l %b, %2
+	lsl_l %b, %1
+	bset {const, 5}, %b
+	neg_l %b
+	exg %a, %2
+	lsr_l %b, %2
+	or_l %2, %1
+	move %a, %2
+			yields	%2 %1
+pat loc sli ($1&63)==32 && $2==8
+with any4 any4		yields	{zero_const, 0} %2
+pat loc sli ($1&32)!=0 && $2==8
+with any4 DD_REG4
+    uses reusing %1, DD_REG = {bconst, $1&31}
+    gen lsl_l %a, %2	yields	{zero_const, 0} %2
+
 #if WORD_SIZE==2
 pat sri $1==2
 with shconreg DD_REG
@@ -3921,6 +4035,43 @@ pat sri $1==4
 with shconreg DD_REG4
     gen asr_l %1, %2	yields	%2
 
+pat sri $1==8
+with DD_REG4 DD_REG4 DD_REG4
+    uses AA_REG = %2		/* no 4th DD_REG */
+    gen asr_l %1, %2
+	lsr_l %1, %3		/* shift by %1 modulo 64 */
+	bchg {const, 5}, %1
+	bne {slabel, 1f}	/* jump if shift >= 32 */
+	neg_l %1
+	exg %a, %2
+	lsl_l %1, %2		/* (32 - shift) modulo 64 */
+	or_l %2, %3		/* shift bits from %2 to %3 */
+	move %a, %2
+	bra {slabel, 2f}
+	1:
+	move %a, %3
+	asr_l %1, %3		/* (shift - 32) modulo 64 */
+	2:		yields	%3 %2
+
+pat loc sri ($1&32)==0 && $2==8
+with DD_REG4 DD_REG4
+    uses AA_REG = %1, DD_REG = {bconst, $1&31}
+    gen asr_l %b, %1
+	lsr_l %b, %2
+	bset {const, 5}, %b
+	neg_l %b
+	exg %a, %1
+	lsl_l %b, %1
+	or_l %1, %2
+	move %a, %1
+			yields	%2 %1
+pat loc sri ($1&63)==32 && $2==8
+with DD_REG4 any4	yields	%1	leaving loc 4 loc 8 cii
+pat loc sri ($1&32)!=0 && $2==8
+with DD_REG4 any4
+    uses reusing %2, DD_REG = {bconst, $1&31}
+    gen asr_l %a, %1	yields	%1	leaving loc 4 loc 8 cii
+
 /************************************************
  * Group 4: unsigned arithmetic.		*
  ************************************************/
@@ -3947,6 +4098,8 @@ with STACK
 			yields	dl1
 #endif /* TBL68020 */
 
+pat mlu $1==8			leaving mli 8
+
 #if WORD_SIZE==2
 pat dvu $1==2
 with data2-sconsts data2
@@ -3966,6 +4119,14 @@ with STACK
 			yields	dl1
 #endif /* TBL68020 */
 
+#ifdef TBL68020
+pat dvu $1==8
+with STACK
+    kills ALL
+    gen jsr {absolute4, ".dvu8"}
+			yields	dl1 dl0
+#endif /* TBL68020 */
+
 #if WORD_SIZE==2
 pat rmu $1==2
 with data2-sconsts data2
@@ -3992,8 +4153,18 @@ with STACK
 			yields	dl2
 #endif /* TBL68020 */
 
+#ifdef TBL68020
+pat rmu $1==8
+with STACK
+    kills ALL
+    gen jsr {absolute4, ".rmu8"}
+			yields	dl1 dl0
+#endif /* TBL68020 */
+
 pat slu				leaving sli $1
 
+pat loc slu $2==8		leaving loc $1 sli 8
+
 #if WORD_SIZE==2
 pat sru $1==2
 with shconreg DD_REG
@@ -4004,6 +4175,43 @@ pat sru $1==4
 with shconreg DD_REG4
     gen lsr_l %1, %2	yields	%2
 
+pat sru $1==8
+with DD_REG4 DD_REG4 DD_REG4
+    uses AA_REG = %2		/* no 4th DD_REG */
+    gen lsr_l %1, %2
+	lsr_l %1, %3		/* shift by %1 modulo 64 */
+	bchg {const, 5}, %1
+	bne {slabel, 1f}	/* jump if shift >= 32 */
+	neg_l %1
+	exg %a, %2
+	lsl_l %1, %2		/* (32 - shift) modulo 64 */
+	or_l %2, %3		/* shift bits from %2 to %3 */
+	move %a, %2
+	bra {slabel, 2f}
+	1:
+	move %a, %3
+	lsr_l %1, %3		/* (shift - 32) modulo 64 */
+	2:		yields	%3 %2
+
+pat loc sru ($1&32)==0 && $2==8
+with DD_REG4 DD_REG4
+    uses AA_REG = %1, DD_REG = {bconst, $1&31}	/* %a = high word */
+    gen lsr_l %b, %1
+	lsr_l %b, %2
+	bset {const, 5}, %b
+	neg_l %b		/* (32 - shift) modulo 64 */
+	exg %a, %1		/* %1 = unshifted high word */
+	lsl_l %b, %1
+	or_l %1, %2		/* shift bits from %1 to %2 */
+	move %a, %1		/* %1 = shifted high word again */
+			yields	%2 %1
+pat loc sru ($1&63)==32 && $2==8
+with any4 any4		yields	%1 {zero_const, 0}
+pat loc sru ($1&32)!=0 && $2==8
+with DD_REG4 any4
+    uses reusing %2, DD_REG = {bconst, $1&31}
+    gen lsr_l %a, %1	yields	%1 {zero_const, 0}
+
 /************************************************
  * Group 5: floating point arithmetic		*
  ************************************************/
@@ -4753,6 +4961,17 @@ with exact any_int STACK
     uses reusing %1,DD_REG=%1
     gen xxx* {post_inc_int, sp}, %a	yields %a
 
+proc log8 example and
+with exact data4 data4 DD_REG4 DD_REG4
+    gen xxx* %1, %3
+	xxx* %2, %4			yields	%4 %3
+with DD_REG4 DD_REG4 data4 data4
+    gen xxx* %3, %1
+	xxx* %4, %2			yields	%2 %1
+with DD_REG4 DD_REG4 STACK
+    gen xxx* {post_inc4, sp}, %1
+	xxx* {post_inc4, sp}, %2	yields	%2 %1
+
 proc logdef example and
 with STACK
     uses DD_REG4 = {const, $1/WORD_SIZE -1},
@@ -4813,6 +5032,7 @@ pat and $1==WORD_SIZE			call logw(AND_I)
 #if WORD_SIZE==2
 pat and $1==2*WORD_SIZE			call log2w("and.l")
 #endif
+pat and $1==8				call log8("and.l")
 pat and $1>4 && $1/WORD_SIZE<=65536	call logdef(AND_I)
 pat and defined($1)			call logbdef(AND_I)
 pat and !defined($1)			call logndef(AND_I)
@@ -4821,6 +5041,7 @@ pat ior $1==WORD_SIZE			call logw(OR_I)
 #if WORD_SIZE==2
 pat ior $1==2*WORD_SIZE			call log2w("or.l")
 #endif
+pat ior $1==8				call log8("or.l")
 pat ior $1>2 && $1/WORD_SIZE<=65536	call logdef(OR_I)
 pat ior defined($1)			call logbdef(OR_I)
 pat ior !defined($1)			call logndef(OR_I)
@@ -4835,6 +5056,21 @@ pat xor $1==4
 with DD_REG4 conreg4-bconst4
     gen eor_l %2, %1	yields	%1
 
+pat xor $1==8
+with exact any4 any4 DD_REG4 DD_REG4
+    uses reusing %1, DD_REG4 = %1
+    gen eor_l %a, %3
+	move %2, %a
+	eor_l %a, %4	yields	%4 %3
+with DD_REG4 DD_REG4 DD_REG4 any4
+    gen eor_l %3, %1
+	move %4, %3		/* overwrites %3, so %3 must be DD_REG4 */
+	eor_l %3, %2	yields	%2 %1
+with DD_REG4 DD_REG4 DD_REG4 STACK
+    gen eor_l %3, %1
+        move_l {post_inc4, sp}, %3
+	eor_l %3, %2	yields	%2 %1
+
 pat xor $1>4 && $1/WORD_SIZE<=65536		call logdef(EOR_I)
 pat xor defined($1)			call logbdef(EOR_I)
 pat xor !defined($1)			call logndef(EOR_I)
@@ -4907,6 +5143,50 @@ pat rol $1==4
 with shconreg DD_REG4
     gen rol_l %1, %2	yields	%2
 
+pat rol $1==8
+with DD_REG4 DD_REG4 DD_REG4
+    uses AA_REG, AA_REG		/* no 4th DD_REG */
+    gen bclr {const, 5}, %1
+	beq {slabel, 1f}
+	exg %2, %3		/* rotate left 32 */
+	1:
+	move %2, %a
+	move %3, %b
+	lsl_l %1, %2
+	lsl_l %1, %3
+	bset {const, 5}, %1
+	neg_l %1		/* (32 - shift) modulo 64 */
+	exg %a, %2
+	lsr_l %1, %2
+	or_l %2, %3		/* rotate bits from %2 to %3 */
+	move %a, %2
+	exg %b, %3
+	lsr_l %1, %3
+	or_l %3, %2		/* rotate bits from %3 to %2 */
+	move %b, %3
+			yields	%3 %2
+
+pat loc rol ($1&32)==0 && $2==8
+with DD_REG4 DD_REG4
+    uses AA_REG, AA_REG, DD_REG = {bconst, $1&31}
+    gen move %1, %a
+	move %2, %b
+	lsl_l %c, %1
+	lsl_l %c, %2
+	bset {const, 5}, %c
+	neg_l %c
+	exg %a, %1
+	lsr_l %c, %1
+	or_l %1, %2
+	move %a, %1
+	exg %b, %2
+	lsr_l %c, %2
+	or_l %2, %1
+	move %b, %2
+			yields	%2 %1
+pat loc rol ($1&63)==32 && $2==8	leaving exg 4
+pat loc rol ($1&32)!=0 && $2==8		leaving loc (0-$1)&31 ror 8
+
 #if WORD_SIZE==2
 pat ror $1==2
 with shconreg DD_REG
@@ -4917,7 +5197,51 @@ pat ror $1==4
 with shconreg DD_REG4
     gen ror_l %1, %2	yields	%2
 
-	
+pat ror $1==8
+with DD_REG4 DD_REG4 DD_REG4
+    uses AA_REG, AA_REG		/* no 4th DD_REG */
+    gen bclr {const, 5}, %1
+	beq {slabel, 1f}
+	exg %2, %3		/* rotate right 32 */
+	1:
+	move %2, %a
+	move %3, %b
+	lsr_l %1, %2
+	lsr_l %1, %3
+	bset {const, 5}, %1
+	neg_l %1		/* (32 - shift) modulo 64 */
+	exg %a, %2
+	lsl_l %1, %2
+	or_l %2, %3		/* rotate bits from %2 to %3 */
+	move %a, %2
+	exg %b, %3
+	lsl_l %1, %3
+	or_l %3, %2		/* rotate bits from %3 to %2 */
+	move %b, %3
+			yields	%3 %2
+
+pat loc ror ($1&32)==0 && $2==8
+with DD_REG4 DD_REG4
+    uses AA_REG, AA_REG, DD_REG = {bconst, $1&31}
+    gen move %1, %a
+	move %2, %b
+	lsr_l %c, %1
+	lsr_l %c, %2
+	bset {const, 5}, %c
+	neg_l %c
+	exg %a, %1
+	lsl_l %c, %1
+	or_l %1, %2
+	move %a, %1
+	exg %b, %2
+	lsl_l %c, %2
+	or_l %2, %1
+	move %b, %2
+			yields	%2 %1
+pat loc ror ($1&63)==32 && $2==8	leaving exg 4
+pat loc ror ($1&32)!=0 && $2==8		leaving loc (0-$1)&31 rol 8
+
+
 
 
 /************************************************
@@ -6391,6 +6715,55 @@ pat cmu zge $1==WORD_SIZE		call cmuzxx("bcc","bls")
 pat cmu zgt $1==WORD_SIZE		call cmuzxx("bhi","bcs")
 
 
+proc cmx8txx example cmi tlt
+with exact DD_REG4 DD_REG4 any4 any4
+    uses reusing %3, DD_REG4 = %3
+    gen sub_l %4, %2
+	subx_l %a, %1	/* keep overflow flag */
+	sxx[2] %1
+	neg_b %1	yields	{dreg1, %1}
+with D_REG4 any4-D_REG4 DD_REG4 DD_REG4
+    /* only 3 of DD_REG4; may unstack %2 into AA_REG */
+    gen sub_l %2, %4
+	subx_l %1, %3
+	sxx[1] %3
+	neg_b %3	yields	{dreg1, %3}
+
+pat cmi tlt $1==8			call cmx8txx("slt","sgt")
+pat cmi tle $1==8			call cmx8txx("sle","sge")
+pat cmi tge $1==8			call cmx8txx("sge","sle")
+pat cmi tgt $1==8			call cmx8txx("sgt","slt")
+pat cms teq $1==8			call cmx8txx("seq","seq")
+pat cms tne $1==8			call cmx8txx("sne","sne")
+pat cmu tlt $1==8			call cmx8txx("scs","shi")
+pat cmu tle $1==8			call cmx8txx("sls","scc")
+pat cmu tge $1==8			call cmx8txx("scc","sls")
+pat cmu tgt $1==8			call cmx8txx("shi","scs")
+
+proc cmx8zxx example cmi zlt
+with exact DD_REG4 DD_REG4 any4 any4
+    kills ALL
+    uses reusing %3, DD_REG4 = %3
+    gen sub_l %4, %2
+	subx_l %a, %1
+	bxx[2] {llabel, $2}
+with D_REG4 any4-D_REG4 DD_REG4 DD_REG4 STACK
+    gen sub_l %2, %4
+	subx_l %1, %3
+	bxx[1] {llabel, $2}
+
+pat cmi zlt $1==8			call cmx8zxx("blt","bgt")
+pat cmi zle $1==8			call cmx8zxx("ble","bge")
+pat cmi zge $1==8			call cmx8zxx("bge","ble")
+pat cmi zgt $1==8			call cmx8zxx("bgt","blt")
+pat cms zeq $1==8			call cmx8zxx("beq","beq")
+pat cms zne $1==8			call cmx8zxx("bne","bne")
+pat cmu zlt $1==8			call cmx8zxx("bcs","bhi")
+pat cmu zle $1==8			call cmx8zxx("bls","bcc")
+pat cmu zge $1==8			call cmx8zxx("bcc","bls")
+pat cmu zgt $1==8			call cmx8zxx("bhi","bcs")
+
+
 #if TBL68881
 proc cmf4zxx example cmf zlt
 with FS_REG FS_REG
@@ -6630,6 +7003,33 @@ uses reusing %1,DD_REG4
 pat loc loc ciu $1==$2	/* skip this */
 pat loc loc cui $1==$2	/* skip this */
 
+pat loc loc cii $1==4 && $2==8
+with exact test_set1+test_set2
+			yields	%1 {zero_const, 0}
+with test_set4
+    uses DD_REG4
+    gen test %1
+	slt {dreg1, %a}
+#ifdef TBL68020
+	extb_l %a
+#else
+	ext_w %a
+	ext_l %a
+#endif
+			yields	%1 %a
+
+pat loc loc cii $1<4 && $2==8
+			leaving loc $1 loc 4 cii loc 4 loc 8 cii
+
+pat loc loc ciu $1==4 && $2==8		yields {zero_const, 0}
+pat loc loc cui $1==4 && $2==8		yields {zero_const, 0}
+pat loc loc cuu $1==4 && $2==8		yields {zero_const, 0}
+
+pat loc loc cii $1==8 && $2==4		leaving asp 4
+pat loc loc ciu $1==8 && $2==4		leaving asp 4
+pat loc loc cui $1==8 && $2==4		leaving asp 4
+pat loc loc cuu $1==8 && $2==4		leaving asp 4
+
 
 /* The following rules should be handled by the peephole optimizer, I think */
 
diff --git a/plat/linux68k/descr b/plat/linux68k/descr
index a530fffdb..d813f61a8 100644
--- a/plat/linux68k/descr
+++ b/plat/linux68k/descr
@@ -10,6 +10,8 @@ var s=2
 var sa={s}
 var l={w}
 var la={w}
+var q=8
+var qa=4
 var f={w}
 var fa={w}
 var d=8
@@ -19,11 +21,12 @@ var xa={x}
 var ARCH=m68020
 var PLATFORM=linux68k
 var PLATFORMDIR={EM}/share/ack/{PLATFORM}
-var CPP_F=-D__unix -D__mc68020 -D__m68k -D__mc68000 -D__M68020
+var CPP_F=-D__unix -D__mc68020 -D__m68k -D__mc68000 -D__M68020 -D_EM_LLSIZE={q}
 var ALIGN=-a0:4 -a1:4 -a2:4 -a3:4 -b0:0x08000054
 var C_LIB={PLATFORMDIR}/libc-ansi.a
 # bitfields reversed for compatibility with (g)cc.
-var CC_ALIGN=-Vr
+# long long enabled.
+var CC_ALIGN=-Vrq{q}.{qa}
 var OLD_C_LIB={C_LIB}
 var MACHOPT_F=-ml10
 var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr
diff --git a/plat/linux68k/tests/build.lua b/plat/linux68k/tests/build.lua
index 221abc8d6..37edfaada 100644
--- a/plat/linux68k/tests/build.lua
+++ b/plat/linux68k/tests/build.lua
@@ -6,6 +6,5 @@ plat_testsuite {
     method = "plat/linux68k/emu+emu68k",
     skipsets = {
         "floats", -- FPU instructions not supported by emulator
-        "long-long",
     },
 }

From bbaed6bdba6cde01df4ee672417b865c07e9d86a Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Wed, 25 Sep 2019 12:29:41 -0400
Subject: [PATCH 18/22] Edit i386 rules for 8-byte shift, conversion.

Shifts that drop an EM word don't need to coerce the word to REG.
Some arithmetic right shifts can use _cdq_.

Drop rules for illegal integer conversions.  Sizes below a word are
illegal in EM, except as the source size of _cii_.
---
 mach/i386/ncg/table | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/mach/i386/ncg/table b/mach/i386/ncg/table
index f18bc3e7d..9710ff96b 100644
--- a/mach/i386/ncg/table
+++ b/mach/i386/ncg/table
@@ -1107,7 +1107,7 @@ with REG REG
 pat loc sli ($1&63)==32 && $2==8
 with a_word a_word		yields %1 {ANYCON,0}
 pat loc sli ($1&32)!=0 && $2==8
-with REG REG
+with REG a_word
   gen sal %1,{ANYCON,$1&31}	yields %1 {ANYCON,0}
 
 /*
@@ -1140,11 +1140,13 @@ pat loc sri ($1&32)==0 && $2==8
 with REG REG
   gen shrd %1,%2,{ANYCON,$1&31}
       sar %2,{ANYCON,$1&31}	yields %2 %1
+pat loc sri ($1&63)==32 && $2==8
+with a_word ACC			/* %1 is dropped; %2 becomes the low word */
+  gen cdq.			yields edx eax
 pat loc sri ($1&32)!=0 && $2==8
-with REG REG
-  gen mov %1,%2
-      sar %1,{ANYCON,$1&31}
-      sar %2,{ANYCON,31}	yields %2 %1
+with a_word ACC
+  gen sar eax,{ANYCON,$1&31}
+      cdq.			yields edx eax
 
 /*
 pat sri !defined($1)
@@ -1258,7 +1260,7 @@ with REG REG
 pat loc sru ($1&63)==32 && $2==8
 with a_word a_word		yields {ANYCON,0} %2
 pat loc sru ($1&32)!=0 && $2==8
-with REG REG
+with a_word REG
   gen shr %2,{ANYCON,$1&31}	yields {ANYCON,0} %2
 
 /*
@@ -2212,18 +2214,18 @@ with ACC
 
 pat loc loc cii $1<4 && $2==8	leaving loc $1 loc 4 cii loc 4 loc $2 cii
 
-pat loc loc cii $1==8 && $2<=4
+pat loc loc cii $1==8 && $2==4
 with a_word a_word		yields %1
 
 pat loc loc ciu				leaving loc $1 loc $2 cuu
 pat loc loc cui				leaving loc $1 loc $2 cuu
 
-pat loc loc cuu $1==$2 || ($1<=4 && $2<=4)
+pat loc loc cuu $1==$2
 
-pat loc loc cuu $1<=4 && $2==8
+pat loc loc cuu $1==4 && $2==8
 with a_word			yields {ANYCON,0} %1
 
-pat loc loc cuu $1==8 && $2<=4
+pat loc loc cuu $1==8 && $2==4
 with a_word a_word		yields %1
 
 pat loc loc cif $1==4 && $2==4		leaving loc 4 cal ".cif4" asp 4

From d6413c1a11bb40c58ab48a42441fbb331fb192dc Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Wed, 25 Sep 2019 12:35:36 -0400
Subject: [PATCH 19/22] Enable long long in ACK C for osx386.

---
 plat/osx386/descr | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/plat/osx386/descr b/plat/osx386/descr
index afe46c588..045d2e720 100644
--- a/plat/osx386/descr
+++ b/plat/osx386/descr
@@ -8,6 +8,8 @@ var s=2
 var sa={s}
 var l={w}
 var la={w}
+var q=8
+var qa=4
 var f={w}
 var fa={w}
 var d=8
@@ -17,11 +19,12 @@ var xa={x}
 var ARCH=i386
 var PLATFORM=osx386
 var PLATFORMDIR={EM}/share/ack/{PLATFORM}
-var CPP_F=-D__unix
+var CPP_F=-D__unix -D_EM_LLSIZE={q}
 var ALIGN=-a0:4 -a1:4 -a2:4096 -a3:4 -b0:0x123c
 var C_LIB={PLATFORMDIR}/libc-ansi.a
 # bitfields reversed for compatibility with (g)cc.
-var CC_ALIGN=-Vr
+# long long enabled.
+var CC_ALIGN=-Vrq{q}.{qa}
 var OLD_C_LIB={C_LIB}
 var MACHOPT_F=-m10
 var EGO_PLAT_FLAGS=-M{EM}/share/ack/ego/{ARCH}.descr

From 0b0c3d5b60c1126c47358f18e06846219512a054 Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Fri, 27 Sep 2019 12:15:10 -0400
Subject: [PATCH 20/22] Add csa 8, csb 8 for i386, m68020.

This allows `long long x; switch (x) {...}` in C.  Add test in C.

This adapts the code for csa 8 and csb 8 from the existing code for
csa 4 and csb 4, for both i386 and m68020.
---
 mach/i386/libem/build.lua         |  2 +-
 mach/i386/libem/csa8.s            | 35 +++++++++++++++++
 mach/i386/libem/csb8.s            | 39 +++++++++++++++++++
 mach/i386/ncg/table               | 10 +++++
 mach/m68020/libem/build.lua       |  2 +-
 mach/m68020/libem/csa8.s          | 39 +++++++++++++++++++
 mach/m68020/libem/csb8.s          | 39 +++++++++++++++++++
 mach/m68020/ncg/table             | 10 +++++
 tests/plat/build.lua              |  3 +-
 tests/plat/long-long/llswitch_e.c | 62 +++++++++++++++++++++++++++++++
 10 files changed, 238 insertions(+), 3 deletions(-)
 create mode 100644 mach/i386/libem/csa8.s
 create mode 100644 mach/i386/libem/csb8.s
 create mode 100644 mach/m68020/libem/csa8.s
 create mode 100644 mach/m68020/libem/csb8.s
 create mode 100644 tests/plat/long-long/llswitch_e.c

diff --git a/mach/i386/libem/build.lua b/mach/i386/libem/build.lua
index 37e05a45c..6d4322a9c 100644
--- a/mach/i386/libem/build.lua
+++ b/mach/i386/libem/build.lua
@@ -1,7 +1,7 @@
 for _, plat in ipairs(vars.plats) do
 	acklibrary {
 		name = "lib_"..plat,
-		srcs = { "./*.s" }, -- divrem8.s
+		srcs = { "./*.s" }, -- csb8.s
 		vars = { plat = plat },
 	}
 end
diff --git a/mach/i386/libem/csa8.s b/mach/i386/libem/csa8.s
new file mode 100644
index 000000000..80a3d84d6
--- /dev/null
+++ b/mach/i386/libem/csa8.s
@@ -0,0 +1,35 @@
+.sect .text; .sect .rom; .sect .data; .sect .bss
+.define .csa8
+
+.sect .text
+.csa8:
+		! ebx, descriptor address
+		! edx:eax, index
+
+	mov	ecx,(ebx)	! default
+	sub	eax,4(ebx)
+	sbb	edx,8(ebx)	! index - lower bound
+	push	edx
+	push	eax
+	mov	eax,12(ebx)
+	mov	edx,16(ebx)	! upper bound - lower bound
+	sub	eax,(esp)
+	sbb	edx,4(esp)	! upper bound - index
+	pop	eax
+	pop	edx
+	jb	1f		! jump if upper bound < index
+	! assuming edx:eax < 2**30
+	mov	ebx,20(ebx)(eax*4)
+	test	ebx,ebx
+	jnz	2f
+1:
+	mov	ebx,ecx
+	test	ebx,ebx
+	jnz	2f
+.extern ECASE
+.extern .fat
+	mov	eax,ECASE
+	push	eax
+	jmp	.fat
+2:
+	jmp	ebx
diff --git a/mach/i386/libem/csb8.s b/mach/i386/libem/csb8.s
new file mode 100644
index 000000000..8af3d1e20
--- /dev/null
+++ b/mach/i386/libem/csb8.s
@@ -0,0 +1,39 @@
+.sect .text; .sect .rom; .sect .data; .sect .bss
+.define .csb8
+
+.sect .text
+.csb8:
+		! ebx, descriptor address
+		! edx:eax, index
+
+	push	esi		! esi, edi are used as scratch below
+	push	edi
+	push	(ebx)		! default
+	mov	ecx,4(ebx)	! entry count (assuming < 2**32)
+1:
+	add	ebx,12		! step to the next 12-byte entry
+	dec	ecx
+	jl	4f		! no entries left, take the default
+	mov	esi,0(ebx)
+	mov	edi,4(ebx)	! descriptor's index
+	sub	esi,eax
+	sbb	edi,edx		! edi:esi = descriptor's index - our index
+	or	esi,edi		! zero only if both halves matched
+	jne	1b		! no match, try the next entry
+	pop	edx		! drop default
+	mov	ebx,8(ebx)	! jump address of the matching entry
+2:
+	pop	edi
+	pop	esi
+	test	ebx,ebx
+	jnz	3f		! nonzero address, go jump to it
+.extern ECASE
+.extern .fat
+	mov	eax,ECASE
+	push	eax
+	jmp	.fat
+3:
+	jmp	ebx
+4:
+	pop	ebx		! default
+	jmp	2b
diff --git a/mach/i386/ncg/table b/mach/i386/ncg/table
index 9710ff96b..b91f4c8f5 100644
--- a/mach/i386/ncg/table
+++ b/mach/i386/ncg/table
@@ -3074,6 +3074,11 @@ with BXREG ACC
 kills ALL
   gen jmp {label, ".csa4"}
 
+pat csa $1==8
+with BXREG ACC DXREG
+kills ALL
+  gen jmp {label, ".csa8"}
+
 pat csa !defined($1)
 with rm-BXREG-ACC BXREG ACC
 kills ALL
@@ -3086,6 +3091,11 @@ with BXREG ACC
 kills ALL
   gen jmp {label, ".csb4"}
 
+pat csb $1==8
+with BXREG ACC DXREG
+kills ALL
+  gen jmp {label, ".csb8"}
+
 pat csb !defined($1)
 with rm-BXREG-ACC BXREG ACC
   gen cmp %1,{ANYCON,4}
diff --git a/mach/m68020/libem/build.lua b/mach/m68020/libem/build.lua
index d5c9af8ad..c0356c61c 100644
--- a/mach/m68020/libem/build.lua
+++ b/mach/m68020/libem/build.lua
@@ -2,7 +2,7 @@ for _, plat in ipairs(vars.plats) do
 	acklibrary {
 		name = "lib_"..plat,
 		srcs = {
-			"./*.s", -- added divrem8.s
+			"./*.s", -- csb8.s
 			"./*.c"
 		},
 		vars = { plat = plat },
diff --git a/mach/m68020/libem/csa8.s b/mach/m68020/libem/csa8.s
new file mode 100644
index 000000000..7c46382e9
--- /dev/null
+++ b/mach/m68020/libem/csa8.s
@@ -0,0 +1,39 @@
+.define	.csa8
+.sect .text
+.sect .rom
+.sect .data
+.sect .bss
+ECASE=20
+
+.sect .text
+
+.csa8:
+		! jump table address in a0
+		! index in (sp)
+
+	move.l	(a0)+, a1	! default address
+	move.l	(sp)+, d0
+	move.l	(sp)+, d1	! index
+	move.l	(a0)+, d2
+	sub.l	(a0)+, d1
+	subx.l	d2, d0		! index - lower bound in d0:d1
+	move.l	d1, a2
+	move.l	(a0)+, d2
+	move.l	(a0)+, d1	! upper - lower bound
+	sub.l	a2, d1
+	subx.l	d0, d2		! upper - index
+	bcs	1f
+	move.l	a2, d1
+	! assuming d0:d1 < 65536
+	move.l	(a0,d1.l*4), d1	! jump address
+	beq	1f
+	move.l	d1,a1
+	jmp	(a1)
+1:
+	move.l	a1, d0
+	beq	2f
+	jmp	(a1)		! jump to specified address
+2:
+	move.l	#ECASE, -(sp)
+	jmp	(.fatal)
+.align 2
diff --git a/mach/m68020/libem/csb8.s b/mach/m68020/libem/csb8.s
new file mode 100644
index 000000000..d039ec216
--- /dev/null
+++ b/mach/m68020/libem/csb8.s
@@ -0,0 +1,39 @@
+.define	.csb8
+.sect .text
+.sect .rom
+.sect .data
+.sect .bss
+ECASE=20
+
+.sect .text
+
+.csb8:
+		! case descriptor in a0
+		! index in (sp)
+
+	move.l	(a0)+, a1	! default jump address
+	move.l	(sp)+, d2
+	move.l	(sp), a2	! index in d2:a2
+	move.l	d7, (sp)	! save d7 in the slot the index occupied
+	add.l	#4, a0		! skip 4 bytes; the count is read from the next long
+	move.l	(a0)+, d1	! # entries (assuming <= 65536)
+	beq	3f		! no entries, use the default
+	sub.l	#1, d1		! loop counter for dbeq
+1:
+	move.l	(a0)+, d0
+	move.l	(a0)+, d7	! descriptor's index in d0:d7
+	add.l	#4, a0		! step over this entry's jump address
+	sub.l	a2, d7
+	subx.l	d2, d0		! descriptor's index - our index
+	dbeq	d1, 1b		! loop until a match or the count runs out
+	bne	3f		! no match, keep the default in a1
+	move.l	(-4,a0), a1	! jump address
+3:
+	move.l	(sp)+, d7	! caller's d7
+	move.l	a1, d0		! test a1 for zero
+	beq	4f
+	jmp	(a1)
+4:
+	move.l	#ECASE, -(sp)
+	jmp	(.fatal)
+.align 2
diff --git a/mach/m68020/ncg/table b/mach/m68020/ncg/table
index fe1099078..a0f649b3e 100644
--- a/mach/m68020/ncg/table
+++ b/mach/m68020/ncg/table
@@ -6057,6 +6057,11 @@ with any4 D_REG4+DLOCAL+const4+absolute4 STACK
 	move %2,dl0
 	jmp {absolute4, ".csa4"}
 
+pat csa $1==8
+with any4 STACK
+    gen move %1,a0
+	jmp {absolute4, ".csa8"}
+
 #if WORD_SIZE==2
 pat csb $1==2
 #if TBL68020
@@ -6079,6 +6084,11 @@ with any4 D_REG4+DLOCAL+const4+absolute4 STACK
 	move %2,dl0
 	jmp {absolute4, ".csb4"}
 
+pat csb $1==8
+with any4 STACK
+    gen move %1,a0
+	jmp {absolute4, ".csb8"}
+
 pat dch				leaving loi 4
 
 #if WORD_SIZE==2
diff --git a/tests/plat/build.lua b/tests/plat/build.lua
index 1613255be..e5318eae4 100644
--- a/tests/plat/build.lua
+++ b/tests/plat/build.lua
@@ -1,10 +1,11 @@
+
 include("plat/build.lua")
 
 definerule("plat_testsuite",
 	{
 		plat = { type="string" },
 		method = { type="string" },
-		-- added long-long/lldivrem_e.c
+		-- added long-long/llswitch_e.c
 		sets = { type="table", default={"core", "b", "bugs", "m2", "floats", "long-long"}},
 		skipsets = { type="table", default={}},
 		tests = { type="targets", default={} },
diff --git a/tests/plat/long-long/llswitch_e.c b/tests/plat/long-long/llswitch_e.c
new file mode 100644
index 000000000..8ecc11812
--- /dev/null
+++ b/tests/plat/long-long/llswitch_e.c
@@ -0,0 +1,62 @@
+#include "test.h"
+
+long long a = -719560752603LL;
+long long b = -319239774717LL;
+long long c =  100200300401LL;
+long long d =  100200300402LL;
+long long e =  100200300403LL;
+long long f =  100200300404LL;
+long long g =  100200300405LL;
+long long h =  100200300406LL;
+long long i =  541934347449LL;
+long long j =  727503252688LL;
+
+int compact(long long x) {
+	/* probably _csa_ */
+	switch (x) {
+	case 100200300401LL: return 23;
+	case 100200300402LL: return 29;
+	case 100200300403LL: return 31;
+	case 100200300405LL: return 37;
+	case 100200300406LL: return 41;
+	default: return 43;
+	}
+}
+
+int sparse(long long x) {
+	/* probably _csb_ */
+	switch (x) {
+	case -719560752603LL: return 47;
+	case -319239774717LL: return 53;
+	case  100200300403LL: return 59;
+	case  541934347449LL: return 61;
+	case  727503252688LL: return 67;
+	default: return 71;
+	}
+}
+
+void _m_a_i_n(void) {
+	ASSERT(compact(a) == 43);
+	ASSERT(compact(b) == 43);
+	ASSERT(compact(c) == 23);
+	ASSERT(compact(d) == 29);
+	ASSERT(compact(e) == 31);
+	ASSERT(compact(f) == 43);
+	ASSERT(compact(g) == 37);
+	ASSERT(compact(h) == 41);
+	ASSERT(compact(i) == 43);
+	ASSERT(compact(j) == 43);
+
+	ASSERT(sparse(a) == 47);
+	ASSERT(sparse(b) == 53);
+	ASSERT(sparse(c) == 71);
+	ASSERT(sparse(d) == 71);
+	ASSERT(sparse(e) == 59);
+	ASSERT(sparse(f) == 71);
+	ASSERT(sparse(g) == 71);
+	ASSERT(sparse(h) == 71);
+	ASSERT(sparse(i) == 61);
+	ASSERT(sparse(j) == 67);
+
+	finished();
+}

From a434749fd958d100ee9106e1e0920db71ca6b636 Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Fri, 4 Oct 2019 18:58:56 -0400
Subject: [PATCH 21/22] Widen valu_t in the assembler to 64 bits.

Most machines had undefined valu_t and redefined it to a different
type.  Edit mach/*/as/mach0.c to remove such redefinitions, so the
next change to valu_t will affect all machines.

Edit mach/proto/as/comm0.h to change valu_t to int64_t, and add
uvalu_t and uint64_t.

Remove int64_t y_valu8 from the yacc %union, now that valu_t y_valu
can hold 64 bits.  Replace y_valu8 with y_valu.  The .data8 pseudo
becomes less special; it now accepts absolute expressions.

This change simplifies the assembler and seems to have no effect on
the assembled output.  Among the files in share/ack/examples, the only
changes are in hilo_bas.* and startrek_c.linuxppc, but those files
seem to change whenever I rebuild them.
---
 mach/arm/as/mach0.c     |  3 ---
 mach/i386/as/mach0.c    |  2 --
 mach/i86/as/mach0.c     |  3 ---
 mach/m68020/as/mach0.c  |  2 --
 mach/m68k2/as/mach0.c   |  2 --
 mach/mips/as/mach0.c    |  3 ---
 mach/ns/as/mach0.c      |  2 --
 mach/powerpc/as/mach0.c |  3 ---
 mach/proto/as/comm0.h   |  3 ++-
 mach/proto/as/comm2.y   | 15 +++------------
 mach/proto/as/comm5.c   | 13 +++++--------
 mach/vax4/as/mach0.c    |  2 --
 mach/vc4/as/mach0.c     |  3 ---
 mach/z8000/as/mach0.c   |  2 --
 man/uni_ass.6           | 11 ++++-------
 15 files changed, 14 insertions(+), 55 deletions(-)

diff --git a/mach/arm/as/mach0.c b/mach/arm/as/mach0.c
index 855541a15..1853f8de4 100644
--- a/mach/arm/as/mach0.c
+++ b/mach/arm/as/mach0.c
@@ -8,9 +8,6 @@
 #define WORDS_REVERSED
 #define BYTES_REVERSED
 */
-#undef valu_t
-#define valu_t 	long
-
 #undef ADDR_T
 #define ADDR_T long
 
diff --git a/mach/i386/as/mach0.c b/mach/i386/as/mach0.c
index 6fb5358d5..63f719565 100644
--- a/mach/i386/as/mach0.c
+++ b/mach/i386/as/mach0.c
@@ -11,8 +11,6 @@
 #define	LISTING		/* enable listing facilities */
 #define RELOCATION	/* generate relocation info */
 
-#undef valu_t
-#define valu_t long
 #undef ADDR_T
 #define ADDR_T long
 
diff --git a/mach/i86/as/mach0.c b/mach/i86/as/mach0.c
index 1c47e4874..fc69730c9 100644
--- a/mach/i86/as/mach0.c
+++ b/mach/i86/as/mach0.c
@@ -12,9 +12,6 @@
 #define RELOCATION	/* generate relocation info */
 #define DEBUG 0
 
-#undef valu_t
-#define valu_t long
-
 #undef ALIGNWORD
 #define ALIGNWORD 2
 #undef ALIGNSECT
diff --git a/mach/m68020/as/mach0.c b/mach/m68020/as/mach0.c
index 498620c0e..8e79b89a6 100644
--- a/mach/m68020/as/mach0.c
+++ b/mach/m68020/as/mach0.c
@@ -14,8 +14,6 @@
 #define RELOCATION		/* generate relocatable code */
 #define DEBUG 0
 
-#undef	valu_t
-#define	valu_t		long
 #undef	ADDR_T
 #define	ADDR_T		long
 
diff --git a/mach/m68k2/as/mach0.c b/mach/m68k2/as/mach0.c
index 72845b0be..62878f7dc 100644
--- a/mach/m68k2/as/mach0.c
+++ b/mach/m68k2/as/mach0.c
@@ -17,8 +17,6 @@
 
 #define Xfit(f)		if (!(f)) Xnofit();
 
-#undef	valu_t
-#define	valu_t		long
 #undef	ADDR_T
 #define	ADDR_T		long
 
diff --git a/mach/mips/as/mach0.c b/mach/mips/as/mach0.c
index edce6640b..8aef3d697 100644
--- a/mach/mips/as/mach0.c
+++ b/mach/mips/as/mach0.c
@@ -8,9 +8,6 @@
 #define RELOCATION          /* generate relocatable code */
 #define DEBUG 0
 
-#undef valu_t
-#define valu_t int32_t
-
 #undef ADDR_T
 #define ADDR_T uint32_t
 
diff --git a/mach/ns/as/mach0.c b/mach/ns/as/mach0.c
index e4ba505ae..218f7c76e 100644
--- a/mach/ns/as/mach0.c
+++ b/mach/ns/as/mach0.c
@@ -10,8 +10,6 @@
 #define	THREE_PASS	/* branch and offset optimization */
 #define	LISTING		/* enable listing facilities */
 
-#undef	valu_t
-#define valu_t		long
 #undef	ADDR_T
 #define	ADDR_T		long
 #undef	ALIGNSECT
diff --git a/mach/powerpc/as/mach0.c b/mach/powerpc/as/mach0.c
index 1c2051753..b6eec8f2d 100644
--- a/mach/powerpc/as/mach0.c
+++ b/mach/powerpc/as/mach0.c
@@ -10,9 +10,6 @@
 #define RELOCATION          /* generate relocatable code */
 #define DEBUG 0
 
-#undef valu_t
-#define valu_t int32_t
-
 #undef ADDR_T
 #define ADDR_T uint32_t
 
diff --git a/mach/proto/as/comm0.h b/mach/proto/as/comm0.h
index 03b48f75e..5ef226be6 100644
--- a/mach/proto/as/comm0.h
+++ b/mach/proto/as/comm0.h
@@ -58,7 +58,8 @@ _include	<string.h>
 
 #define	GENLAB		"I"		/* compiler generated labels */
 
-#define	valu_t		long		/* type of expression values */
+#define	valu_t		int64_t		/* type of expression values */
+#define	uvalu_t		uint64_t	/* unsigned valu_t */
 #define	ADDR_T		unsigned short	/* type of dot */
 #define	word_t		short		/* type of keyword value */
 /*
diff --git a/mach/proto/as/comm2.y b/mach/proto/as/comm2.y
index 1e28979fa..83d74f5fc 100644
--- a/mach/proto/as/comm2.y
+++ b/mach/proto/as/comm2.y
@@ -22,7 +22,6 @@ static item_t	*last_it, *o_it;
 %union {
 	word_t	y_word;
 	valu_t	y_valu;
-	int64_t	y_valu8;
 	expr_t	y_expr;
 	item_t	*y_item;
 #ifdef ASLD
@@ -44,7 +43,7 @@ static item_t	*last_it, *o_it;
 %token NUMBER2
 %token NUMBER3
 %token NUMBER4
-%token <y_valu8> NUMBER8
+%token <y_valu> NUMBER8
 %token NUMBERF
 %token DOT
 %token EXTERN
@@ -77,7 +76,6 @@ static item_t	*last_it, *o_it;
 %nonassoc '~'
 
 %type <y_valu> absexp optabs1 optabs2
-%type <y_valu8> datum8
 %type <y_expr> expr
 %type <y_item> id_fb
 
@@ -285,17 +283,10 @@ datalist
 			}
 	;
 
-/* datum8 isn't expr, because int64_t may be wider than valu_t. */
-datum8	:	NUMBER8
-			{	$$ = $1;}
-	|	'-' NUMBER8
-			{	$$ = -$2;}
-	;
-
 data8list
-	:	datum8
+	:	absexp
 			{	emit8($1);}
-	|	data8list ',' datum8
+	|	data8list ',' absexp
 			{	emit8($3);}
 	;
 
diff --git a/mach/proto/as/comm5.c b/mach/proto/as/comm5.c
index 2987aedb2..ee877120c 100644
--- a/mach/proto/as/comm5.c
+++ b/mach/proto/as/comm5.c
@@ -121,7 +121,7 @@ void putval(int c)
 			v = yylval.y_valu;
 			goto putnum;
 		case NUMBER8:
-			v = yylval.y_valu8;
+			v = yylval.y_valu;
 			for (n = 0; n < sizeof(v); n++)
 			{
 				if (v == 0)
@@ -132,7 +132,7 @@ void putval(int c)
 				c = NUMBER0 + n;
 			else
 				n = 8;
-			v = yylval.y_valu8;
+			v = yylval.y_valu;
 		putnum:
 			putc(c, tempfile);
 			putc(c >> 8, tempfile);
@@ -236,10 +236,7 @@ int getval(int c)
 				v <<= 8;
 				v |= getc(tempfile);
 			}
-			if (c == NUMBER8)
-				yylval.y_valu8 = v;
-			else
-				yylval.y_valu = v;
+			yylval.y_valu = v;
 			return (c);
 		case IDENT:
 		case FBSYM:
@@ -421,7 +418,7 @@ static void need_stringbuf()
 
 static int innumber(int c)
 {
-	uint64_t uv;
+	uvalu_t uv;
 	char* p;
 	int radix;
 	static char num[40 + 1];
@@ -473,7 +470,7 @@ static int innumber(int c)
 			serror("digit exceeds radix");
 		uv = uv * radix + c;
 	}
-	yylval.y_valu8 = uv; /* signed = unsigned */
+	yylval.y_valu = uv; /* signed = unsigned */
 	return (NUMBER8);
 
 floatconstant:
diff --git a/mach/vax4/as/mach0.c b/mach/vax4/as/mach0.c
index 531de3a15..97f8ca93f 100644
--- a/mach/vax4/as/mach0.c
+++ b/mach/vax4/as/mach0.c
@@ -12,8 +12,6 @@
 #define LISTING
 #define RELOCATION
 
-#undef valu_t
-#define valu_t long
 #undef word_t
 #define word_t long
 #undef ADDR_T
diff --git a/mach/vc4/as/mach0.c b/mach/vc4/as/mach0.c
index 2df70061c..0e92645ea 100644
--- a/mach/vc4/as/mach0.c
+++ b/mach/vc4/as/mach0.c
@@ -10,9 +10,6 @@
 #define RELOCATION          /* generate relocatable code */
 #define DEBUG 0
 
-#undef valu_t
-#define valu_t long
-
 #undef ADDR_T
 #define ADDR_T long
 
diff --git a/mach/z8000/as/mach0.c b/mach/z8000/as/mach0.c
index 15b97913b..c731d5a50 100644
--- a/mach/z8000/as/mach0.c
+++ b/mach/z8000/as/mach0.c
@@ -14,7 +14,5 @@
 #define ASLD
 #undef ALIGNSECT
 #define ALIGNSECT	2
-#undef valu_t
-#define valu_t long
 #undef ADDR_T
 #define ADDR_T long
diff --git a/man/uni_ass.6 b/man/uni_ass.6
index 558fae2de..572ec0ff1 100644
--- a/man/uni_ass.6
+++ b/man/uni_ass.6
@@ -154,9 +154,8 @@ if the number starts with '0x' it is hexadecimal else
     if the number starts with '0' it is octal else
         it's decimal.
 .fi
-The range of numbers depends on the machine.
-A rule of the thumb is that the width of the machine's registers
-the same is as the number of bits allowed in numbers.
+The width of numbers is at least 64 bits, so the .data8 pseudo may
+accept the full range of 8-byte values.
 .IP comment
 The character '!' denotes the start of comment, every character
 up to the next newline is skipped.
@@ -206,11 +205,9 @@ This is not followed by automatic alignment.
 .Pu ".data4 \fIexpression\fP [, \fIexpression\fP]*"
 Initialize a sequence of longs (4-byte values).
 This is not followed by automatic alignment.
-.Pu ".data8 \fIliteralint\fP [, \fIliteralint\fP]*"
+.Pu ".data8 \fIexpression\fP [, \fIexpression\fP]*"
 Initialize a sequence of long longs (8-byte values).
-This accepts only literal integers, not symbols nor expressions; but
-a \fIliteralint\fP may be any signed or unsigned 8-byte integer, even
-if it is outside the usual range for the machine.
+The expressions must be absolute.
 This is not followed by automatic alignment.
 .Pu ".dataf4 \fIliteralfloat\fP [, \fIliteralfloat\fP]*"
 Initialize a sequence of floats (4-byte values).

From b9bd89914feebe8c0c1ca519470771039a9b7692 Mon Sep 17 00:00:00 2001
From: George Koehler <kernigh@gmail.com>
Date: Sat, 5 Oct 2019 00:56:43 -0400
Subject: [PATCH 22/22] Add back _EM_LSIZE == 8 to <stdint.h>

This will cause ACK libc to provide int64_t as long (instead of long
long) on LP64, if we ever get such a platform.

LP64 would have 64-bit long and 64-bit long long, so int64_t might be
either type.  For example on amd64, int64_t is long in NetBSD libc,
and long long in OpenBSD libc.  Support for long long in ACK remains
incomplete (no printf "%lld"), so it seems better to prefer long where
possible.  Also, int64_t being long before long long is more
consistent with int32_t being int before long.

Put suffixes on the values of INT32_MAX, INT64_MAX, and related
constants, so they have the same types as int32_t and int64_t.
---
 lang/cem/libcc.ansi/headers/stdint.h | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/lang/cem/libcc.ansi/headers/stdint.h b/lang/cem/libcc.ansi/headers/stdint.h
index e21b48d7a..cd10049b9 100644
--- a/lang/cem/libcc.ansi/headers/stdint.h
+++ b/lang/cem/libcc.ansi/headers/stdint.h
@@ -27,17 +27,29 @@ typedef unsigned short  uint16_t;
 #if	_EM_WSIZE == 4
 typedef signed int      int32_t;
 typedef unsigned int    uint32_t;
-#else
-typedef signed long     int32_t;
-typedef unsigned long   uint32_t;
-#endif
 #define INT32_MAX       2147483647
 #define INT32_MIN       (-2147483648)
 #define UINT32_MAX      4294967295U
+#else
+typedef signed long     int32_t;
+typedef unsigned long   uint32_t;
+#define INT32_MAX       2147483647L
+#define INT32_MIN       (-2147483647L - 1) /* 2147483648L does not fit in the 32-bit long used here */
+#define UINT32_MAX      4294967295UL
+#endif
 
-/* We only get int64_t if long longs are 8 bytes. */
+/* We only get int64_t if longs or long longs are 8 bytes. */
 
-#if _EM_LLSIZE == 8
+#if _EM_LSIZE == 8
+typedef signed long     int64_t;
+typedef unsigned long   uint64_t;
+#define INT64_MAX       9223372036854775807L
+#define INT64_MIN       (-9223372036854775807L - 1) /* 9223372036854775808L does not fit in a 64-bit long */
+#define UINT64_MAX      18446744073709551615UL
+
+typedef int64_t         intmax_t;
+typedef uint64_t        uintmax_t;
+#elif _EM_LLSIZE == 8
 typedef signed long long    int64_t;
 typedef unsigned long long  uint64_t;
 #define INT64_MAX       9223372036854775807LL