Experiment with 8-byte integers in ncg i386.
This provides adi, sbi, mli, dvi, rmi, ngi, dvu, rmu 8, but is missing shifts and rotates. It is also missing conversions between 8-byte integers and other sizes of integers or floats. The code may not be entirely correct, but it works at least some of the time.

I adapted this from how ncg i86 does 4-byte integers, but I use a different algorithm when dividing by a large value: i86 avoids the div instruction and uses a shift-and-subtract loop, whereas I use the div instruction to estimate a quotient, which is closer to how big-integer libraries do division. My .dvi8 and .dvu8 also set ecx:ebx to the remainder; this might be a bad idea, because it requires .dvi8 and .dvu8 to always calculate the remainder, even when the caller only wants the quotient.

To play with 8-byte integers, I wrote EM procedures like

    mes 2, 4, 4
    exp $ngi
    pro $ngi,0
    ldl 4
    ngi 8
    lol 0
    sti 8
    lol 0
    ret 4
    end
    exp $adi
    pro $adi,0
    ldl 4
    ldl 12
    adi 8
    lol 0
    sti 8
    lol 0
    ret 4
    end

and called them from C like

    typedef struct { int l; int h; } q;
    q ngi(q);
    q adi(q, q);
This commit is contained in:
parent
1faff418ec
commit
893df4b79b
|
@ -1,7 +1,7 @@
|
||||||
for _, plat in ipairs(vars.plats) do
|
for _, plat in ipairs(vars.plats) do
|
||||||
acklibrary {
|
acklibrary {
|
||||||
name = "lib_"..plat,
|
name = "lib_"..plat,
|
||||||
srcs = { "./*.s" },
|
srcs = { "./*.s" }, -- dvi8.s
|
||||||
vars = { plat = plat },
|
vars = { plat = plat },
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
115
mach/i386/libem/dvi8.s
Normal file
115
mach/i386/libem/dvi8.s
Normal file
|
@ -0,0 +1,115 @@
|
||||||
|
.sect .text; .sect .rom; .sect .data; .sect .bss
.sect .text
.define .dvi8, .dvu8

! Offsets of the arguments from esp, valid after the "push edi" that
! both entry points start with (0 = saved edi, 4 = return address).
yl=8
yh=12
xl=16
xh=20

! .dvi8 and .dvu8 divide x = xh:xl by y = yh:yl,
! yield edx:eax = quotient, ecx:ebx = remainder.
! Both preserve edi and pop their 16 bytes of arguments (ret 16).
!
! .dvi8 truncates the quotient toward zero and gives the remainder the
! sign of x, so that x = quotient * y + remainder always holds.
!
! While dividing, edi holds two sign flags:
!   bit 0 set = negate the quotient before returning
!   bit 1 set = negate the remainder before returning

.dvu8:
	! Unsigned division: edi = 0, nothing gets negated.
	push edi
	xor edi,edi
	mov eax,xh(esp)
	mov edx,yh(esp)
	and edx,edx		! flags test yh, as label 7 expects
	jmp 7f

.dvi8:
	! Signed division: replace x and y with their absolute values
	! and record in edi which results must be negated.
	push edi
	xor edi,edi		! edi = 0
	mov eax,xh(esp)
	and eax,eax
	jns 1f
	xor edi,3		! x < 0: negative quotient, negative remainder
	neg eax
	neg xl(esp)
	sbb eax,0		! eax:xl = absolute value of x
1:	mov edx,yh(esp)
	and edx,edx
	jns 7f
	xor edi,1		! y < 0: flip the sign of the quotient only
	neg edx
	neg yl(esp)
	sbb edx,0		! edx:yl = absolute value of y

7:	! Here .dvu8 joins .dvi8, eax = xh, edx = yh, flags test edx,
	! the values in xh(esp) and yh(esp) are garbage.
	jnz 8f			! jump if y >= 2**32

	! y fits in 32 bits, so divide in two steps:
	!   xh / yl gives qh and rh, then (rh:xl) / yl gives ql and
	!   the remainder.  rh < yl, so the second div cannot overflow.
	mov ebx,yl(esp)
	xor edx,edx		! edx:eax = xh
	div ebx			! eax = qh, edx = rh
	mov ecx,eax
	mov eax,xl(esp)
	div ebx			! eax = ql, edx = remainder
	mov ebx,edx
	mov edx,ecx		! edx:eax = quotient qh:ql
	xor ecx,ecx		! ecx:ebx = remainder

9:	! Finally, apply the sign flags in edi.
	shr edi,1		! carry = bit 0, edi = old bit 1
	jnc 1f
	neg edx
	neg eax
	sbb edx,0		! negate quotient edx:eax
1:	and edi,edi
	jz 2f
	neg ecx
	neg ebx
	sbb ecx,0		! negate remainder ecx:ebx
2:	pop edi			! caller's edi
	ret 16

8:	! We come here if y >= 2**32.
	mov xh(esp),eax
	mov yh(esp),edx		! keep the absolute values for later
	mov ebx,yl(esp)		! edx:ebx = y

	! Estimate x / y as q = (x / (y >> cl)) >> cl,
	! where 2**31 <= (y >> cl) < 2**32 and 1 <= cl <= 32.
	xor ecx,ecx
1:	inc ecx
	shr edx,1
	rcr ebx,1		! edx:ebx = y >> cl
	and edx,edx
	jnz 1b			! loop until y >> cl fits in ebx

	! Divide x by (y >> cl) with the same two-step division as above.
	push edi		! save the sign flags, edi becomes scratch
	xor edx,edx		! edx:eax = xh
	div ebx			! eax = qh, edx = rh
	mov edi,eax
	mov eax,xl+4(esp)	! push edi moved xl to xl+4
	div ebx			! edi:eax = x / (y >> cl)

	! q = (x / (y >> cl)) >> cl = edi:eax >> cl
	! The processor masks shift counts to 5 bits, so a shift by
	! cl = 32 (y >= 2**63) must be done as a register move.
	cmp ecx,32
	jnz 2f
	mov eax,edi		! edi:eax >> 32
	jmp 3f
2:	shr eax,cl
	neg ecx			! cl = (32 - cl) modulo 32
	shl edi,cl
	or eax,edi		! eax = q
3:
	! The estimate q is never too low and at most 1 too high.
	! Multiplying a too-high q by y could overflow 64 bits, so step
	! down to q - 1 first (unless q = 0); then q * y <= x is safe
	! and the correct quotient is either this q or q + 1.
	and eax,eax
	jz 4f
	dec eax
4:	mov ecx,yh+4(esp)
	imul ecx,eax		! ecx = q * yh
	mov edi,eax		! edi = q
	mul yl+4(esp)		! edx:eax = q * yl
	add edx,ecx		! edx:eax = q * y
	mov ebx,xl+4(esp)
	mov ecx,xh+4(esp)	! ecx:ebx = x
	sub ebx,eax
	sbb ecx,edx		! ecx:ebx = x - q * y, less than 2 * y

	! Try q + 1: if subtracting y once more borrows, undo it.
	inc edi
	sub ebx,yl+4(esp)
	sbb ecx,yh+4(esp)	! ecx:ebx = x - (q + 1) * y
	jnc 1f			! no borrow: q + 1 is the quotient
	dec edi			! borrow: q was already right
	add ebx,yl+4(esp)
	adc ecx,yh+4(esp)	! restore remainder (high word in ecx)
1:	mov eax,edi
	xor edx,edx		! edx:eax = quotient
	pop edi			! sign flags
	jmp 9b
|
20
mach/i386/libem/mli8.s
Normal file
20
mach/i386/libem/mli8.s
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
.sect .text; .sect .rom; .sect .data; .sect .bss
.sect .text
.define .mli8

! .mli8 multiplies x by y and yields edx:eax = low 64 bits of x * y.
!   x  is passed in registers: eax = xl, edx = xh
!   y  is passed on the stack at the offsets below and is popped
!      on return (ret 8)
! ecx is used as scratch.
yl=4
yh=8

.mli8:
	! With x = xh * 2**32 + xl and y = yh * 2**32 + yl,
	!   x * y = xh * yh * 2**64 + (xh * yl + xl * yh) * 2**32 + xl * yl
	! The xh * yh term lies entirely above bit 63 and is dropped;
	! only the low 32 bits of the two cross products are needed.
	imul edx,yl(esp)	! edx = xh * yl
	mov ecx,yh(esp)
	imul ecx,eax		! ecx = yh * xl
	add ecx,edx		! ecx = sum of the cross products
	mul yl(esp)		! edx:eax = xl * yl
	add edx,ecx		! edx:eax = x * y
	ret 8
|
|
@ -961,6 +961,14 @@ with EXACT rmorconst const
|
||||||
uses reusing %1,REG=%1
|
uses reusing %1,REG=%1
|
||||||
gen add %a,%2 yields %a
|
gen add %a,%2 yields %a
|
||||||
|
|
||||||
|
/* 8-byte add.  Each operand is a pair of 4-byte tokens; the token that
   receives "add" is the low word and the one that receives "adc" (add
   with the carry from the low words) is the high word.  Two alternatives
   are given so that either operand may be the register pair that is
   overwritten with the sum. */
pat adi $1==8
|
||||||
|
with REG REG rmorconst rmorconst
|
||||||
|
gen add %1,%3
|
||||||
|
adc %2,%4 yields %2 %1
|
||||||
|
with rmorconst rmorconst REG REG
|
||||||
|
gen add %3,%1
|
||||||
|
adc %4,%2 yields %4 %3
|
||||||
|
|
||||||
/*
|
/*
|
||||||
pat adi !defined($1)
|
pat adi !defined($1)
|
||||||
with CXREG ACC
|
with CXREG ACC
|
||||||
|
@ -969,13 +977,17 @@ with CXREG ACC
|
||||||
*/
|
*/
|
||||||
|
|
||||||
pat sbi $1==4
|
pat sbi $1==4
|
||||||
|
|
||||||
with rmorconst REG
|
with rmorconst REG
|
||||||
gen sub %2,%1 yields %2
|
gen sub %2,%1 yields %2
|
||||||
with EXACT REG rmorconst
|
with EXACT REG rmorconst
|
||||||
gen sub %1,%2
|
gen sub %1,%2
|
||||||
neg %1 yields %1
|
neg %1 yields %1
|
||||||
|
|
||||||
|
/* 8-byte subtract.  %1/%2 are subtracted from the register pair %3/%4:
   "sub" works on the low words and "sbb" carries the borrow into the
   high words.  The register pair holds the difference. */
pat sbi $1==8
|
||||||
|
with rmorconst rmorconst REG REG
|
||||||
|
gen sub %3,%1
|
||||||
|
sbb %4,%2 yields %4 %3
|
||||||
|
|
||||||
/*
|
/*
|
||||||
pat sbi !defined($1)
|
pat sbi !defined($1)
|
||||||
with CXREG ACC
|
with CXREG ACC
|
||||||
|
@ -995,6 +1007,11 @@ with rm const
|
||||||
uses reusing %1,REG
|
uses reusing %1,REG
|
||||||
gen imul %a,%1,%2 yields %a
|
gen imul %a,%1,%2 yields %a
|
||||||
|
|
||||||
|
/* 8-byte multiply, done out of line by the library routine .mli8.
   One operand is taken in ACC and DXREG (presumably eax and edx, which
   is where .mli8 expects it -- confirm against the register
   definitions); the result is yielded as edx, eax. */
pat mli $1==8
|
||||||
|
with ACC DXREG
|
||||||
|
kills ALL
|
||||||
|
gen proccall {label,".mli8"} yields edx eax
|
||||||
|
|
||||||
/*
|
/*
|
||||||
pat mli !defined($1)
|
pat mli !defined($1)
|
||||||
with ACC
|
with ACC
|
||||||
|
@ -1008,6 +1025,10 @@ with noacc ACC
|
||||||
gen cdq.
|
gen cdq.
|
||||||
idiv %1 yields eax
|
idiv %1 yields eax
|
||||||
|
|
||||||
|
/* 8-byte signed divide, done out of line: call .dvi8 and yield
   edx, eax, the register pair in which .dvi8 leaves the quotient. */
pat dvi $1==8
|
||||||
|
kills ALL
|
||||||
|
gen proccall {label,".dvi8"} yields edx eax
|
||||||
|
|
||||||
/*
|
/*
|
||||||
pat dvi !defined($1)
|
pat dvi !defined($1)
|
||||||
with ACC
|
with ACC
|
||||||
|
@ -1021,6 +1042,10 @@ with noacc ACC
|
||||||
gen cdq.
|
gen cdq.
|
||||||
idiv %1 yields edx
|
idiv %1 yields edx
|
||||||
|
|
||||||
|
/* 8-byte signed remainder: the same call to .dvi8 as for dvi, but
   yield ecx, ebx, the register pair in which .dvi8 leaves the
   remainder. */
pat rmi $1==8
|
||||||
|
kills ALL
|
||||||
|
gen proccall {label,".dvi8"} yields ecx ebx
|
||||||
|
|
||||||
/*
|
/*
|
||||||
pat rmi !defined($1)
|
pat rmi !defined($1)
|
||||||
with ACC
|
with ACC
|
||||||
|
@ -1032,6 +1057,12 @@ pat ngi $1==4
|
||||||
with REG
|
with REG
|
||||||
gen neg %1 yields %1
|
gen neg %1 yields %1
|
||||||
|
|
||||||
|
/* 8-byte negate of the register pair %2:%1 (%1 is the low word).
   Both words are negated; negating the low word sets the carry flag
   when that word is nonzero, and "sbb %2,0" then subtracts that
   borrow from the high word. */
pat ngi $1==8
|
||||||
|
with REG REG
|
||||||
|
gen neg %2
|
||||||
|
neg %1
|
||||||
|
sbb %2,{ANYCON,0} yields %2 %1
|
||||||
|
|
||||||
/*
|
/*
|
||||||
pat ngi !defined($1)
|
pat ngi !defined($1)
|
||||||
with ACC
|
with ACC
|
||||||
|
@ -1114,6 +1145,10 @@ with noacc ACC
|
||||||
uses DXREG={ANYCON,0}
|
uses DXREG={ANYCON,0}
|
||||||
gen div %1 yields eax
|
gen div %1 yields eax
|
||||||
|
|
||||||
|
/* 8-byte unsigned divide, done out of line: call .dvu8 and yield
   edx, eax, the register pair in which .dvu8 leaves the quotient. */
pat dvu $1==8
|
||||||
|
kills ALL
|
||||||
|
gen proccall {label,".dvu8"} yields edx eax
|
||||||
|
|
||||||
/*
|
/*
|
||||||
pat dvu !defined($1)
|
pat dvu !defined($1)
|
||||||
with ACC STACK
|
with ACC STACK
|
||||||
|
@ -1126,6 +1161,10 @@ with noacc ACC
|
||||||
uses DXREG={ANYCON,0}
|
uses DXREG={ANYCON,0}
|
||||||
gen div %1 yields edx
|
gen div %1 yields edx
|
||||||
|
|
||||||
|
/* 8-byte unsigned remainder: the same call to .dvu8 as for dvu, but
   yield ecx, ebx, the register pair in which .dvu8 leaves the
   remainder. */
pat rmu $1==8
|
||||||
|
kills ALL
|
||||||
|
gen proccall {label,".dvu8"} yields ecx ebx
|
||||||
|
|
||||||
/*
|
/*
|
||||||
pat rmu !defined($1)
|
pat rmu !defined($1)
|
||||||
with ACC STACK
|
with ACC STACK
|
||||||
|
|
Loading…
Reference in a new issue