Added end library and floating point processor support

This commit is contained in:
ceriel 1991-02-01 15:09:58 +00:00
parent e64fb88a5d
commit 0f4e675b50
13 changed files with 718 additions and 124 deletions

View file

@ -1,5 +1,9 @@
LIST
Makefile
compmodule
em_end.s
etext.s
edata.s
end.s
libem_s.a
READ_ME

View file

@ -26,7 +26,7 @@ shp.s
sig.s
cms.s
gto.s
ffp.s
fp68881.s
fat.s
trp.s
dia.s

View file

@ -1,28 +1,33 @@
# $Header$
MACH=m68k2
all: libem_o.a end.o
ASAR=aal
all: libem_o.a end.a
install: all
../../install libem_o.a tail_em
../../install end.o end_em
../../install end.a end_em
cmp: all
-../../compare libem_o.a tail_em
-../../compare end.o end_em
-../../compare end.a end_em
end.o: end.s
end.a: em_end.s etext.s edata.s end.s
$(MACH) -I../../../h -c em_end.s
$(MACH) -I../../../h -c edata.s
$(MACH) -I../../../h -c etext.s
$(MACH) -I../../../h -c end.s
$(ASAR) cr end.a em_end.o etext.o edata.o end.o
libem_o.a: libem_s.a
ASAR=aal ; export ASAR ;\
ASAR=$(ASAR) ; export ASAR ;\
march . libem_o.a
clean:
rm -f *.o libem_o.a
rm -f *.o libem_o.a end.a
opr :
make pr | opr
pr:
@arch pv libem_s.a | pr -h `pwd`/libem_s.a
@pr `pwd`/end.s
@pr `pwd`/em_end.s `pwd`/edata.s `pwd`/etext.s `pwd`/end.s

View file

@ -1,5 +1,4 @@
The original EM library routines saved all registers
(including scratch registers) in global data; hence they
were not reentrant.
The new routines do not save registers d0,d1,d2,a0 and a1.
They are reentrant.
The routines in mli.s, mlu.s, dvi.s, and dvu.s are written by
Kai-Uwe Bloem and were published on the comp.os.minix newsgroup.
He allowed us to use them for ACK, but requested that
they do not fall under the ACK copyright notice. So, they don't.

View file

@ -5,38 +5,96 @@
.sect .bss
! signed long divide
!-----------------------------------------------------------------------------
! rewritten by Kai-Uwe Bloem (i5110401@dbstu1.bitnet) for speed.
! #1 01/12/90 initial revision. Minor reduce of shift operations.
! #2 03/07/90 use 68000 divu instruction wherever possible. This change
! makes #1 superfluous. (derived from my GNU division routine)
!-----------------------------------------------------------------------------
! Some common cases can be handled in a special, much faster way :
! 1) divisor = 0
! => cause trap, then return to user. Result is undefined
! 2) dividend < divisor
! => quotient = 0, remainder = dividend
! 3) divisor < 0x10000 ( i.e. divisor is only 16 bits wide )
! => quotient and remainder can be calculated quite fast by repeated
! application of 68000 divu operations (ca. 400 cycles)
! 4) otherwise (due to #2, #3 dividend, divisor both wider than 16 bits)
! => do slow division by shift and subtract
!-----------------------------------------------------------------------------
! register usage:
! entry : d0 divisor
! d2 dividend
! exit : d1 quotient
! d2 remainder
.sect .text
.dvi:
move.l (sp)+,a0 ! return address
move.l (sp)+,d0
move.l (sp)+,d1
move.l d3,-(sp) ! save d3 and d4
move.l d4,-(sp)
move.l (sp)+,a1 ! return address
move.l (sp)+,d0 ! divisor
move.l (sp)+,d2 ! dividend
move.l d3,a0 ! save d3
move.l d4,-(sp) ! save result sign register
clr.l d4
tst.l d0 ! divisor
bpl 1f
neg.l d0
not d4
1:
tst.l d1 ! dividend
bpl 2f
neg.l d1
not d4
swap d4
not d4
swap d4
tst.l d2
bpl 0f ! dividend is negative ?
neg.l d2 ! yes - negate
not.l d4 ! and note negation in d4
0:
tst.l d0
bpl 0f ! divisor is negative ?
neg.l d0 ! yes - negate
not.w d4 ! note negation
0:
clr.l d1 ! prepare quotient
! === case 1: divisor = 0
tst.l d0 ! divisor = 0 ?
beq 9f ! yes - divide by zero trap
! === case 2: dividend < divisor
cmp.l d0,d2 ! dividend < divisor ?
bcs 8f ! yes - division already finished
! === case 3: divisor <= 0x0ffff
cmp.l #0x0ffff,d0 ! is divisor only 16 bits wide ?
bhi 2f
move.w d2,d3 ! save dividend.l
clr.w d2 ! prepare dividend.h for divu operation
swap d2
beq 0f ! dividend.h is all zero, no divu necessary
divu d0,d2
0: move.w d2,d1 ! save quotient.h
swap d1
move.w d3,d2 ! divide dividend.l
divu d0,d2 ! (d2.h = remainder of prev divu)
move.w d2,d1 ! save qoutient.l
clr.w d2 ! get remainder
swap d2
bra 8f
! === case 4: divisor and dividend both > 0x0ffff
2:
move.l d1,-(sp)
move.l d0,-(sp)
jsr .dvu
tst d4
beq 5f
neg.l d1 ! quotient
move #32-1,d3 ! loop count
4:
lsl.l #1,d2 ! shift dividend ...
roxl.l #1,d1 ! ... into d1
cmp.l d0,d1 ! compare with divisor
bcs 5f
sub.l d0,d1 ! bigger, subtract divisor
add #1,d2 ! note subtraction in result
5:
tst.l d4
bpl 6f
neg.l d2 ! remainder
6:
move.l (sp)+,d4 ! restore d4 and d3
move.l (sp)+,d3
jmp (a0)
dbra d3,4b
exg d1,d2 ! get results in the correct registers
8:
tst.w d4 ! quotient < 0 ?
bpl 0f
neg.l d1 ! yes - negate
0: tst.l d4 ! remainder < 0 ?
bpl 0f
neg.l d2
0: move.l (sp)+,d4 ! restore d4
move.l a0,d3 ! restore d3
jmp (a1)
EIDIVZ = 6
9: move.w #EIDIVZ,-(sp)
jsr .trp

View file

@ -5,34 +5,77 @@
.sect .bss
! unsigned long divide
!-----------------------------------------------------------------------------
! rewritten by Kai-Uwe Bloem (i5110401@dbstu1.bitnet) for speed.
! #1 01/12/90 initial revision. Minor reduce of shift operations.
! #2 03/07/90 use 68000 divu instruction wherever possible. This change
! makes #1 superfluous. (derived from my GNU division routine)
!-----------------------------------------------------------------------------
! Some common cases can be handled in a special, much faster way :
! 1) divisor = 0
! => cause trap, then return to user. Result is undefined
! 2) dividend < divisor
! => quotient = 0, remainder = dividend
! 3) divisor < 0x10000 ( i.e. divisor is only 16 bits wide )
! => quotient and remainder can be calculated quite fast by repeated
! application of 68000 divu operations (ca. 400 cycles)
! 4) otherwise (due to #2, #3 dividend, divisor both wider than 16 bits)
! => do slow division by shift and subtract
!-----------------------------------------------------------------------------
! register usage:
! entry : d0 divisor
! d2 dividend
! exit : d1 quotient
! d2 remainder
.sect .text
.dvu:
move.l d3,a0 ! save d3
move.l (sp)+,a1 ! return address
move.l (sp)+,d0
move.l (sp)+,d1
move.l d3,-(sp) ! save d3
tst.l d0
bne 0f
move.l (sp)+,d3
move.w #EIDIVZ,-(sp)
jsr .trp
0:
clr.l d2
move.l #32,d3
3:
lsl.l #1,d1
roxl.l #1,d2
cmp.l d0,d2
blt 4f
sub.l d0,d2
add #1,d1
move.l (sp)+,d0 ! divisor
move.l (sp)+,d2 ! dividend
clr.l d1 ! prepare quotient
! === case 1: divisor = 0
tst.l d0 ! divisor = 0 ?
beq 9f ! yes - divide by zero trap
! === case 2: dividend < divisor
cmp.l d0,d2 ! dividend < divisor ?
bcs 8f ! yes - division already finished
! === case 3: divisor <= 0x0ffff
cmp.l #0x0ffff,d0 ! is divisor only 16 bits wide ?
bhi 2f
move.w d2,d3 ! save dividend.l
clr.w d2 ! prepare dividend.h for divu operation
swap d2
beq 0f ! dividend.h is all zero, no divu necessary
divu d0,d2
0: move.w d2,d1 ! save quotient.h
swap d1
move.w d3,d2 ! divide dividend.l
divu d0,d2 ! (d2.h = remainder of prev divu)
move.w d2,d1 ! save qoutient.l
clr.w d2 ! get remainder
swap d2
bra 8f
! === case 4: divisor and dividend both > 0x0ffff
2:
move #32-1,d3 ! loop count
4:
sub #1,d3
bgt 3b
move.l (sp)+,d3
lsl.l #1,d2 ! shift dividend ...
roxl.l #1,d1 ! ... into d1
cmp.l d0,d1 ! compare with divisor
bcs 5f
sub.l d0,d1 ! bigger, subtract divisor
add #1,d2 ! note subtraction in result
5:
dbra d3,4b
exg d1,d2 ! get results in the correct registers
8:
move.l a0,d3 ! restore d3
jmp (a1)
EIDIVZ = 6
9: move.w #EIDIVZ,-(sp)
jsr .trp

9
mach/m68k2/libem/edata.s Normal file
View file

@ -0,0 +1,9 @@
! Declare the four sections up front, in the order text, rom, data, bss.
.sect .text
.sect .rom
.sect .data
.sect .bss
! Export _edata so other modules can reference it.
.define _edata
.sect .data
.align 4 ! pad .data to a multiple of 4 before placing the label
.sect .data
! _edata is a label in .data at the current (aligned) position.
! NOTE(review): presumably this marks the end of initialized data because
! the module is linked after all other .data contributions - confirm.
_edata:

22
mach/m68k2/libem/em_end.s Normal file
View file

@ -0,0 +1,22 @@
! Declare the four regular sections up front, in the order text, rom, data, bss.
.sect .text
.sect .rom
.sect .data
.sect .bss
! Export the end-of-section labels defined below.
.define endtext,enddata,endbss,__end
! Pad every regular section to a multiple of 4 before the labels are placed.
.sect .text
.align 4
.sect .rom
.align 4
.sect .data
.align 4
.sect .bss
.align 4
.sect .end ! only for declaration of _end, __end and endbss.
! endtext: label at the current position in .text.
.sect .text
endtext:
! enddata: label at the current position in .data.
.sect .data
enddata:
! __end and endbss: two names for the same position in the .end section.
! NOTE(review): presumably .end is placed after .bss at link time, so these
! mark the end of the bss area - confirm against the linker description.
.sect .end
__end:
endbss:

View file

@ -1,16 +1,7 @@
.define endtext,enddata,endbss,_etext,_edata,_end
.sect .text
.sect .rom
.sect .data
.sect .bss
.sect .end ! only for declaration of _end and endbss.
.sect .text
endtext:
_etext:
.sect .data
enddata:
_edata:
.sect .end
.define _end
.sect .end ! only for declaration of _end, __end and endbss.
_end:
endbss:

9
mach/m68k2/libem/etext.s Normal file
View file

@ -0,0 +1,9 @@
! Declare the four sections up front, in the order text, rom, data, bss.
.sect .text
.sect .rom
.sect .data
.sect .bss
! Export _etext so other modules can reference it.
.define _etext
.sect .text
.align 4 ! pad .text to a multiple of 4 before placing the label
.sect .text
! _etext is a label in .text at the current (aligned) position.
! NOTE(review): presumably this marks the end of the text segment because
! the module is linked after all other .text contributions - confirm.
_etext:

352
mach/m68k2/libem/fp68881.s Normal file
View file

@ -0,0 +1,352 @@
.define .adf4, .adf8, .sbf4, .sbf8, .mlf4, .mlf8, .dvf4, .dvf8
.define .ngf4, .ngf8, .fif4, .fif8, .fef4, .fef8
.define .cif4, .cif8, .cuf4, .cuf8, .cfi, .cfu, .cff4, .cff8
.define .cmf4, .cmf8
.sect .text
.sect .rom
.sect .data
.sect .bss
! $Header$
! Implement interface to floating point package for M68881
.sect .text
! .adf4: add two single-precision floats.
! After the return address is popped, the operands are at (sp) and 4(sp).
! The sum overwrites 4(sp); the slot at (sp) is left in place
! (presumably removed by the caller - confirm against the code generator).
! Uses a0 and fp0.
.adf4:
move.l (sp)+,a0 ! pop return address
fmove.s (sp),fp0 ! fp0 = operand on top of the stack
fadd.s 4(sp),fp0 ! fp0 += operand below it
fmove.s fp0,4(sp) ! store the sum over the lower operand
jmp (a0) ! return
! .adf8: add two double-precision floats.
! After the return address is popped, the operands are at (sp) and 8(sp).
! The sum overwrites 8(sp); the 8 bytes at (sp) are left in place
! (presumably removed by the caller - confirm against the code generator).
! Uses a0 and fp0.
.adf8:
move.l (sp)+,a0 ! pop return address
fmove.d (sp),fp0 ! fp0 = operand on top of the stack
fadd.d 8(sp),fp0 ! fp0 += operand below it
fmove.d fp0,8(sp) ! store the sum over the lower operand
jmp (a0) ! return
! .sbf4: subtract two single-precision floats.
! After the return address is popped: result = 4(sp) - (sp), stored at 4(sp).
! The slot at (sp) is left in place (presumably removed by the caller - confirm).
! Uses a0, fp0 and fp1.
.sbf4:
move.l (sp)+,a0 ! pop return address
fmove.s (sp),fp0 ! fp0 = subtrahend (top of stack)
fmove.s 4(sp),fp1 ! fp1 = minuend
fsub fp0,fp1 ! fp1 = fp1 - fp0
fmove.s fp1,4(sp) ! store the difference over the minuend
jmp (a0) ! return
! .sbf8: subtract two double-precision floats.
! After the return address is popped: result = 8(sp) - (sp), stored at 8(sp).
! The 8 bytes at (sp) are left in place (presumably removed by the caller - confirm).
! Uses a0, fp0 and fp1.
.sbf8:
move.l (sp)+,a0 ! pop return address
fmove.d (sp),fp0 ! fp0 = subtrahend (top of stack)
fmove.d 8(sp),fp1 ! fp1 = minuend
fsub fp0,fp1 ! fp1 = fp1 - fp0
fmove.d fp1,8(sp) ! store the difference over the minuend
jmp (a0) ! return
! .mlf4: multiply two single-precision floats.
! After the return address is popped, the operands are at (sp) and 4(sp).
! The product overwrites 4(sp); the slot at (sp) is left in place
! (presumably removed by the caller - confirm).
! Uses a0 and fp0.
.mlf4:
move.l (sp)+,a0 ! pop return address
fmove.s (sp),fp0 ! fp0 = operand on top of the stack
fmul.s 4(sp),fp0 ! fp0 *= operand below it
fmove.s fp0,4(sp) ! store the product over the lower operand
jmp (a0) ! return
! .mlf8: multiply two double-precision floats.
! After the return address is popped, the operands are at (sp) and 8(sp).
! The product overwrites 8(sp); the 8 bytes at (sp) are left in place
! (presumably removed by the caller - confirm).
! Uses a0 and fp0.
.mlf8:
move.l (sp)+,a0 ! pop return address
fmove.d (sp),fp0 ! fp0 = operand on top of the stack
fmul.d 8(sp),fp0 ! fp0 *= operand below it
fmove.d fp0,8(sp) ! store the product over the lower operand
jmp (a0) ! return
! .dvf4: divide two single-precision floats.
! After the return address is popped: result = 4(sp) / (sp), stored at 4(sp).
! The slot at (sp) is left in place (presumably removed by the caller - confirm).
! Uses a0, fp0 and fp1.
.dvf4:
move.l (sp)+,a0 ! pop return address
fmove.s (sp),fp0 ! fp0 = divisor (top of stack)
fmove.s 4(sp),fp1 ! fp1 = dividend
fdiv fp0,fp1 ! fp1 = fp1 / fp0
fmove.s fp1,4(sp) ! store the quotient over the dividend
jmp (a0) ! return
! .dvf8: divide two double-precision floats.
! After the return address is popped: result = 8(sp) / (sp), stored at 8(sp).
! The 8 bytes at (sp) are left in place (presumably removed by the caller - confirm).
! Uses a0, fp0 and fp1.
.dvf8:
move.l (sp)+,a0 ! pop return address
fmove.d (sp),fp0 ! fp0 = divisor (top of stack)
fmove.d 8(sp),fp1 ! fp1 = dividend
fdiv fp0,fp1 ! fp1 = fp1 / fp0
fmove.d fp1,8(sp) ! store the quotient over the dividend
jmp (a0) ! return
! .ngf4: negate a single-precision float in place.
! The return address stays on the stack, so the operand is at 4(sp).
! Uses fp0; returns with rts.
.ngf4:
fmove.s 4(sp),fp0 ! fp0 = operand (just above the return address)
fneg fp0 ! fp0 = -fp0
fmove.s fp0,4(sp) ! write the result back over the operand
rts
! .ngf8: negate a double-precision float in place.
! The return address stays on the stack, so the operand is at 4(sp).
! Uses fp0; returns with rts.
.ngf8:
fmove.d 4(sp),fp0 ! fp0 = operand (just above the return address)
fneg fp0 ! fp0 = -fp0
fmove.d fp0,4(sp) ! write the result back over the operand
rts
! .fif4: multiply two single-precision floats and split the product into
! an integer part and a fractional part.
! After the return address is popped: (sp) = pointer to the result area,
! 4(sp) and 8(sp) = the two operands.
! Result area: (a1) = integer part (product truncated toward zero),
! 4(a1) = product minus integer part.
! Nothing is removed from the stack. Uses a0, a1, fp0 and fp1.
.fif4:
move.l (sp)+,a0 ! pop return address
move.l (sp),a1 ! a1 = address of the result area
fmove.s 4(sp),fp0 ! fp0 = first operand
fmove.s 8(sp),fp1 ! fp1 = second operand
fmul fp0,fp1 ! fp1 = product
fintrz fp1,fp0 ! fp0 = product rounded toward zero
fsub fp0,fp1 ! fp1 = product - integer part
fmove.s fp1,4(a1) ! store fractional part
fmove.s fp0,(a1) ! store integer part
jmp (a0) ! return
! .fif8: multiply two double-precision floats and split the product into
! an integer part and a fractional part.
! After the return address is popped: (sp) = pointer to the result area,
! 4(sp) and 12(sp) = the two operands.
! Result area: (a1) = integer part (product truncated toward zero),
! 8(a1) = product minus integer part.
! Nothing is removed from the stack. Uses a0, a1, fp0 and fp1.
.fif8:
move.l (sp)+,a0 ! pop return address
move.l (sp),a1 ! a1 = address of the result area
fmove.d 4(sp),fp0 ! fp0 = first operand
fmove.d 12(sp),fp1 ! fp1 = second operand
fmul fp0,fp1 ! fp1 = product
fintrz fp1,fp0 ! fp0 = product rounded toward zero
fsub fp0,fp1 ! fp1 = product - integer part
fmove.d fp1,8(a1) ! store fractional part
fmove.d fp0,(a1) ! store integer part
jmp (a0) ! return
! .fef4: split a single-precision float into exponent and fraction.
! After the return address is popped: (sp) = pointer to the result area,
! 4(sp) = the operand.
! Result area: word at (a1) = exponent, single at 2(a1) = fraction.
!   infinity : exponent 129, fraction +0.5 or -0.5 (by sign of the operand)
!   zero     : exponent 0, fraction = the (zero) mantissa
!   otherwise: exponent = fgetexp + 1, fraction = fgetman / 2
! Nothing is removed from the stack. Uses a0, a1, d0, fp0 and fp1.
.fef4:
move.l (sp)+,a0 ! pop return address
move.l (sp),a1 ! a1 = address of the result area
fmove.s 4(sp),fp0 ! fp0 = operand
fgetexp fp0,fp1 ! fp1 = unbiased exponent of the operand
fmove.l fpsr,d0 ! read the FP status register
and.l #0x2000,d0 ! isolate bit 13; set if Infinity
beq 1f ! not infinity: normal path
move.w #129,(a1) ! infinity: fixed exponent value
fmove.s 4(sp),fp0 ! reload operand to set the FP condition codes
fblt 2f ! negative infinity
move.l #0x3f000000,2(a1) ! fraction = +0.5 (single bit pattern)
jmp (a0)
2:
move.l #0xbf000000,2(a1) ! fraction = -0.5 (single bit pattern)
jmp (a0)
1:
fmove.l fp1,d0 ! d0 = exponent as an integer
add.l #1,d0 ! +1 because the mantissa is halved below
fgetman fp0 ! fp0 = mantissa of the operand
fbne 1f ! non-zero mantissa: go scale it
clr.l d0 ! zero operand: exponent is 0
bra 2f
1:
fmove.l #2,fp1
fdiv fp1,fp0 ! fraction = mantissa / 2
2:
fmove.s fp0,2(a1) ! store fraction
move.w d0,(a1) ! store exponent (low word of d0)
jmp (a0) ! return
! .fef8: split a double-precision float into exponent and fraction.
! After the return address is popped: (sp) = pointer to the result area,
! 4(sp) = the operand.
! Result area: word at (a1) = exponent, double at 2(a1) = fraction.
!   infinity : exponent 1025, fraction +0.5 or -0.5 (by sign of the operand)
!   zero     : exponent 0, fraction = the (zero) mantissa
!   otherwise: exponent = fgetexp + 1, fraction = fgetman / 2
! Nothing is removed from the stack. Uses a0, a1, d0, fp0 and fp1.
.fef8:
move.l (sp)+,a0 ! pop return address
move.l (sp),a1 ! a1 = address of the result area
fmove.d 4(sp),fp0 ! fp0 = operand
fgetexp fp0,fp1 ! fp1 = unbiased exponent of the operand
fmove.l fpsr,d0 ! read the FP status register
and.l #0x2000,d0 ! isolate bit 13; set if Infinity
beq 1f ! not infinity: normal path
move.w #1025,(a1) ! infinity: fixed exponent value
fmove.d 4(sp),fp0 ! reload operand to set the FP condition codes
fblt 2f ! negative infinity
move.l #0x3fe00000,2(a1) ! fraction = +0.5 (high long of the double)
clr.l 6(a1) ! low long of the double = 0
jmp (a0)
2:
move.l #0xbfe00000,2(a1) ! fraction = -0.5 (high long of the double)
clr.l 6(a1) ! low long of the double = 0
jmp (a0)
1:
fmove.l fp1,d0 ! d0 = exponent as an integer
add.l #1,d0 ! +1 because the mantissa is halved below
fgetman fp0 ! fp0 = mantissa of the operand
fbne 1f ! non-zero mantissa: go scale it
clr.l d0 ! zero operand: exponent is 0
bra 2f
1:
fmove.l #2,fp1
fdiv fp1,fp0 ! fraction = mantissa / 2
2:
fmove.d fp0,2(a1) ! store fraction
move.w d0,(a1) ! store exponent (low word of d0)
jmp (a0) ! return
! .cif4: convert a signed integer to a single-precision float.
! After the return address is popped: word at (sp) = source size in bytes,
! 2(sp) = the integer (word if size is 2, long otherwise).
! Size 2: the 4-byte result is stored at (sp), over the size word and the integer.
! Otherwise: the 4-byte result is stored at 2(sp), over the long integer.
! Nothing is removed from the stack. Uses a0 and fp0.
.cif4:
move.l (sp)+,a0 ! pop return address
cmp.w #2,(sp) ! 2-byte source?
bne 1f
fmove.w 2(sp),fp0 ! fp0 = signed word
fmove.s fp0,(sp) ! result replaces size word + integer
jmp (a0)
1:
fmove.l 2(sp),fp0 ! fp0 = signed long
fmove.s fp0,2(sp) ! result replaces the integer
jmp (a0)
! .cif8: convert a signed integer to a double-precision float.
! After the return address is popped: word at (sp) = source size in bytes,
! 2(sp) = the integer (word if size is 2, long otherwise).
! In both cases the 8-byte result is stored starting at (sp).
! NOTE(review): the result is wider than size word + integer; presumably the
! caller has reserved the extra stack space - confirm against the code generator.
! Nothing is removed from the stack. Uses a0 and fp0.
.cif8:
move.l (sp)+,a0 ! pop return address
cmp.w #2,(sp) ! 2-byte source?
bne 1f
fmove.w 2(sp),fp0 ! fp0 = signed word
fmove.d fp0,(sp) ! store double at (sp)
jmp (a0)
1:
fmove.l 2(sp),fp0 ! fp0 = signed long
fmove.d fp0,(sp) ! store double at (sp)
jmp (a0)
! .cuf4: convert an unsigned integer to a single-precision float.
! After the return address is popped: word at (sp) = source size in bytes,
! 2(sp) = the integer (word if size is 2, long otherwise).
! The value is first loaded as signed; if that reading is negative, 2^16
! (word) or 2^32 (long) is added to obtain the unsigned value.
! Size 2: result stored at (sp). Otherwise: result stored at 2(sp).
! Nothing is removed from the stack. Uses a0 and fp0.
.cuf4:
move.l (sp)+,a0 ! pop return address
cmp.w #2,(sp) ! 2-byte source?
bne 2f
fmove.w 2(sp),fp0 ! fp0 = value read as a signed word
tst.w 2(sp)
bge 1f ! top bit clear: already correct
fadd.l #65536,fp0 ! top bit set: add 2^16
1:
fmove.s fp0,(sp) ! result replaces size word + integer
jmp (a0)
2:
fmove.l 2(sp),fp0 ! fp0 = value read as a signed long
tst.l 2(sp)
bge 1f ! top bit clear: already correct
fsub.l #-2147483648,fp0 ! top bit set: add 2^31 ...
fsub.l #-2147483648,fp0 ! ... twice, i.e. add 2^32
1:
fmove.s fp0,2(sp) ! result replaces the integer
jmp (a0)
! .cuf8: convert an unsigned integer to a double-precision float.
! After the return address is popped: word at (sp) = source size in bytes,
! 2(sp) = the integer (word if size is 2, long otherwise).
! The value is first loaded as signed; if that reading is negative, 2^16
! (word) or 2^32 (long) is added to obtain the unsigned value.
! In both cases the 8-byte result is stored starting at (sp).
! NOTE(review): presumably the caller has reserved room for the 8-byte
! result - confirm against the code generator.
! Nothing is removed from the stack. Uses a0, d0 and fp0.
.cuf8:
move.l (sp)+,a0 ! pop return address
move.w (sp),d0 ! d0 = source size
cmp.w #2,d0 ! 2-byte source?
bne 2f
fmove.w 2(sp),fp0 ! fp0 = value read as a signed word
tst.w 2(sp)
bge 1f ! top bit clear: already correct
fadd.l #65536,fp0 ! top bit set: add 2^16
bra 1f
2:
fmove.l 2(sp),fp0 ! fp0 = value read as a signed long
tst.l 2(sp)
bge 1f ! top bit clear: already correct
fsub.l #-2147483648,fp0 ! top bit set: add 2^31 ...
fsub.l #-2147483648,fp0 ! ... twice, i.e. add 2^32
1:
fmove.d fp0,(sp) ! store double at (sp)
jmp (a0)
! .cfi: convert a float to a signed integer, truncating toward zero.
! After the return address is popped: word at (sp) = destination size in
! bytes, word at 2(sp) = source size in bytes, 4(sp) = the float
! (single if source size is 4, double otherwise).
! The integer is written right-aligned into the last 4 bytes of the float:
! a long fills them, a word goes into the last 2 bytes.
! Nothing is removed from the stack; sp is only moved temporarily so the
! store offsets are the same for both source sizes.
! Uses a0, d0, d1 and fp0.
.cfi:
move.l (sp)+,a0 ! pop return address
move.w (sp),d1 ! d1 = destination size
move.w 2(sp),d0 ! d0 = source size
cmp.w #4,d0 ! single-precision source?
bne 1f
fmove.s 4(sp),fp0 ! fp0 = single operand
bra 2f
1:
fmove.d 4(sp),fp0 ! fp0 = double operand
add.l #4,sp ! skip 4 bytes so the stores below hit the end of the double
2:
! fmove to an integer destination rounds by the current FPCR mode.
! Truncate explicitly first, as .cfu does, so the conversion always
! rounds toward zero.
fintrz fp0,fp0
cmp.w #2,d1 ! 2-byte destination?
bne 1f
fmove.w fp0,6(sp) ! word result in the last 2 bytes of the float
bra 2f
1:
fmove.l fp0,4(sp) ! long result in the last 4 bytes of the float
2:
cmp.w #4,d0 ! double source?
beq 1f
sub.l #4,sp ! undo the temporary sp adjustment
1:
jmp (a0) ! return
! .cfu: convert a float to an unsigned integer, truncating toward zero.
! After the return address is popped: word at (sp) = destination size in
! bytes, word at 2(sp) = source size in bytes, 4(sp) = the float
! (single if source size is 4, double otherwise).
! The absolute value of the operand is converted. If the stored bit pattern
! compares (signed) >= the encoding of 2^31, 2^31 is subtracted before the
! conversion and added back by toggling bit 31 of the integer.
! The integer is written right-aligned into the last 4 bytes of the float:
! a long fills them, a word goes into the last 2 bytes.
! Nothing is removed from the stack; sp is only moved temporarily.
! Uses a0, d0, d1, d2 and fp0.
.cfu:
move.l (sp)+,a0 ! pop return address
move.w (sp),d1 ! d1 = destination size
move.w 2(sp),d2 ! d2 = source size
cmp.w #4,d2 ! single-precision source?
bne 1f
fmove.s 4(sp),fp0 ! fp0 = single operand
fabs fp0 ! work on the absolute value
cmp.l #0x4f000000,4(sp) ! compare raw bits with single 2^31
bge 2f ! too large for a signed long
fintrz fp0,fp0 ! truncate toward zero
fmove.l fp0,d0 ! d0 = integer value
bra 3f
2:
fadd.l #-2147483648,fp0 ! bring into signed range: subtract 2^31
fintrz fp0,fp0 ! truncate toward zero
fmove.l fp0,d0
bchg #31,d0 ! add 2^31 back by toggling the top bit
bra 3f
1:
fmove.d 4(sp),fp0 ! fp0 = double operand
add.l #4,sp ! skip 4 bytes; (sp) is now the high long of the double
fabs fp0 ! work on the absolute value
cmp.l #0x41e00000,(sp) ! compare high long with that of double 2^31
bge 1f ! too large for a signed long
fintrz fp0,fp0 ! truncate toward zero
fmove.l fp0,d0 ! d0 = integer value
bra 3f
1:
fadd.l #-2147483648,fp0 ! bring into signed range: subtract 2^31
fintrz fp0,fp0 ! truncate toward zero
fmove.l fp0,d0
bchg #31,d0 ! add 2^31 back by toggling the top bit
3:
cmp.w #2,d1 ! 2-byte destination?
bne 1f
move.w d0,6(sp) ! word result in the last 2 bytes of the float
bra 2f
1:
move.l d0,4(sp) ! long result in the last 4 bytes of the float
2:
cmp.w #4,d2 ! double source?
beq 1f
sub.l #4,sp ! undo the temporary sp adjustment
1:
jmp (a0) ! return
! .cff4: convert a double-precision float to single precision.
! After the return address is popped, the double is at (sp).
! The single result is written to 4(sp), the last 4 bytes of the double.
! Nothing is removed from the stack. Uses a0 and fp0.
.cff4:
move.l (sp)+,a0 ! pop return address
fmove.d (sp),fp0 ! fp0 = double operand
fmove.s fp0,4(sp) ! store as single in the upper half of the operand
jmp (a0) ! return
! .cff8: convert a single-precision float to double precision.
! After the return address is popped, the single is at (sp).
! The 8-byte result is written starting at (sp).
! NOTE(review): this writes 4 bytes beyond the single operand; presumably the
! caller has reserved that space - confirm against the code generator.
! Nothing is removed from the stack. Uses a0 and fp0.
.cff8:
move.l (sp)+,a0 ! pop return address
fmove.s (sp),fp0 ! fp0 = single operand
fmove.d fp0,(sp) ! store as double starting at (sp)
jmp (a0) ! return
! .cmf4: compare two single-precision floats.
! After the return address is popped, the operands are at (sp) and 4(sp).
! Returns in d0: 0 if they compare equal, -1 if 4(sp) < (sp), +1 otherwise.
! The operands are left on the stack. Uses a0, d0, fp0 and fp1.
.cmf4:
move.l (sp)+,a0 ! pop return address
clr.l d0 ! default result: equal
fmove.s (sp),fp0 ! fp0 = operand on top of the stack
fmove.s 4(sp),fp1 ! fp1 = operand below it
fcmp fp0,fp1 ! set FP condition codes from fp1 - fp0
fbeq 2f ! equal: d0 stays 0
fblt 1f ! fp1 < fp0
add.l #1,d0 ! otherwise: d0 = +1
jmp (a0)
1:
sub.l #1,d0 ! d0 = -1
2:
jmp (a0)
! .cmf8: compare two double-precision floats.
! After the return address is popped, the operands are at (sp) and 8(sp).
! Returns in d0: 0 if they compare equal, -1 if 8(sp) < (sp), +1 otherwise.
! The operands are left on the stack. Uses a0, d0, fp0 and fp1.
.cmf8:
move.l (sp)+,a0 ! pop return address
clr.l d0 ! default result: equal
fmove.d (sp),fp0 ! fp0 = operand on top of the stack
fmove.d 8(sp),fp1 ! fp1 = operand below it
fcmp fp0,fp1 ! set FP condition codes from fp1 - fp0
fbeq 2f ! equal: d0 stays 0
fblt 1f ! fp1 < fp0
add.l #1,d0 ! otherwise: d0 = +1
jmp (a0)
1:
sub.l #1,d0 ! d0 = -1
2:
jmp (a0)

View file

@ -4,31 +4,92 @@
.sect .data
.sect .bss
! signed long multiply
!-----------------------------------------------------------------------------
! rewritten by Kai-Uwe Bloem (i5110401@dbstu1.bitnet) for speed.
! #1 01/12/90 initial revision
!-----------------------------------------------------------------------------
! 3 cases worth to recognize :
! 1) both the upper word of u and v are zero
! => 1 mult : Low*Low
! 2) only one of the upper words is zero
! => 2 mult : Low*HighLow
! 3) both upper words are not zero
! => 4 mult : HighLow*HighLow
! there are other cases (e.g. lower word is zero but high word is not, or
! one operand is all zero). However, this seems not to be very common, so
! they are ignored for the price of superfluous multiplications in these
! cases.
!-----------------------------------------------------------------------------
! entry : d0 multiplicand
! d1 multiplier
! exit : d0 high order result
! d1 low order result
! d2,a0,a1 : destroyed
.sect .text
.mli:
move.l (sp)+,a0
move.l (sp)+,d1
move.l (sp)+,d0
move.l d5,-(sp)
clr d5
tst.l d0
bpl 1f
neg.l d0
not d5
1:
tst.l d1
bpl 2f
neg.l d1
not d5
2:
move.l d0,-(sp)
move.l d1,-(sp)
jsr .mlu
tst d5
beq 3f
move.l (sp)+,a1 ! return address
move.l d3,a0 ! save register
movem.w (sp)+,d0-d3 ! get v and u
move.w d5,-(sp) ! save sign register
move.w d2,d5
bge 0f ! negate u if neccessary
neg.w d1
negx.w d0
0: tst.w d0
bge 0f ! negate v if neccessary
eor.w d0,d5
neg.w d1
negx.w d0
0: bne 1f ! case 2) or 3)
tst.w d2
bne 2f ! case 2)
! === case 1: _l x _l ===
mulu d3,d1 ! r.l = u.l x v.l
9: ! (r.h is already zero)
tst.w d5 ! negate result if neccessary
bpl 0f
neg.l d1
negx.l d0
0: move.w (sp)+,d5 ! return
move.l a0,d3
jmp (a1)
! === possibly case 2) or case 3) ===
1:
tst.w d2
bne 3f ! case 3)
! === case 2: _l x hl ===
exg d0,d2 ! exchange u and v
exg d1,d3 ! (minimizes number of distinct cases)
2:
mulu d1,d2 ! a = v.l x u.h
mulu d3,d1 ! r.l = v.l x u.l
swap d2 ! a = a << 16
clr.l d3
move.w d2,d3
clr.w d2
add.l d2,d1 ! r += a
addx.l d3,d0
bra 9b
! === case 3: hl x hl ===
3:
move.l (sp)+,d5
jmp (a0)
move.l d4,-(sp) ! need more registers
move.w d2,d4
mulu d1,d4 ! a = v.l x u.h
mulu d3,d1 ! r.l = u.l x v.l
mulu d0,d3 ! b = v.h x u.l
mulu d2,d0 ! r.h = u.h x v.h
swap d1 ! (just for simplicity)
add.w d4,d1 ! r += a << 16
clr.w d4
swap d4
addx.l d4,d0
add.w d3,d1 ! r += b << 16
clr.w d3
swap d3
addx.l d3,d0
swap d1
move.l (sp)+,d4 ! return
bra 9b

View file

@ -4,38 +4,79 @@
.sect .data
.sect .bss
! unsigned long multiply
!-----------------------------------------------------------------------------
! rewritten by Kai-Uwe Bloem (i5110401@dbstu1.bitnet) for speed.
! #1 01/12/90 initial revision
!-----------------------------------------------------------------------------
! 3 cases worth to recognize :
! 1) both the upper word of u and v are zero
! => 1 mult : Low*Low
! 2) only one of the upper words is zero
! => 2 mult : Low*HighLow
! 3) both upper words are not zero
! => 4 mult : HighLow*HighLow
! there are other cases (e.g. lower word is zero but high word is not, or
! one operand is all zero). However, this seems not to be very common, so
! they are ignored for the price of superfluous multiplications in these
! cases.
!-----------------------------------------------------------------------------
! entry : d0 multiplicand
! d1 multiplier
! exit : d0 high order result
! d1 low order result
! d2,a0,a1 : destroyed
.sect .text
.mlu:
move.l (sp)+,a1
move.l (sp)+,d1
move.l (sp)+,d0
movem.l d3/d4/d6,-(sp)
move.l d1,d3
move.l d1,d2
swap d2
move.l d2,d4
mulu d0,d1
mulu d0,d2
swap d0
mulu d0,d3
mulu d4,d0
clr.l d6
swap d1
add d2,d1
addx.l d6,d0
add d3,d1
addx.l d6,d0
swap d1
clr d2
clr d3
swap d2
swap d3
add.l d2,d0
add.l d3,d0
movem.l (sp)+,d3/d4/d6
move.l (sp)+,a1 ! return address
move.l d3,a0 ! save register
movem.w (sp)+,d0-d3 ! get v and u
tst.w d0
bne 1f ! case 2) or 3)
tst.w d2
bne 2f ! case 2)
! === case 1: _l x _l ===
mulu d3,d1 ! r.l = u.l x v.l
move.l a0,d3 ! (r.h is already zero)
jmp (a1) ! return
! === possibly case 2) or case 3) ===
1:
tst.w d2
bne 3f ! case 3)
! === case 2: _l x hl ===
exg d0,d2 ! exchange u and v
exg d1,d3 ! (minimizes number of distinct cases)
2:
mulu d1,d2 ! a = v.l x u.h
mulu d3,d1 ! r.l = v.l x u.l
swap d2 ! a = a << 16
clr.l d3
move.w d2,d3
clr.w d2
add.l d2,d1 ! r += a
addx.l d3,d0
move.l a0,d3 ! return
jmp (a1)
! === case 3: hl x hl ===
3:
move.l d4,-(sp) ! need more registers
move.w d2,d4
mulu d1,d4 ! a = v.l x u.h
mulu d3,d1 ! r.l = u.l x v.l
mulu d0,d3 ! b = v.h x u.l
mulu d2,d0 ! r.h = u.h x v.h
swap d1 ! (just for simplicity)
add.w d4,d1 ! r += a << 16
clr.w d4
swap d4
addx.l d4,d0
add.w d3,d1 ! r += b << 16
clr.w d3
swap d3
addx.l d3,d0
swap d1
move.l (sp)+,d4 ! return
move.l a0,d3
jmp (a1)