Added end library and floating point processor support
This commit is contained in:
parent
e64fb88a5d
commit
0f4e675b50
|
@ -1,5 +1,9 @@
|
|||
LIST
|
||||
Makefile
|
||||
compmodule
|
||||
em_end.s
|
||||
etext.s
|
||||
edata.s
|
||||
end.s
|
||||
libem_s.a
|
||||
READ_ME
|
||||
|
|
|
@ -26,7 +26,7 @@ shp.s
|
|||
sig.s
|
||||
cms.s
|
||||
gto.s
|
||||
ffp.s
|
||||
fp68881.s
|
||||
fat.s
|
||||
trp.s
|
||||
dia.s
|
||||
|
|
|
@ -1,28 +1,33 @@
|
|||
# $Header$
|
||||
MACH=m68k2
|
||||
all: libem_o.a end.o
|
||||
ASAR=aal
|
||||
all: libem_o.a end.a
|
||||
|
||||
install: all
|
||||
../../install libem_o.a tail_em
|
||||
../../install end.o end_em
|
||||
../../install end.a end_em
|
||||
|
||||
cmp: all
|
||||
-../../compare libem_o.a tail_em
|
||||
-../../compare end.o end_em
|
||||
-../../compare end.a end_em
|
||||
|
||||
end.o: end.s
|
||||
end.a: em_end.s etext.s edata.s end.s
|
||||
$(MACH) -I../../../h -c em_end.s
|
||||
$(MACH) -I../../../h -c edata.s
|
||||
$(MACH) -I../../../h -c etext.s
|
||||
$(MACH) -I../../../h -c end.s
|
||||
$(ASAR) cr end.a em_end.o etext.o edata.o end.o
|
||||
|
||||
libem_o.a: libem_s.a
|
||||
ASAR=aal ; export ASAR ;\
|
||||
ASAR=$(ASAR) ; export ASAR ;\
|
||||
march . libem_o.a
|
||||
|
||||
clean:
|
||||
rm -f *.o libem_o.a
|
||||
rm -f *.o libem_o.a end.a
|
||||
|
||||
opr :
|
||||
make pr | opr
|
||||
|
||||
pr:
|
||||
@arch pv libem_s.a | pr -h `pwd`/libem_s.a
|
||||
@pr `pwd`/end.s
|
||||
@pr `pwd`/em_end.s `pwd`/edata.s `pwd`/etext.s `pwd`/end.s
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
The original EM library routines saved all registers
|
||||
(including scratch registers) in global data; hence they
|
||||
were not reentrant.
|
||||
The new routines do not save registers d0,d1,d2,a0 and a1.
|
||||
They are reentrant.
|
||||
The routines in mli.s, mlu.s, dvi.s, and dvu.s are written by
|
||||
Kai-Uwe Bloem and were published on the comp.os.minix newsgroup.
|
||||
He allowed us to use them for ACK, but requested that
|
||||
they do not fall under the ACK copyright notice. So, they don't.
|
||||
|
|
|
@ -5,38 +5,96 @@
|
|||
.sect .bss
|
||||
|
||||
! signed long divide
|
||||
!-----------------------------------------------------------------------------
|
||||
! rewritten by Kai-Uwe Bloem (i5110401@dbstu1.bitnet) for speed.
|
||||
! #1 01/12/90 initial revision. Minor reduce of shift operations.
|
||||
! #2 03/07/90 use 68000 divu instruction wherever possible. This change
|
||||
! makes #1 superfluous. (derived from my GNU division routine)
|
||||
!-----------------------------------------------------------------------------
|
||||
! Some common cases can be handled in a special, much faster way :
|
||||
! 1) divisor = 0
|
||||
! => cause trap, then return to user. Result is undefined
|
||||
! 2) dividend < divisor
|
||||
! => quotient = 0, remainder = dividend
|
||||
! 3) divisor < 0x10000 ( i.e. divisor is only 16 bits wide )
|
||||
! => quotient and remainder can be calculated quite fast by repeated
|
||||
! application of 68000 divu operations (ca. 400 cycles)
|
||||
! 4) otherwise (due to #2, #3 dividend, divisor both wider than 16 bits)
|
||||
! => do slow division by shift and subtract
|
||||
!-----------------------------------------------------------------------------
|
||||
|
||||
|
||||
! register usage:
|
||||
! : d0 divisor
|
||||
! d1 dividend
|
||||
! exit : d1 quotient
|
||||
! d2 remainder
|
||||
|
||||
.sect .text
|
||||
.dvi:
|
||||
move.l (sp)+,a0 ! return address
|
||||
move.l (sp)+,d0
|
||||
move.l (sp)+,d1
|
||||
move.l d3,-(sp) ! save d3 and d4
|
||||
move.l d4,-(sp)
|
||||
move.l (sp)+,a1 ! return address
|
||||
move.l (sp)+,d0 ! divisor
|
||||
move.l (sp)+,d2 ! dividend
|
||||
move.l d3,a0 ! save d3
|
||||
move.l d4,-(sp) ! save result sign register
|
||||
clr.l d4
|
||||
tst.l d0 ! divisor
|
||||
bpl 1f
|
||||
neg.l d0
|
||||
not d4
|
||||
1:
|
||||
tst.l d1 ! dividend
|
||||
bpl 2f
|
||||
neg.l d1
|
||||
not d4
|
||||
swap d4
|
||||
not d4
|
||||
swap d4
|
||||
tst.l d2
|
||||
bpl 0f ! dividend is negative ?
|
||||
neg.l d2 ! yes - negate
|
||||
not.l d4 ! and note negation in d4
|
||||
0:
|
||||
tst.l d0
|
||||
bpl 0f ! divisor is negative ?
|
||||
neg.l d0 ! yes - negate
|
||||
not.w d4 ! note negation
|
||||
0:
|
||||
clr.l d1 ! prepare quotient
|
||||
! === case 1: divisor = 0
|
||||
tst.l d0 ! divisor = 0 ?
|
||||
beq 9f ! yes - divide by zero trap
|
||||
! === case 2: dividend < divisor
|
||||
cmp.l d0,d2 ! dividend < divisor ?
|
||||
bcs 8f ! yes - division already finished
|
||||
! === case 3: divisor <= 0x0ffff
|
||||
cmp.l #0x0ffff,d0 ! is divisor only 16 bits wide ?
|
||||
bhi 2f
|
||||
move.w d2,d3 ! save dividend.l
|
||||
clr.w d2 ! prepare dividend.h for divu operation
|
||||
swap d2
|
||||
beq 0f ! dividend.h is all zero, no divu necessary
|
||||
divu d0,d2
|
||||
0: move.w d2,d1 ! save quotient.h
|
||||
swap d1
|
||||
move.w d3,d2 ! divide dividend.l
|
||||
divu d0,d2 ! (d2.h = remainder of prev divu)
|
||||
move.w d2,d1 ! save qoutient.l
|
||||
clr.w d2 ! get remainder
|
||||
swap d2
|
||||
bra 8f
|
||||
! === case 4: divisor and dividend both > 0x0ffff
|
||||
2:
|
||||
move.l d1,-(sp)
|
||||
move.l d0,-(sp)
|
||||
jsr .dvu
|
||||
tst d4
|
||||
beq 5f
|
||||
neg.l d1 ! quotient
|
||||
move #32-1,d3 ! loop count
|
||||
4:
|
||||
lsl.l #1,d2 ! shift dividend ...
|
||||
roxl.l #1,d1 ! ... into d1
|
||||
cmp.l d0,d1 ! compare with divisor
|
||||
bcs 5f
|
||||
sub.l d0,d1 ! bigger, subtract divisor
|
||||
add #1,d2 ! note subtraction in result
|
||||
5:
|
||||
tst.l d4
|
||||
bpl 6f
|
||||
neg.l d2 ! remainder
|
||||
6:
|
||||
move.l (sp)+,d4 ! restore d4 and d3
|
||||
move.l (sp)+,d3
|
||||
jmp (a0)
|
||||
dbra d3,4b
|
||||
exg d1,d2 ! get results in the correct registers
|
||||
8:
|
||||
tst.w d4 ! quotient < 0 ?
|
||||
bpl 0f
|
||||
neg.l d1 ! yes - negate
|
||||
0: tst.l d4 ! remainder < 0 ?
|
||||
bpl 0f
|
||||
neg.l d2
|
||||
0: move.l (sp)+,d4 ! restore d4
|
||||
move.l a0,d3 ! restore d3
|
||||
jmp (a1)
|
||||
|
||||
EIDIVZ = 6
|
||||
9: move.w #EIDIVZ,-(sp)
|
||||
jsr .trp
|
||||
|
|
|
@ -5,34 +5,77 @@
|
|||
.sect .bss
|
||||
|
||||
! unsigned long divide
|
||||
!-----------------------------------------------------------------------------
|
||||
! rewritten by Kai-Uwe Bloem (i5110401@dbstu1.bitnet) for speed.
|
||||
! #1 01/12/90 initial revision. Minor reduce of shift operations.
|
||||
! #2 03/07/90 use 68000 divu instruction wherever possible. This change
|
||||
! makes #1 superfluous. (derived from my GNU division routine)
|
||||
!-----------------------------------------------------------------------------
|
||||
! Some common cases can be handled in a special, much faster way :
|
||||
! 1) divisor = 0
|
||||
! => cause trap, then return to user. Result is undefined
|
||||
! 2) dividend < divisor
|
||||
! => quotient = 0, remainder = dividend
|
||||
! 3) divisor < 0x10000 ( i.e. divisor is only 16 bits wide )
|
||||
! => quotient and remainder can be calculated quite fast by repeated
|
||||
! application of 68000 divu operations (ca. 400 cycles)
|
||||
! 4) otherwise (due to #2, #3 dividend, divisor both wider than 16 bits)
|
||||
! => do slow division by shift and subtract
|
||||
!-----------------------------------------------------------------------------
|
||||
|
||||
|
||||
! register usage:
|
||||
! : d0 divisor
|
||||
! d1 dividend
|
||||
! exit : d1 quotient
|
||||
! d2 remainder
|
||||
|
||||
.sect .text
|
||||
.dvu:
|
||||
move.l d3,a0 ! save d3
|
||||
move.l (sp)+,a1 ! return address
|
||||
move.l (sp)+,d0
|
||||
move.l (sp)+,d1
|
||||
move.l d3,-(sp) ! save d3
|
||||
tst.l d0
|
||||
bne 0f
|
||||
move.l (sp)+,d3
|
||||
move.w #EIDIVZ,-(sp)
|
||||
jsr .trp
|
||||
0:
|
||||
clr.l d2
|
||||
move.l #32,d3
|
||||
3:
|
||||
lsl.l #1,d1
|
||||
roxl.l #1,d2
|
||||
cmp.l d0,d2
|
||||
blt 4f
|
||||
sub.l d0,d2
|
||||
add #1,d1
|
||||
move.l (sp)+,d0 ! divisor
|
||||
move.l (sp)+,d2 ! dividend
|
||||
clr.l d1 ! prepare quotient
|
||||
! === case 1: divisor = 0
|
||||
tst.l d0 ! divisor = 0 ?
|
||||
beq 9f ! yes - divide by zero trap
|
||||
! === case 2: dividend < divisor
|
||||
cmp.l d0,d2 ! dividend < divisor ?
|
||||
bcs 8f ! yes - division already finished
|
||||
! === case 3: divisor <= 0x0ffff
|
||||
cmp.l #0x0ffff,d0 ! is divisor only 16 bits wide ?
|
||||
bhi 2f
|
||||
move.w d2,d3 ! save dividend.l
|
||||
clr.w d2 ! prepare dividend.h for divu operation
|
||||
swap d2
|
||||
beq 0f ! dividend.h is all zero, no divu necessary
|
||||
divu d0,d2
|
||||
0: move.w d2,d1 ! save quotient.h
|
||||
swap d1
|
||||
move.w d3,d2 ! divide dividend.l
|
||||
divu d0,d2 ! (d2.h = remainder of prev divu)
|
||||
move.w d2,d1 ! save qoutient.l
|
||||
clr.w d2 ! get remainder
|
||||
swap d2
|
||||
bra 8f
|
||||
! === case 4: divisor and dividend both > 0x0ffff
|
||||
2:
|
||||
move #32-1,d3 ! loop count
|
||||
4:
|
||||
sub #1,d3
|
||||
bgt 3b
|
||||
move.l (sp)+,d3
|
||||
lsl.l #1,d2 ! shift dividend ...
|
||||
roxl.l #1,d1 ! ... into d1
|
||||
cmp.l d0,d1 ! compare with divisor
|
||||
bcs 5f
|
||||
sub.l d0,d1 ! bigger, subtract divisor
|
||||
add #1,d2 ! note subtraction in result
|
||||
5:
|
||||
dbra d3,4b
|
||||
exg d1,d2 ! get results in the correct registers
|
||||
8:
|
||||
move.l a0,d3 ! restore d3
|
||||
jmp (a1)
|
||||
|
||||
EIDIVZ = 6
|
||||
9: move.w #EIDIVZ,-(sp)
|
||||
jsr .trp
|
||||
|
|
9
mach/m68k2/libem/edata.s
Normal file
9
mach/m68k2/libem/edata.s
Normal file
|
@ -0,0 +1,9 @@
|
|||
! edata.s: defines the global symbol _edata at the current end of
! section .data, after padding that section with .align 4.

! Declare the sections in a fixed order: .text, .rom, .data, .bss.
.sect .text
.sect .rom
.sect .data
.sect .bss

.define _edata			! export _edata

.sect .data
	.align 4		! pad .data before placing the label
.sect .data
_edata:
|
22
mach/m68k2/libem/em_end.s
Normal file
22
mach/m68k2/libem/em_end.s
Normal file
|
@ -0,0 +1,22 @@
|
|||
! em_end.s: defines the global end-of-section symbols endtext, enddata,
! endbss and __end.  Every section is first padded with .align 4.

! Declare the sections in a fixed order: .text, .rom, .data, .bss.
.sect .text
.sect .rom
.sect .data
.sect .bss

.define endtext,enddata,endbss,__end

! Pad each of the four sections.
.sect .text
	.align 4
.sect .rom
	.align 4
.sect .data
	.align 4
.sect .bss
	.align 4
.sect .end ! only for declaration of _end, __end and endbss.

! Place the labels at the current end of their sections.
.sect .text
endtext:
.sect .data
enddata:
.sect .end
__end:
endbss:
|
|
@ -1,16 +1,7 @@
|
|||
.define endtext,enddata,endbss,_etext,_edata,_end
|
||||
.sect .text
|
||||
.sect .rom
|
||||
.sect .data
|
||||
.sect .bss
|
||||
.sect .end ! only for declaration of _end and endbss.
|
||||
|
||||
.sect .text
|
||||
endtext:
|
||||
_etext:
|
||||
.sect .data
|
||||
enddata:
|
||||
_edata:
|
||||
.sect .end
|
||||
.define _end
|
||||
.sect .end ! only for declaration of _end, __end and endbss.
|
||||
_end:
|
||||
endbss:
|
||||
|
|
9
mach/m68k2/libem/etext.s
Normal file
9
mach/m68k2/libem/etext.s
Normal file
|
@ -0,0 +1,9 @@
|
|||
! etext.s: defines the global symbol _etext at the current end of
! section .text, after padding that section with .align 4.

! Declare the sections in a fixed order: .text, .rom, .data, .bss.
.sect .text
.sect .rom
.sect .data
.sect .bss

.define _etext			! export _etext

.sect .text
	.align 4		! pad .text before placing the label
.sect .text
_etext:
|
352
mach/m68k2/libem/fp68881.s
Normal file
352
mach/m68k2/libem/fp68881.s
Normal file
|
@ -0,0 +1,352 @@
|
|||
.define .adf4, .adf8, .sbf4, .sbf8, .mlf4, .mlf8, .dvf4, .dvf8
|
||||
.define .ngf4, .ngf8, .fif4, .fif8, .fef4, .fef8
|
||||
.define .cif4, .cif8, .cuf4, .cuf8, .cfi, .cfu, .cff4, .cff8
|
||||
.define .cmf4, .cmf8
|
||||
.sect .text
|
||||
.sect .rom
|
||||
.sect .data
|
||||
.sect .bss
|
||||
|
||||
! $Header$
|
||||
|
||||
! Implement interface to floating point package for M68881
|
||||
|
||||
.sect .text
|
||||
! .adf4: add two single-precision values held on the stack.
! After the return address is popped, the operands are at (sp) and 4(sp);
! the sum replaces the one at 4(sp).  sp is left pointing at the first
! operand (presumably the caller removes it - confirm against the
! code generator table).  Uses a0 and fp0.
.adf4:
	move.l	(sp)+,a0		! a0 = return address
	fmove.s	(sp),fp0		! fp0 = operand at top of stack
	fadd.s	4(sp),fp0		! fp0 = fp0 + operand below it
	fmove.s	fp0,4(sp)		! sum overwrites the lower operand
	jmp	(a0)			! return
|
||||
|
||||
! .adf8: add two double-precision values held on the stack.
! After the return address is popped, the operands are at (sp) and 8(sp);
! the sum replaces the one at 8(sp).  Uses a0 and fp0.
.adf8:
	move.l	(sp)+,a0		! a0 = return address
	fmove.d	(sp),fp0		! fp0 = operand at top of stack
	fadd.d	8(sp),fp0		! fp0 = fp0 + operand below it
	fmove.d	fp0,8(sp)		! sum overwrites the lower operand
	jmp	(a0)			! return
|
||||
|
||||
! .sbf4: single-precision subtract.
! After the return address is popped, computes
!	(value at 4(sp)) - (value at (sp))
! and stores the difference at 4(sp).  Uses a0, fp0 and fp1.
.sbf4:
	move.l	(sp)+,a0		! a0 = return address
	fmove.s	(sp),fp0		! fp0 = subtrahend (top of stack)
	fmove.s	4(sp),fp1		! fp1 = minuend
	fsub	fp0,fp1			! fp1 = fp1 - fp0
	fmove.s	fp1,4(sp)		! difference overwrites the minuend
	jmp	(a0)			! return
|
||||
|
||||
! .sbf8: double-precision subtract.
! After the return address is popped, computes
!	(value at 8(sp)) - (value at (sp))
! and stores the difference at 8(sp).  Uses a0, fp0 and fp1.
.sbf8:
	move.l	(sp)+,a0		! a0 = return address
	fmove.d	(sp),fp0		! fp0 = subtrahend (top of stack)
	fmove.d	8(sp),fp1		! fp1 = minuend
	fsub	fp0,fp1			! fp1 = fp1 - fp0
	fmove.d	fp1,8(sp)		! difference overwrites the minuend
	jmp	(a0)			! return
|
||||
|
||||
! .mlf4: multiply two single-precision values held on the stack.
! After the return address is popped, the operands are at (sp) and 4(sp);
! the product replaces the one at 4(sp).  Uses a0 and fp0.
.mlf4:
	move.l	(sp)+,a0		! a0 = return address
	fmove.s	(sp),fp0		! fp0 = operand at top of stack
	fmul.s	4(sp),fp0		! fp0 = fp0 * operand below it
	fmove.s	fp0,4(sp)		! product overwrites the lower operand
	jmp	(a0)			! return
|
||||
|
||||
! .mlf8: multiply two double-precision values held on the stack.
! After the return address is popped, the operands are at (sp) and 8(sp);
! the product replaces the one at 8(sp).  Uses a0 and fp0.
.mlf8:
	move.l	(sp)+,a0		! a0 = return address
	fmove.d	(sp),fp0		! fp0 = operand at top of stack
	fmul.d	8(sp),fp0		! fp0 = fp0 * operand below it
	fmove.d	fp0,8(sp)		! product overwrites the lower operand
	jmp	(a0)			! return
|
||||
|
||||
! .dvf4: single-precision divide.
! After the return address is popped, computes
!	(value at 4(sp)) / (value at (sp))
! and stores the quotient at 4(sp).  Uses a0, fp0 and fp1.
.dvf4:
	move.l	(sp)+,a0		! a0 = return address
	fmove.s	(sp),fp0		! fp0 = divisor (top of stack)
	fmove.s	4(sp),fp1		! fp1 = dividend
	fdiv	fp0,fp1			! fp1 = fp1 / fp0
	fmove.s	fp1,4(sp)		! quotient overwrites the dividend
	jmp	(a0)			! return
|
||||
|
||||
! .dvf8: double-precision divide.
! After the return address is popped, computes
!	(value at 8(sp)) / (value at (sp))
! and stores the quotient at 8(sp).  Uses a0, fp0 and fp1.
.dvf8:
	move.l	(sp)+,a0		! a0 = return address
	fmove.d	(sp),fp0		! fp0 = divisor (top of stack)
	fmove.d	8(sp),fp1		! fp1 = dividend
	fdiv	fp0,fp1			! fp1 = fp1 / fp0
	fmove.d	fp1,8(sp)		! quotient overwrites the dividend
	jmp	(a0)			! return
|
||||
|
||||
! .ngf4: negate the single-precision value at 4(sp) in place.
! The return address stays on the stack at (sp); returns with rts.
! Uses fp0.
.ngf4:
	fmove.s	4(sp),fp0		! fp0 = operand (just above return address)
	fneg	fp0			! fp0 = -fp0
	fmove.s	fp0,4(sp)		! write the result back in place
	rts
|
||||
|
||||
! .ngf8: negate the double-precision value at 4(sp) in place.
! The return address stays on the stack at (sp); returns with rts.
! Uses fp0.
.ngf8:
	fmove.d	4(sp),fp0		! fp0 = operand (just above return address)
	fneg	fp0			! fp0 = -fp0
	fmove.d	fp0,4(sp)		! write the result back in place
	rts
|
||||
|
||||
! .fif4: multiply two singles and split the product into integer and
! fraction parts.
! After the return address is popped:
!	(sp)	address loaded into a1, where both results are written
!	4(sp)	first single-precision operand
!	8(sp)	second single-precision operand
! Results:
!	(a1)	product truncated toward zero (fintrz)
!	4(a1)	product minus that truncated value
! Nothing else is popped.  Uses a0, a1, fp0 and fp1.
.fif4:
	move.l	(sp)+,a0		! a0 = return address
	move.l	(sp),a1			! a1 = result address
	fmove.s	4(sp),fp0
	fmove.s	8(sp),fp1
	fmul	fp0,fp1			! fp1 = product
	fintrz	fp1,fp0			! fp0 = integer part of product
	fsub	fp0,fp1			! fp1 = fraction part
	fmove.s	fp1,4(a1)		! store fraction
	fmove.s	fp0,(a1)		! store integer part
	jmp	(a0)			! return
|
||||
|
||||
! .fif8: multiply two doubles and split the product into integer and
! fraction parts.
! After the return address is popped:
!	(sp)	address loaded into a1, where both results are written
!	4(sp)	first double-precision operand
!	12(sp)	second double-precision operand
! Results:
!	(a1)	product truncated toward zero (fintrz)
!	8(a1)	product minus that truncated value
! Nothing else is popped.  Uses a0, a1, fp0 and fp1.
.fif8:
	move.l	(sp)+,a0		! a0 = return address
	move.l	(sp),a1			! a1 = result address
	fmove.d	4(sp),fp0
	fmove.d	12(sp),fp1
	fmul	fp0,fp1			! fp1 = product
	fintrz	fp1,fp0			! fp0 = integer part of product
	fsub	fp0,fp1			! fp1 = fraction part
	fmove.d	fp1,8(a1)		! store fraction
	fmove.d	fp0,(a1)		! store integer part
	jmp	(a0)			! return
|
||||
|
||||
! .fef4: split a single-precision value into exponent and fraction.
! After the return address is popped:
!	(sp)	address loaded into a1, where both results are written
!	4(sp)	single-precision operand
! Results:
!	(a1)	16-bit exponent
!	2(a1)	single-precision fraction
! Special cases visible in the code:
!	infinity	exponent 129, fraction +0.5 or -0.5 (sign of operand)
!	zero mantissa	exponent 0, fraction = the fgetman result
! Otherwise the exponent is fgetexp+1 and the fraction is fgetman/2.
! Uses a0, a1, d0, fp0 and fp1.
.fef4:
	move.l	(sp)+,a0		! a0 = return address
	move.l	(sp),a1			! a1 = result address
	fmove.s	4(sp),fp0
	fgetexp	fp0,fp1			! fp1 = unbiased exponent
	fmove.l	fpsr,d0
	and.l	#0x2000,d0		! set if Infinity (OPERR bit of fpsr)
	beq	4f			! finite operand
	! --- operand is infinite ---
	move.w	#129,(a1)
	fmove.s	4(sp),fp0		! reload to set the condition codes
	fblt	3f			! negative infinity
	move.l	#0x3f000000,2(a1)	! bit pattern of +0.5
	jmp	(a0)
3:
	move.l	#0xbf000000,2(a1)	! bit pattern of -0.5
	jmp	(a0)
	! --- operand is finite ---
4:
	fmove.l	fp1,d0
	add.l	#1,d0			! exponent for a fraction below 1
	fgetman	fp0			! fp0 = mantissa of operand
	fbne	5f
	clr.l	d0			! mantissa is zero: exponent 0
	bra	6f
5:
	fmove.l	#2,fp1
	fdiv	fp1,fp0			! halve the mantissa
6:
	fmove.s	fp0,2(a1)		! store fraction
	move.w	d0,(a1)			! store exponent
	jmp	(a0)			! return
|
||||
|
||||
! .fef8: split a double-precision value into exponent and fraction.
! After the return address is popped:
!	(sp)	address loaded into a1, where both results are written
!	4(sp)	double-precision operand
! Results:
!	(a1)	16-bit exponent
!	2(a1)	double-precision fraction
! Special cases visible in the code:
!	infinity	exponent 1025, fraction +0.5 or -0.5 (sign of operand)
!	zero mantissa	exponent 0, fraction = the fgetman result
! Otherwise the exponent is fgetexp+1 and the fraction is fgetman/2.
! Uses a0, a1, d0, fp0 and fp1.
.fef8:
	move.l	(sp)+,a0		! a0 = return address
	move.l	(sp),a1			! a1 = result address
	fmove.d	4(sp),fp0
	fgetexp	fp0,fp1			! fp1 = unbiased exponent
	fmove.l	fpsr,d0
	and.l	#0x2000,d0		! set if Infinity (OPERR bit of fpsr)
	beq	4f			! finite operand
	! --- operand is infinite ---
	move.w	#1025,(a1)
	fmove.d	4(sp),fp0		! reload to set the condition codes
	fblt	3f			! negative infinity
	move.l	#0x3fe00000,2(a1)	! high long of +0.5
	clr.l	6(a1)			! low long
	jmp	(a0)
3:
	move.l	#0xbfe00000,2(a1)	! high long of -0.5
	clr.l	6(a1)			! low long
	jmp	(a0)
	! --- operand is finite ---
4:
	fmove.l	fp1,d0
	add.l	#1,d0			! exponent for a fraction below 1
	fgetman	fp0			! fp0 = mantissa of operand
	fbne	5f
	clr.l	d0			! mantissa is zero: exponent 0
	bra	6f
5:
	fmove.l	#2,fp1
	fdiv	fp1,fp0			! halve the mantissa
6:
	fmove.d	fp0,2(a1)		! store fraction
	move.w	d0,(a1)			! store exponent
	jmp	(a0)			! return
|
||||
|
||||
! .cif4: convert a signed integer on the stack to single precision.
! After the return address is popped, the word at (sp) selects the
! source width (presumably the integer size in bytes - confirm against
! the caller) and the integer itself is at 2(sp).
!	word == 2 : 16-bit integer; result stored at (sp)
!	otherwise : 32-bit integer; result stored at 2(sp)
! Uses a0 and fp0.
.cif4:
	move.l	(sp)+,a0		! a0 = return address
	cmp.w	#2,(sp)
	bne	4f			! not a 2-byte source
	fmove.w	2(sp),fp0		! convert 16-bit integer
	fmove.s	fp0,(sp)
	jmp	(a0)
4:
	fmove.l	2(sp),fp0		! convert 32-bit integer
	fmove.s	fp0,2(sp)
	jmp	(a0)
|
||||
|
||||
! .cif8: convert a signed integer on the stack to double precision.
! After the return address is popped, the word at (sp) selects the
! source width (presumably the integer size in bytes - confirm against
! the caller) and the integer itself is at 2(sp).
!	word == 2 : 16-bit integer
!	otherwise : 32-bit integer
! In both cases the 8-byte result is stored at (sp).
! Uses a0 and fp0.
.cif8:
	move.l	(sp)+,a0		! a0 = return address
	cmp.w	#2,(sp)
	bne	4f			! not a 2-byte source
	fmove.w	2(sp),fp0		! convert 16-bit integer
	fmove.d	fp0,(sp)
	jmp	(a0)
4:
	fmove.l	2(sp),fp0		! convert 32-bit integer
	fmove.d	fp0,(sp)
	jmp	(a0)
|
||||
|
||||
! .cuf4: convert an unsigned integer on the stack to single precision.
! After the return address is popped, the word at (sp) selects the
! source width (presumably the integer size in bytes - confirm against
! the caller) and the integer itself is at 2(sp).
! The 68881 only converts signed integers, so the value is converted
! as signed and, when its sign bit was set, corrected by adding 2^16
! (16-bit source) or 2^32 (32-bit source).
!	word == 2 : 16-bit integer; result stored at (sp)
!	otherwise : 32-bit integer; result stored at 2(sp)
! Uses a0 and fp0.
.cuf4:
	move.l	(sp)+,a0		! a0 = return address
	cmp.w	#2,(sp)
	bne	5f			! not a 2-byte source
	! --- 16-bit source ---
	fmove.w	2(sp),fp0		! signed conversion
	tst.w	2(sp)
	bge	3f			! sign bit clear: already correct
	fadd.l	#65536,fp0		! add 2^16
3:
	fmove.s	fp0,(sp)
	jmp	(a0)
	! --- 32-bit source ---
5:
	fmove.l	2(sp),fp0		! signed conversion
	tst.l	2(sp)
	bge	4f			! sign bit clear: already correct
	fsub.l	#-2147483648,fp0	! add 2^31 ...
	fsub.l	#-2147483648,fp0	! ... twice, i.e. add 2^32
4:
	fmove.s	fp0,2(sp)
	jmp	(a0)
|
||||
|
||||
! .cuf8: convert an unsigned integer on the stack to double precision.
! After the return address is popped, the word at (sp) selects the
! source width (presumably the integer size in bytes - confirm against
! the caller) and the integer itself is at 2(sp).
! The value is converted as signed and, when its sign bit was set,
! corrected by adding 2^16 (16-bit source) or 2^32 (32-bit source).
! In both cases the 8-byte result is stored at (sp).
! Uses a0, d0 and fp0.
.cuf8:
	move.l	(sp)+,a0		! a0 = return address
	move.w	(sp),d0			! d0 = width selector
	cmp.w	#2,d0
	bne	3f			! not a 2-byte source
	! --- 16-bit source ---
	fmove.w	2(sp),fp0		! signed conversion
	tst.w	2(sp)
	bge	4f			! sign bit clear: already correct
	fadd.l	#65536,fp0		! add 2^16
	bra	4f
	! --- 32-bit source ---
3:
	fmove.l	2(sp),fp0		! signed conversion
	tst.l	2(sp)
	bge	4f			! sign bit clear: already correct
	fsub.l	#-2147483648,fp0	! add 2^31 ...
	fsub.l	#-2147483648,fp0	! ... twice, i.e. add 2^32
4:
	fmove.d	fp0,(sp)		! store the double result
	jmp	(a0)			! return
|
||||
|
||||
! .cfi: convert a floating point value on the stack to a signed integer.
! After the return address is popped:
!	(sp)	word selecting the result width (2 => 16-bit, else 32-bit)
!	2(sp)	word selecting the source format (4 => single, else double)
!	4(sp)	the floating point operand
! (The two words are presumably the destination and source sizes in
! bytes - confirm against the code generator table.)
! Result location, relative to sp on return:
!	single source: 16-bit at 6(sp),  32-bit at 4(sp)
!	double source: 16-bit at 10(sp), 32-bit at 8(sp)
! The value is truncated toward zero with fintrz before it is stored,
! as .cfu does; a bare fmove to an integer destination would round
! according to the current FPCR rounding mode instead.
! Uses a0, d0, d1 and fp0.
.cfi:
	move.l	(sp)+,a0		! a0 = return address
	move.w	(sp),d1			! d1 = result width selector
	move.w	2(sp),d0		! d0 = source format selector
	cmp.w	#4,d0
	bne	1f
	fmove.s	4(sp),fp0		! single-precision source
	bra	2f
1:
	fmove.d	4(sp),fp0		! double-precision source
	add.l	#4,sp			! temporarily skip 4 bytes so the stores
					! below land in the double's low half
2:
	fintrz	fp0,fp0			! truncate toward zero
	cmp.w	#2,d1
	bne	1f
	fmove.w	fp0,6(sp)		! 16-bit result
	bra	2f
1:
	fmove.l	fp0,4(sp)		! 32-bit result
2:
	cmp.w	#4,d0
	beq	1f
	sub.l	#4,sp			! undo the adjustment made for a double
1:
	jmp	(a0)			! return
|
||||
|
||||
! .cfu: convert a floating point value on the stack to an unsigned integer.
! After the return address is popped:
!	(sp)	word selecting the result width (2 => 16-bit, else 32-bit)
!	2(sp)	word selecting the source format (4 => single, else double)
!	4(sp)	the floating point operand
! (The two words are presumably the destination and source sizes in
! bytes - confirm against the code generator table.)
! The absolute value of the operand is converted.  When the operand's
! bit pattern compares (signed) >= that of 2^31, 2^31 is subtracted
! before the conversion and bit 31 of the integer is flipped afterwards.
! Result location, relative to sp on return:
!	single source: 16-bit at 6(sp),  32-bit at 4(sp)
!	double source: 16-bit at 10(sp), 32-bit at 8(sp)
! Uses a0, d0, d1, d2 and fp0.
.cfu:
	move.l	(sp)+,a0		! a0 = return address
	move.w	(sp),d1			! d1 = result width selector
	move.w	2(sp),d2		! d2 = source format selector
	cmp.w	#4,d2
	bne	1f			! double-precision source
	! --- single-precision source ---
	fmove.s	4(sp),fp0
	fabs	fp0
	cmp.l	#0x4f000000,4(sp)	! 0x4f000000 = 2^31 as a single
	bge	2f
	fintrz	fp0,fp0			! fits in 31 bits: truncate and convert
	fmove.l	fp0,d0
	bra	3f
2:
	fadd.l	#-2147483648,fp0	! subtract 2^31
	fintrz	fp0,fp0
	fmove.l	fp0,d0
	bchg	#31,d0			! add 2^31 back in the integer
	bra	3f
	! --- double-precision source ---
1:
	fmove.d	4(sp),fp0
	add.l	#4,sp			! operand now starts at (sp)
	fabs	fp0
	cmp.l	#0x41e00000,(sp)	! 0x41e00000 = high long of 2^31 as a double
	bge	1f
	fintrz	fp0,fp0			! fits in 31 bits: truncate and convert
	fmove.l	fp0,d0
	bra	3f
1:
	fadd.l	#-2147483648,fp0	! subtract 2^31
	fintrz	fp0,fp0
	fmove.l	fp0,d0
	bchg	#31,d0			! add 2^31 back in the integer
	! --- store the result ---
3:
	cmp.w	#2,d1
	bne	1f
	move.w	d0,6(sp)		! 16-bit result
	bra	2f
1:
	move.l	d0,4(sp)		! 32-bit result
2:
	cmp.w	#4,d2
	beq	1f
	sub.l	#4,sp			! undo the adjustment made for a double
1:
	jmp	(a0)			! return
|
||||
|
||||
! .cff4: convert the double at the top of the stack to single precision.
! After the return address is popped, the double is read from (sp) and
! the 4-byte result is written at 4(sp), over the double's second long.
! sp itself is not adjusted.  Uses a0 and fp0.
.cff4:
	move.l	(sp)+,a0		! a0 = return address
	fmove.d	(sp),fp0		! fp0 = double operand
	fmove.s	fp0,4(sp)		! store it rounded to single
	jmp	(a0)			! return
|
||||
|
||||
! .cff8: convert the single at the top of the stack to double precision.
! After the return address is popped, the single is read from (sp) and
! the 8-byte result is written starting at (sp).  sp itself is not
! adjusted (presumably the caller has made room for the extra four
! bytes - confirm against the code generator table).  Uses a0 and fp0.
.cff8:
	move.l	(sp)+,a0		! a0 = return address
	fmove.s	(sp),fp0		! fp0 = single operand
	fmove.d	fp0,(sp)		! store it widened to double
	jmp	(a0)			! return
|
||||
|
||||
! .cmf4: compare two single-precision values held on the stack.
! After the return address is popped, A is the value at 4(sp) and B the
! value at (sp).  Result in d0:
!	 0	A equals B
!	-1	A is less than B
!	+1	otherwise
! The operands are left on the stack.  Uses a0, d0, fp0 and fp1.
.cmf4:
	move.l	(sp)+,a0		! a0 = return address
	clr.l	d0			! assume equal
	fmove.s	(sp),fp0		! fp0 = B
	fmove.s	4(sp),fp1		! fp1 = A
	fcmp	fp0,fp1			! condition codes from A - B
	fbeq	4f
	fblt	3f
	add.l	#1,d0			! d0 = +1
	jmp	(a0)
3:
	sub.l	#1,d0			! A < B: d0 = -1
4:
	jmp	(a0)
|
||||
|
||||
! .cmf8: compare two double-precision values held on the stack.
! After the return address is popped, A is the value at 8(sp) and B the
! value at (sp).  Result in d0:
!	 0	A equals B
!	-1	A is less than B
!	+1	otherwise
! The operands are left on the stack.  Uses a0, d0, fp0 and fp1.
.cmf8:
	move.l	(sp)+,a0		! a0 = return address
	clr.l	d0			! assume equal
	fmove.d	(sp),fp0		! fp0 = B
	fmove.d	8(sp),fp1		! fp1 = A
	fcmp	fp0,fp1			! condition codes from A - B
	fbeq	4f
	fblt	3f
	add.l	#1,d0			! d0 = +1
	jmp	(a0)
3:
	sub.l	#1,d0			! A < B: d0 = -1
4:
	jmp	(a0)
|
|
@ -4,31 +4,92 @@
|
|||
.sect .data
|
||||
.sect .bss
|
||||
|
||||
! signed long multiply
|
||||
!-----------------------------------------------------------------------------
|
||||
! rewritten by Kai-Uwe Bloem (i5110401@dbstu1.bitnet) for speed.
|
||||
! #1 01/12/90 initial revision
|
||||
!-----------------------------------------------------------------------------
|
||||
! 3 cases worth to recognize :
|
||||
! 1) both the upper word of u and v are zero
|
||||
! => 1 mult : Low*Low
|
||||
! 2) only one of the upper words is zero
|
||||
! => 2 mult : Low*HighLow
|
||||
! 3) both upper words are not zero
|
||||
! => 4 mult : HighLow*HighLow
|
||||
! there are other cases (e.g. lower word is zero but high word is not, or
|
||||
! one operand is all zero). However, this seems not to be very common, so
|
||||
! they are ignored for the price of superfluous multiplications in these
|
||||
! cases.
|
||||
!-----------------------------------------------------------------------------
|
||||
|
||||
! entry : d0 multiplicand
|
||||
! d1 multiplier
|
||||
! exit : d0 high order result
|
||||
! d1 low order result
|
||||
! d2,a0,a1 : destroyed
|
||||
|
||||
.sect .text
|
||||
.mli:
|
||||
move.l (sp)+,a0
|
||||
move.l (sp)+,d1
|
||||
move.l (sp)+,d0
|
||||
move.l d5,-(sp)
|
||||
clr d5
|
||||
tst.l d0
|
||||
bpl 1f
|
||||
neg.l d0
|
||||
not d5
|
||||
1:
|
||||
tst.l d1
|
||||
bpl 2f
|
||||
neg.l d1
|
||||
not d5
|
||||
2:
|
||||
move.l d0,-(sp)
|
||||
move.l d1,-(sp)
|
||||
jsr .mlu
|
||||
tst d5
|
||||
beq 3f
|
||||
move.l (sp)+,a1 ! return address
|
||||
move.l d3,a0 ! save register
|
||||
movem.w (sp)+,d0-d3 ! get v and u
|
||||
move.w d5,-(sp) ! save sign register
|
||||
move.w d2,d5
|
||||
bge 0f ! negate u if neccessary
|
||||
neg.w d1
|
||||
negx.w d0
|
||||
0: tst.w d0
|
||||
bge 0f ! negate v if neccessary
|
||||
eor.w d0,d5
|
||||
neg.w d1
|
||||
negx.w d0
|
||||
0: bne 1f ! case 2) or 3)
|
||||
tst.w d2
|
||||
bne 2f ! case 2)
|
||||
! === case 1: _l x _l ===
|
||||
mulu d3,d1 ! r.l = u.l x v.l
|
||||
9: ! (r.h is already zero)
|
||||
tst.w d5 ! negate result if neccessary
|
||||
bpl 0f
|
||||
neg.l d1
|
||||
negx.l d0
|
||||
0: move.w (sp)+,d5 ! return
|
||||
move.l a0,d3
|
||||
jmp (a1)
|
||||
! === possibly case 2) or case 3) ===
|
||||
1:
|
||||
tst.w d2
|
||||
bne 3f ! case 3)
|
||||
! === case 2: _l x hl ===
|
||||
exg d0,d2 ! exchange u and v
|
||||
exg d1,d3 ! (minimizes number of distinct cases)
|
||||
2:
|
||||
mulu d1,d2 ! a = v.l x u.h
|
||||
mulu d3,d1 ! r.l = v.l x u.l
|
||||
swap d2 ! a = a << 16
|
||||
clr.l d3
|
||||
move.w d2,d3
|
||||
clr.w d2
|
||||
add.l d2,d1 ! r += a
|
||||
addx.l d3,d0
|
||||
bra 9b
|
||||
! === case 3: hl x hl ===
|
||||
3:
|
||||
move.l (sp)+,d5
|
||||
jmp (a0)
|
||||
move.l d4,-(sp) ! need more registers
|
||||
move.w d2,d4
|
||||
mulu d1,d4 ! a = v.l x u.h
|
||||
mulu d3,d1 ! r.l = u.l x v.l
|
||||
mulu d0,d3 ! b = v.h x u.l
|
||||
mulu d2,d0 ! r.h = u.h x v.h
|
||||
swap d1 ! (just for simplicity)
|
||||
add.w d4,d1 ! r += a << 16
|
||||
clr.w d4
|
||||
swap d4
|
||||
addx.l d4,d0
|
||||
add.w d3,d1 ! r += b << 16
|
||||
clr.w d3
|
||||
swap d3
|
||||
addx.l d3,d0
|
||||
swap d1
|
||||
move.l (sp)+,d4 ! return
|
||||
bra 9b
|
||||
|
|
|
@ -4,38 +4,79 @@
|
|||
.sect .data
|
||||
.sect .bss
|
||||
|
||||
! unsigned long multiply
|
||||
!-----------------------------------------------------------------------------
|
||||
! rewritten by Kai-Uwe Bloem (i5110401@dbstu1.bitnet) for speed.
|
||||
! #1 01/12/90 initial revision
|
||||
!-----------------------------------------------------------------------------
|
||||
! 3 cases worth to recognize :
|
||||
! 1) both the upper word of u and v are zero
|
||||
! => 1 mult : Low*Low
|
||||
! 2) only one of the upper words is zero
|
||||
! => 2 mult : Low*HighLow
|
||||
! 3) both upper words are not zero
|
||||
! => 4 mult : HighLow*HighLow
|
||||
! there are other cases (e.g. lower word is zero but high word is not, or
|
||||
! one operand is all zero). However, this seems not to be very common, so
|
||||
! they are ignored for the price of superfluous multiplications in these
|
||||
! cases.
|
||||
!-----------------------------------------------------------------------------
|
||||
|
||||
! entry : d0 multiplicand
|
||||
! d1 multiplier
|
||||
! exit : d0 high order result
|
||||
! d1 low order result
|
||||
! d2,a0,a1 : destroyed
|
||||
|
||||
.sect .text
|
||||
.mlu:
|
||||
move.l (sp)+,a1
|
||||
move.l (sp)+,d1
|
||||
move.l (sp)+,d0
|
||||
movem.l d3/d4/d6,-(sp)
|
||||
move.l d1,d3
|
||||
move.l d1,d2
|
||||
swap d2
|
||||
move.l d2,d4
|
||||
mulu d0,d1
|
||||
mulu d0,d2
|
||||
swap d0
|
||||
mulu d0,d3
|
||||
mulu d4,d0
|
||||
clr.l d6
|
||||
swap d1
|
||||
add d2,d1
|
||||
addx.l d6,d0
|
||||
add d3,d1
|
||||
addx.l d6,d0
|
||||
swap d1
|
||||
clr d2
|
||||
clr d3
|
||||
swap d2
|
||||
swap d3
|
||||
add.l d2,d0
|
||||
add.l d3,d0
|
||||
movem.l (sp)+,d3/d4/d6
|
||||
move.l (sp)+,a1 ! return address
|
||||
move.l d3,a0 ! save register
|
||||
movem.w (sp)+,d0-d3 ! get v and u
|
||||
tst.w d0
|
||||
bne 1f ! case 2) or 3)
|
||||
tst.w d2
|
||||
bne 2f ! case 2)
|
||||
! === case 1: _l x _l ===
|
||||
mulu d3,d1 ! r.l = u.l x v.l
|
||||
move.l a0,d3 ! (r.h is already zero)
|
||||
jmp (a1) ! return
|
||||
! === possibly case 2) or case 3) ===
|
||||
1:
|
||||
tst.w d2
|
||||
bne 3f ! case 3)
|
||||
! === case 2: _l x hl ===
|
||||
exg d0,d2 ! exchange u and v
|
||||
exg d1,d3 ! (minimizes number of distinct cases)
|
||||
2:
|
||||
mulu d1,d2 ! a = v.l x u.h
|
||||
mulu d3,d1 ! r.l = v.l x u.l
|
||||
swap d2 ! a = a << 16
|
||||
clr.l d3
|
||||
move.w d2,d3
|
||||
clr.w d2
|
||||
add.l d2,d1 ! r += a
|
||||
addx.l d3,d0
|
||||
move.l a0,d3 ! return
|
||||
jmp (a1)
|
||||
! === case 3: hl x hl ===
|
||||
3:
|
||||
move.l d4,-(sp) ! need more registers
|
||||
move.w d2,d4
|
||||
mulu d1,d4 ! a = v.l x u.h
|
||||
mulu d3,d1 ! r.l = u.l x v.l
|
||||
mulu d0,d3 ! b = v.h x u.l
|
||||
mulu d2,d0 ! r.h = u.h x v.h
|
||||
swap d1 ! (just for simplicity)
|
||||
add.w d4,d1 ! r += a << 16
|
||||
clr.w d4
|
||||
swap d4
|
||||
addx.l d4,d0
|
||||
add.w d3,d1 ! r += b << 16
|
||||
clr.w d3
|
||||
swap d3
|
||||
addx.l d3,d0
|
||||
swap d1
|
||||
move.l (sp)+,d4 ! return
|
||||
move.l a0,d3
|
||||
jmp (a1)
|
||||
|
|
Loading…
Reference in a new issue