ack/mach/z80/libem/dvu4.s

143 lines
3.2 KiB
ArmAsm
Raw Normal View History

1985-03-29 21:44:50 +00:00
.define .dvu4
1987-02-02 13:30:20 +00:00
.sect .text
.sect .rom
.sect .data
.sect .bss
.sect .text
1985-03-29 21:44:50 +00:00
! 4-byte divide routine for z80
! parameters:
! stack: divisor
! dividend
! stack: quotient (out)
! bc de: remainder (out) (high part in bc)
! a n-byte divide may be implemented
! using 2 (virtual) registers:
! - a n-byte register containing
! the divisor
! - a 2n-byte shiftregister (VSR)
!
! Initially, the VSR contains the dividend
! in its low (right) n bytes and zeroes in its
! high n bytes. The dividend is shifted
! left into a "window" bit by bit. After
! each shift, the contents of the window
! is compared with the divisor. If it is
! higher or equal, the divisor is subtracted from
! it and a "1" bit is inserted in the
! VSR from the right side! else a "0" bit
! is inserted. These bits are shifted left
! too during subsequent iterations.
! At the end, the rightmost part of VSR
! contains the quotient.
! For n=4, we need 2*4+4 = 12 bytes of
! registers. Unfortunately we only have
! 5 2-byte registers on the z80
! (bc,de,hl,ix and iy). Therefore we use
! an overlay technique for the rightmost
! 4 bytes of the VSR. The 32 iterations
! are split up into two groups: during
! the first 16 iterations we use the high
! order 16 bits of the dividend! during
! the last 16 iterations we use the
! low order 16 bits.
! register allocation:
! VSR iy hl ix
! divisor -de bc
.dvu4:
! initialization
pop hl ! save return address
ld (.retaddr),hl
pop bc ! low part (2 bytes)
! of divisor in bc
xor a ! clear carry, a := 0
ld h,a ! hl := 0
ld l,a
ld (.flag),a ! first pass main loop
pop de ! high part divisor
sbc hl,de ! inverse of high part
ex de,hl ! of divisor in de
pop hl ! save low part of
! dividend in memory
ld (.low),hl ! used during second
! iteration over main loop
pop ix ! high part of dividend
push iy ! save LB
ld h,a ! hl := 0
ld l,a
ld iy,0 ! now the VSR is initialized
! main loop, done twice
1:
ld a,16
! sub-loop, done 16 times
2:
add iy,iy ! shift VSR left
add ix,ix
adc hl,hl
jp nc,3f
inc iy
3:
or a ! subtract divisor from
! window (iy hl)
ld (.iysave),iy
sbc hl,bc
jr nc,4f ! decrement iy if there
! was no borrow
dec iy
4:
add iy,de ! there is no "sbc iy,ss"
! on the z80, so de was
! inverted during init.
inc ix
! see if the result is non-negative,
! otherwise undo the subtract.
! note that this uncooperating machine
! does not set its S -or Z flag after
! a 16-bit add.
ex (sp),iy ! does anyone see a better
ex (sp),hl ! solution ???
bit 7,h
ex (sp),hl
ex (sp),iy
jp z,5f
! undo the subtract
add hl,bc
ld iy,(.iysave)
dec ix
5:
dec a
jr nz,2b
ld a,(.flag) ! see if this was first or
! second iteration of main loop
or a ! 0=first, 1=second
jr nz,6f
inc a ! a := 1
ld (.flag),a ! flag := 1
ld (.result),ix ! save high part of result
ld ix,(.low) ! initialize second
! iteration, ix := low
! part of dividend
jr 1b
6:
! clean up
push iy ! transfer remainder
pop bc ! from iy-hl to bc-de
ex de,hl
pop iy ! restore LB
ld hl,(.result) ! high part of result
push hl
push ix ! low part of result
ld hl,(.retaddr)
jp (hl) ! return
1987-02-02 13:30:20 +00:00
.sect .data
.flag: .data1 0
.low: .data2 0
.iysave: .data2 0
.retaddr: .data2 0
.result: .data2 0