* mirror of https://github.com/SEPPDROID/Digital-Research-Source-Code.git
* synced 2025-10-23 16:34:07 +00:00
* 168 lines - 6.9 KiB - ArmAsm
ttl fast floating point divide (ffpdiv)
|
|
*****************************************
|
|
* (c) copyright 1980 by motorola inc. *
|
|
*****************************************
|
|
|
|
********************************************
|
|
* ffpdiv subroutine *
|
|
* *
|
|
* input: *
|
|
* d6 - floating point divisor *
|
|
* d7 - floating point dividend *
|
|
* *
|
|
* output: *
|
|
* d7 - floating point quotient *
|
|
* *
|
|
* condition codes: *
|
|
* n - set if result negative *
|
|
* z - set if result zero *
|
|
* v - set if result overflowed *
|
|
* c - undefined *
|
|
* x - undefined *
|
|
* *
|
|
* registers d3 thru d5 volatile *
|
|
* *
|
|
* code: 150 bytes stack work: 0 bytes *
|
|
* *
|
|
* notes: *
|
|
* 1) divisor is unaltered (d6). *
|
|
* 2) underflows return zero without *
|
|
* any indicators set. *
|
|
* 3) overflows return the highest value *
|
|
* with the proper sign and the 'v' *
|
|
* bit set in the ccr. *
|
|
* 4) if a divide by zero is attempted *
|
|
* the divide by zero exception trap *
|
|
* is forced by this code with the *
|
|
* original arguments intact. if the *
|
|
* exception returns with the denom- *
|
|
* inator altered the divide operation *
|
|
* continues, otherwise an overflow *
|
|
* is forced with the proper sign. *
|
|
* the floating divide by zero can be *
|
|
* distinguished from true zero divide *
|
|
* by the fact that it is an immediate *
|
|
* zero dividing into register d7. *
|
|
* *
|
|
* time: (8 mhz no wait states assumed) *
|
|
* dividend zero 5.250 microseconds *
|
|
* minimum time others 72.750 microseconds *
|
|
* maximum time others 85.000 microseconds *
|
|
* average others 76.687 microseconds *
|
|
* *
|
|
********************************************
|
|
page
|
|
ffpdiv idnt 1,1 ffp divide
|
|
|
|
xdef ffpdiv entry point
|
|
xref ffpcpyrt copyright notice
|
|
|
|
section 9
|
|
|
|
* divide by zero exit
|
|
fpddzr divu.w #0,d7 **force divide by zero **
|
|
|
|
* if the exception returns with altered denominator - continue divide
|
|
tst.l d6 ? exception alter the zero
|
|
bne.s ffpdiv branch if so to continue
|
|
* setup maximum number for divide overflow
|
|
fpdovf or.l #$ffffff7f,d7 maximize with proper sign
|
|
tst.b d7 set condition code for sign
|
|
* or.w #$02,ccr set overflow bit
|
|
dc.l $003c0002 ******sick assembler******
|
|
fpdrtn rts return to caller
|
|
|
|
* over or underflow detected
|
|
fpdov2 swap.w d6 restore arg1
|
|
swap.w d7 restore arg2 for sign
|
|
fpdovfs eor.b d6,d7 setup correct sign
|
|
bra.s fpdovf and enter overflow handling
|
|
fpdouf bmi.s fpdovfs branch if overflow
|
|
fpdund move.l #0,d7 underflow to zero
|
|
rts and return to caller
|
|
|
|
***************
|
|
* entry point *
|
|
***************
|
|
|
|
* first subtract exponents
|
|
ffpdiv move.b d6,d5 copy arg1 (divisor)
|
|
beq.s fpddzr branch if divide by zero
|
|
move.l d7,d4 copy arg2 (dividend)
|
|
beq.s fpdrtn return zero if dividend zero
|
|
moveq #-128,d3 setup sign mask
|
|
add.w d5,d5 isolate arg1 sign from exponent
|
|
add.w d4,d4 isolate arg2 sign from exponent
|
|
eor.b d3,d5 adjust arg1 exponent to binary
|
|
eor.b d3,d4 adjust arg2 exponent to binary
|
|
sub.b d5,d4 subtract exponents
|
|
bvs.s fpdouf branch if overflow/underflow
|
|
clr.b d7 clear arg2 s+exp
|
|
swap.w d7 prepare high 16 bit compare
|
|
swap.w d6 against arg1 and arg2
|
|
cmp.w d6,d7 ? check if overflow will occur
|
|
bmi.s fpdnov branch if not
|
|
* adjust for fixed point divide overflow
|
|
add.b #2,d4 adjust exponent up one
|
|
bvs.s fpdov2 branch overflow here
|
|
ror.l #1,d7 shift down by power of two
|
|
fpdnov swap.w d7 correct arg2
|
|
move.b d3,d5 move $80 into d5.b
|
|
eor.w d5,d4 create sign and absolutize exponent
|
|
lsr.w #1,d4 d4.b now has sign+exponent of result
|
|
|
|
* now divide just using 16 bits into 24
|
|
move.l d7,d3 copy arg1 for initial divide
|
|
divu.w d6,d3 obtain test quotient
|
|
move.w d3,d5 save test quotient
|
|
|
|
* now multiply 16-bit divide result times full 24 bit divisor and compare
|
|
* with the dividend. multiplying back out with the full 24-bits allows
|
|
* us to see if the result was too large due to the 8 missing divisor bits
|
|
* used in the hardware divide. the result can only be too large by 1 unit.
|
|
mulu.w d6,d3 high divisor x quotient
|
|
sub.l d3,d7 d7=partial subtraction
|
|
swap.w d7 to low divisor
|
|
swap.w d6 rebuild arg1 to normal
|
|
move.w d6,d3 setup arg1 for product
|
|
clr.b d3 zero low byte
|
|
mulu.w d5,d3 find remaining product
|
|
sub.l d3,d7 now have full subtraction
|
|
bcc.s fpdqok branch first 16 bits correct
|
|
|
|
* estimate too high, decrement quotient by one
|
|
move.l d6,d3 rebuild divisor
|
|
clr.b d3 reverse halves
|
|
add.l d3,d7 add another divisor
|
|
sub.w #1,d5 decrement quotient
|
|
|
|
* compute last 8 bits with another divide. the exact remainder from the
|
|
* multiply and compare above is divided again by a 16-bit only divisor.
|
|
* however, this time we require only 9 bits of accuracy in the result
|
|
* (8 to make 24 bits total and 1 extra bit for rounding purposes) and this
|
|
* divide always returns a precision of at least 9 bits.
|
|
fpdqok move.l d6,d3 copy arg1 again
|
|
swap.w d3 first 16 bits divisor in d3.w
|
|
clr.w d7 into first 16 bits of dividend
|
|
divu.w d3,d7 obtain final 16 bit result
|
|
swap.w d5 first 16 quotient to high half
|
|
bmi.s fpdisn branch if normalized
|
|
* rare occurrance - unnormalized
|
|
* happends when mantissa arg1 < arg2 and they differ only in last 8 bits
|
|
move.w d7,d5 insert low word of quotient
|
|
add.l d5,d5 shift mantissa left one
|
|
sub.b #1,d4 adjust exponent down (cannot zero)
|
|
move.w d5,d7 cancel next instruction
|
|
|
|
* rebuild our final result and return
|
|
fpdisn move.w d7,d5 append next 16 bits
|
|
add.l #$80,d5 round to 24 bits (cannot overflow)
|
|
move.l d5,d7 return in d7
|
|
move.b d4,d7 finish result with sign+exponent
|
|
beq.s fpdund underflow if zero exponent
|
|
rts return result to caller
|
|
|
|
|
|
end
|
|
|