mirror of
https://github.com/SEPPDROID/Digital-Research-Source-Code.git
synced 2025-10-24 08:54:17 +00:00
113 lines
4.9 KiB
ArmAsm
113 lines
4.9 KiB
ArmAsm
ttl fast floating point square root (ffpsqrt)
|
|
*******************************************
|
|
* (c) copyright 1981 by motorola inc. *
|
|
*******************************************
|
|
|
|
********************************************
|
|
* ffpsqrt subroutine *
|
|
* *
|
|
* input: *
|
|
* d7 - floating point argument *
|
|
* *
|
|
* output: *
|
|
* d7 - floating point square root *
|
|
* *
|
|
* condition codes: *
|
|
* *
|
|
* n - cleared *
|
|
* z - set if result is zero *
|
|
* v - set if argument was negative*
|
|
* c - cleared *
|
|
* x - undefined *
|
|
* *
|
|
* registers d3 thru d6 are volatile *
|
|
* *
|
|
* code: 194 bytes stack work: 4 bytes *
|
|
* *
|
|
* notes: *
|
|
* 1) no overflows or underflows can *
|
|
* occur. *
|
|
* 2) a negative argument causes the *
|
|
* absolute value to be used and the *
|
|
* "v" bit set to indicate that a *
|
|
* negative square root was attempted. *
|
|
* *
|
|
* times: *
|
|
* argument zero 3.50 microseconds *
|
|
* minimum time > 0 187.50 microseconds *
|
|
* average time > 0 193.75 microseconds *
|
|
* maximum time > 0 200.00 microseconds *
|
|
********************************************
|
|
page
|
|
* module identification, section placement and linkage declarations.
* note(review): ffpcpyrt is declared external below but no instruction in
* the code visible here refers to it; presumably the xref exists only so
* that the copyright notice is pulled in at link time - confirm.
ffpsqrt idnt 1,1 ffp square root
|
|
|
|
section 9
|
|
|
|
xdef ffpsqrt entry point
|
|
xref ffpcpyrt copyright notice
|
|
|
|
* negative argument handler
* reached from ffpsqrt (bmi.s fpsinv) when bit 7 of the argument's low
* byte - the sign - is set.  clears that bit, calls ffpsqrt on the now
* non-negative value, then turns the "v" condition code on so the caller
* can tell that the square root of a negative number was requested.
* the nested bsr pushes one return address, which would account for the
* 4 bytes of stack work quoted in the routine header.
|
|
fpsinv and.b #$7f,d7 take absolute value
|
|
bsr.s ffpsqrt find sqrt(abs(x))
|
|
* intended instruction: or.b #$02,ccr (set "v" bit).  it is emitted as a
* raw longword because of the assembler error flagged on the dc.l line:
* $003c is the ori-to-ccr opcode word and $0002 its immediate word
* (bit 1 of the ccr is v).  only v is forced on; the other condition
* codes stay as ffpsqrt returned them.
|
|
dc.l $003c0002 **assembler error**
|
|
rts return to caller
|
|
|
|
*********************
|
|
* square root entry *
|
|
*********************
|
|
* in:  d7 = argument.  as used by the code below, the low byte holds the
*      sign (bit 7) and the exponent, the upper 24 bits the mantissa.
* out: d7 = square root; d7 is returned untouched when its low byte is 0.
* d3-d6 are used as scratch.  a0 is parked in d6 and put back at fpsfin.
*
* this first part halves the exponent and primes the registers for the
* bit-by-bit loop at fpsone/fpszero/fpsent:
*   d3 high word = result sign+exponent (only its low byte is used)
*   d3 low word  = dbcc counter, also the bit number for bset
*   d4 = value carried forward if the next result bit is a zero
*   d5 = trial value; the flags of the last subtract into d5 pick the bit
*   d7 = result mantissa under construction, bit 23 preset to one
*   a0 = pointer stepping through fpstbl
ffpsqrt move.b d7,d3 copy s+exponent over
|
|
* z comes from the move.b above: a zero sign+exponent byte is treated as
* a zero argument and the routine returns at once.
beq.s fpsrtn return zero if zero argument
|
|
* n still reflects the same move.b (beq does not alter the flags).
bmi.s fpsinv negative, reject with special condition codes
|
|
* bit 7 is known to be clear here, so the byte is just the exponent.
* the bit shifted out lands in carry and tells odd from even.
lsr.b #1,d3 divide exponent by two
|
|
bcc.s fpseven branch exponent was even
|
|
* odd exponent: round the halved exponent up and compensate by moving
* the mantissa right one bit.
add.b #1,d3 adjust odd values up by one
|
|
lsr.l #1,d7 offset odd exponent's mantissa one bit
|
|
* halving a biased exponent halves its bias as well; adding $20 puts it
* back.  this implies a bias of $40 - the number format itself is not
* defined in this file, so treat that as an assumption to confirm.
fpseven add.b #$20,d3 renormalize exponent
|
|
swap.w d3 save result s+exp for final move
|
|
* d3.w counts 23 down to -1.  after each dbcc decrement it is also the
* number of the result bit being decided (22 down to 0).
move.w #23,d3 setup loop for 24 bit generation
|
|
* exponent bits still sitting in the low byte of d7 are shifted out here.
lsr.l #7,d7 prepare first test value
|
|
move.l d7,d4 d4 - previous value during loop
|
|
move.l d7,d5 d5 - new test value during loop
|
|
move.l a0,d6 save address register
|
|
lea fpstbl(pc),a0 load table address
|
|
move.l #$00800000,d7 d7 - initial result (must be a one)
|
|
sub.l d7,d4 preset old value in case zero bit next
|
|
* $01200000 = $00800000 + $00a00000: the amount just taken from d4 plus
* a further $00a00000 (bits 23 and 21 set - apparently the preset result
* bit followed by the first '01' pattern).  the flags of this subtract
* are the ones tested on arrival at fpsent; bra does not change them.
sub.l #$01200000,d5 combine first loop calculations
|
|
bra.s fpsent go enter loop calculations
|
|
|
|
* square root calculation
|
|
* this is an optimized scheme for the recursive square root algorithm:
|
|
*
|
|
* step n+1:
|
|
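* test value <= .0_n ... 0_2 0_1 r_n ... r_2 r_1 0 1
|
|
* then generate a one in result bit r_(n+1)
|
|
* else generate a zero in result bit r_(n+1)
|
|
* (0_k: the n leading zero places, r_k: the n result bits found so far)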
|
|
* precalculations are done such that the entry is midway into step 2
|
|
|
|
* bit-by-bit loop.  control arrives at fpsent with the flags of the most
* recent subtract into d5 still valid.  every visit to fpsent decrements
* d3.w exactly once and decides one result bit:
*   n clear: dbmi's condition is false, so it counts d3 down and branches
*            to fpsone, which sets bit d3 of d7 and adopts d5 as the
*            value to carry forward.
*   n set:   dbmi's condition is true and it falls through; dbpl's
*            condition is then false, so dbpl counts d3 down and branches
*            to fpszero, which carries the old value d4 forward.
* when d3.w goes from 0 to -1 the counting dbcc falls through instead of
* branching (after an expired dbmi, dbpl's condition is true, so it falls
* through too) and the flags of the final subtract are left for the
* rounding test.
fpsone bset d3,d7 insert a one into this position
|
|
move.l d5,d4 update new test value
|
|
fpszero add.l d4,d4 multiply test result by two
|
|
move.l d4,d5 copy in case next bit zero
|
|
* one fpstbl longword is consumed per pass through this body.
sub.l (a0)+,d5 subtract the '01' ending pattern
|
|
sub.l d7,d5 subtract result bits collected so far
|
|
fpsent dbmi d3,fpsone branch if a one generated in the result
|
|
dbpl d3,fpszero branch if a zero generated
|
|
|
|
* all 24 bits calculated. now test result of 25th bit
* the dbcc instructions do not change the condition codes, so bls tests
* the flags of the last sub.l d7,d5: it is taken when carry or zero is
* set.
|
|
bls.s fpsfin branch next bit zero, no rounding
|
|
add.l #1,d7 round up (cannot overflow)
|
|
* move the 24-bit result from bits 23-0 up to bits 31-8, leaving the low
* byte clear for the sign+exponent byte inserted below.
fpsfin lsl.l #8,d7 normalize result
|
|
move.l d6,a0 restore address register
|
|
swap.w d3 restore s+exp save
|
|
* this move.b is the last flag-setting instruction on this path, so the
* condition codes handed back are the ones it produces.
move.b d3,d7 move in final sign+exponent
|
|
* also the direct exit for a zero argument (beq.s fpsrtn at the entry).
fpsrtn rts return to caller
|
|
|
|
* table to furnish '01' shifts during the algorithm loop
* read sequentially by sub.l (a0)+,d5 in the loop, one longword per pass:
* a single one bit stepping from bit 20 down to bit 0 (21 entries),
* followed by two zero longwords - 23 entries in all.
* note(review): the loop body appears to run 23 times (d3 = 22 down to
* 0), so the table would be consumed exactly, the two zeros covering the
* passes where the '01' pattern has dropped below bit 0.  confirm before
* changing either the table length or the loop count.
|
|
fpstbl dc.l 1<<20,1<<19,1<<18,1<<17,1<<16,1<<15
|
|
dc.l 1<<14,1<<13,1<<12,1<<11,1<<10,1<<9,1<<8
|
|
dc.l 1<<7,1<<6,1<<5,1<<4,1<<3,1<<2,1<<1,1<<0
|
|
dc.l 0,0
|
|
|
|
end
|