mirror of
https://github.com/SEPPDROID/Digital-Research-Source-Code.git
synced 2025-10-23 00:14:25 +00:00
296 lines
14 KiB
ArmAsm
296 lines
14 KiB
ArmAsm
ttl ffp sine cosine tangent (ffpsin/ffpcos/ffptan/ffpsincs)
|
||
***************************************
|
||
* (c) copyright 1981 by motorola inc. *
|
||
***************************************
|
||
|
||
*************************************************
|
||
* ffpsin ffpcos ffptan ffpsincs *
|
||
* fast floating point sine/cosine/tangent *
|
||
* *
|
||
* input: d7 - input argument (radian) *
|
||
* *
|
||
* output: d7 - function result *
|
||
* (ffpsincs also returns d6) *
|
||
* *
|
||
* all other registers totally transparent *
|
||
* *
|
||
* code size: 334 bytes stack work: 38 bytes *
|
||
* *
|
||
* condition codes: *
|
||
* z - set if result in d7 is zero *
|
||
* n - set if result in d7 is negative *
|
||
* c - undefined *
|
||
* v - set if result is meaningless *
|
||
* (input magnitude too large) *
|
||
* x - undefined *
|
||
* *
|
||
* functions: *
|
||
* ffpsin - sine result *
|
||
* ffpcos - cosine result *
|
||
* ffptan - tangent result *
|
||
* ffpsincs - both sine and cosine *
|
||
* d6 - sin, d7 - cosine *
|
||
* *
|
||
* notes: *
|
||
* 1) input values are in radians. *
|
||
* 2) function ffpsincs returns both sine *
|
||
* and cosine twice as fast as calculating *
|
||
* the two functions independently for *
|
||
* the same value. this is handy for *
|
||
* graphics processing. *
|
||
* 2) input arguments larger than two pi *
|
||
* suffer reduced precision. the larger *
|
||
* the argument, the smaller the precision.*
|
||
* excessively large arguments which have *
|
||
* less than 5 bits of precision are *
|
||
* returned unchanged with the "v" bit set.*
|
||
* 3) for tangent angles of infinite value *
|
||
* the largest possible positive number *
|
||
* is returned ($ffffff7f). this still *
|
||
* gives results well within single *
|
||
* precision calculation. *
|
||
* 4) spot checks show errors bounded by *
|
||
* 4 x 10**-7 but for arguments close to *
|
||
* pi/2 intervals where 10**-5 is seen. *
|
||
* *
|
||
* time: (8mhz no wait states and argument *
|
||
* assumed within +-pi) *
|
||
* *
|
||
* ffpsin 413 microseconds *
|
||
* ffpcos 409 microseconds *
|
||
* ffptan 501 microseconds *
|
||
* ffpsincs 420 microseconds *
|
||
*************************************************
|
||
page
|
||
ffpsin idnt 1,2 ffp sine cosine tangent
|
||
|
||
opt pcs
|
||
|
||
section 9
|
||
|
||
xdef ffpsin,ffpcos,ffptan,ffpsincs entry points
|
||
|
||
xref ffptheta inverse tangent table
|
||
|
||
xref ffpmul2,ffpdiv,ffpsub multiply, divide and subtract
|
||
xref ffptnorm transcendental normalize routine
|
||
xref ffpcpyrt copyright stub
|
||
|
||
pi equ $c90fdb42 floating constant pi
|
||
fixedpi equ $c90fdaa2 pi skeleton to 32 bits precision
|
||
inv2pi equ $a2f9833e inverse of two-pi
|
||
kinv equ $9b74ee40 floating k inverse
|
||
nkfact equ $ec916240 negative k inverse
|
||
|
||
********************************************
|
||
* entry for returning both sine and cosine *
|
||
********************************************
|
||
ffpsincs move.w #-2,-(sp) flag both sine and cosine wanted
|
||
bra.s fpscom enter common code
|
||
|
||
**********************
|
||
* tangent entry point*
|
||
**********************
|
||
ffptan move.w #-1,-(sp) flag tangent with minus value
|
||
bra.s fpschl check very small values
|
||
|
||
**************************
|
||
* cosine only entry point*
|
||
**************************
|
||
ffpcos move.w #1,-(sp) flag cosine with positive value
|
||
bra.s fpscom enter common code
|
||
|
||
* negative sine/tangent small value check
|
||
fpschm cmp.b #$80+$40-8,d7 ? less or same as -2**-9
|
||
bhi.s fpscom continue if not too small
|
||
* return argument
|
||
fpsrti add.l #2,sp rid internal parameter
|
||
tst.b d7 set condition codes
|
||
rts return to caller
|
||
|
||
************************
|
||
* sine only entry point*
|
||
************************
|
||
ffpsin clr.w -(sp) flag sine with zero
|
||
* sine and tangent values < 2**-9 return identities
|
||
fpschl tst.b d7 test sign
|
||
bmi.s fpschm branch minus
|
||
cmp.b #$40-8,d7 ? less or same than 2**-9
|
||
bls.s fpsrti return identity
|
||
|
||
* save registers and insure input within + or - pi range
|
||
fpscom movem.l d1-d6/a0,-(sp) save all work registers
|
||
move.l d7,d2 copy input over
|
||
add.b d7,d7 rid sign bit
|
||
cmp.b #(64+5)<<1,d7 ? abs(arg) < 2**6 (32)
|
||
bls.s fpsnlr branch yes, not too large
|
||
* argument is too large to subtract to within range
|
||
cmp.b #(64+20)<<1,d7 ? test excessive size (>2**20)
|
||
bls.s fpsgpr no, go ahead and use
|
||
* error - argument so large result has no precision
|
||
* or.b #$02,ccr force v bit on
|
||
dc.l $003c0002 *****assembler error*****
|
||
movem.l (sp)+,d1-d6/a0 restore registers
|
||
add.l #2,sp clean internal argument off stack
|
||
rts return to caller
|
||
|
||
* we must find mod(arg,twopi) since argument is too large for subtractions
|
||
fpsgpr move.l #inv2pi,d6 load up 2*pi inverse constant
|
||
move.l d2,d7 copy over input argument
|
||
jsr ffpmul2 divide by 2pi (via multiply inverse)
|
||
* convert quotient to float integer
|
||
move.b d7,d5 copy exponent over
|
||
and.b #$7f,d5 rid sign from exponent
|
||
sub.b #64+24,d5 find fractional precision
|
||
neg.b d5 make positive
|
||
move.l #-1,d4 setup mask of all ones
|
||
clr.b d4 start zeroes at low byte
|
||
lsl.l d5,d4 shift zeroes into fractional part
|
||
or.b #$ff,d4 do not remove sign and exponent
|
||
and.l d4,d7 strip fractional bits entirely
|
||
move.l #pi+1,d6 load up 2*pi constant
|
||
jsr ffpmul2 multiply back out
|
||
move.l d7,d6 setup to subtract multiple of twopi
|
||
move.l d2,d7 move argument in
|
||
jsr ffpsub find remainder of twopi divide
|
||
move.l d7,d2 use it as new input argument
|
||
|
||
* convert argument to binary(31,26) precision for reduction within +-pi
|
||
fpsnlr move.l #$0c90fdaa,d4 fixedpi>>4 load pi
|
||
move.l d2,d7 copy float argument
|
||
clr.b d7 clear sign and exponent
|
||
tst.b d2 test sign
|
||
bmi.s fpsnmi branch negative
|
||
sub.b #64+6,d2 obtain shift value
|
||
neg.b d2 for 5 bit non-fraction bits
|
||
cmp.b #31,d2 ? very small number
|
||
bls.s fpssh1 no, go ahead and shift
|
||
move.l #0,d7 force to zero
|
||
fpssh1 lsr.l d2,d7 convert to fixed point
|
||
* force to +pi or below
|
||
fpspck cmp.l d4,d7 ? greater than pi
|
||
ble.s fpsckm branch not
|
||
sub.l d4,d7 subtract
|
||
sub.l d4,d7 . twopi
|
||
bra.s fpspck and check again
|
||
|
||
fpsnmi sub.b #$80+64+6,d2 rid sign and get shift value
|
||
neg.b d2 for 5 non-fractional bits
|
||
cmp.b #31,d2 ? very small number
|
||
bls.s fpssh2 no, go ahead and shift
|
||
move.l #0,d7 force to zero
|
||
fpssh2 lsr.l d2,d7 convert to fixed point
|
||
neg.l d7 make negative
|
||
neg.l d4 make -pi
|
||
* force to -pi or above
|
||
fpsnck cmp.l d4,d7 ? less than -pi
|
||
bge.s fpsckm branch not
|
||
sub.l d4,d7 add
|
||
sub.l d4,d7 . twopi
|
||
bra.s fpsnck and check again
|
||
|
||
*****************************************
|
||
* cordic calculation registers: *
|
||
* d1 - loop count a0 - table pointer *
|
||
* d2 - shift count *
|
||
* d3 - x' d5 - x *
|
||
* d4 - y' d6 - y *
|
||
* d7 - test argument *
|
||
*****************************************
|
||
|
||
* input within range, now start cordic setup
|
||
fpsckm move.l #0,d5 x=0
|
||
move.l #nkfact,d6 y=negative inverse k factor seed
|
||
move.l #$3243f6a8,d4 fixedpi>>2, setup fixed pi/2 constant
|
||
asl.l #3,d7 now to binary(31,29) precision
|
||
bmi.s fpsap2 branch if minus to add pi/2
|
||
neg.l d6 y=positive inverse k factor seed
|
||
neg.l d4 subtract pi/2 for positive argument
|
||
fpsap2 add.l d4,d7 add constant
|
||
lea ffptheta,a0 load arctangent table
|
||
move.l #23,d1 loop 24 times
|
||
move.l #-1,d2 prime shift counter
|
||
* cordic loop
|
||
fsinlp add.w #1,d2 increment shift count
|
||
move.l d5,d3 copy x
|
||
move.l d6,d4 copy y
|
||
asr.l d2,d3 shift for x'
|
||
asr.l d2,d4 shift for y'
|
||
tst.l d7 test arg value
|
||
bmi.s fsbmi branch minus test
|
||
sub.l d4,d5 x=x-y'
|
||
add.l d3,d6 y=y+x'
|
||
sub.l (a0)+,d7 arg=arg-table(n)
|
||
dbra d1,fsinlp loop until done
|
||
bra.s fscom enter common code
|
||
fsbmi add.l d4,d5 x=x+y'
|
||
sub.l d3,d6 y=y-x'
|
||
add.l (a0)+,d7 arg=arg+table(n)
|
||
dbra d1,fsinlp loop until done
|
||
|
||
* now split up tangent and ffpsincs from sine and cosine
|
||
fscom move.w 7*4(sp),d1 reload internal parameter
|
||
bpl.s fssincos branch for sine or cosine
|
||
|
||
add.b #1,d1 see if was -1 for tangent
|
||
bne.s fsdual no, must be both sin and cosine
|
||
* tangent finish
|
||
bsr.s fsfloat float y (sin)
|
||
move.l d6,d7 setup for divide into
|
||
move.l d5,d6 prepare x
|
||
bsr.s fsfloat float x (cos)
|
||
beq.s fstinf branch infinite result
|
||
jsr ffpdiv tangent = sin/cos
|
||
fsinfrt movem.l (sp)+,d1-d6/a0 restore registers
|
||
add.l #2,sp delete internal parameter
|
||
rts return to caller
|
||
* tangent is infinite. return maximum positive number.
|
||
fstinf move.l #$ffffff7f,d7 largest ffp number
|
||
bra.s fsinfrt and clean up
|
||
|
||
* sine and cosine
|
||
fssincos beq.s fssine branch if sine
|
||
move.l d5,d6 use x for cosine
|
||
fssine bsr.s fsfloat convert to float
|
||
move.l d6,d7 return result
|
||
tst.b d7 and condition code test
|
||
movem.l (sp)+,d1-d6/a0 restore registers
|
||
add.l #2,sp delete internal parameter
|
||
rts return to caller
|
||
|
||
* both sine and cosine
|
||
fsdual move.l d5,-(sp) save cosine derivitive
|
||
bsr.s fsfloat convert sine derivitive to float
|
||
move.l d6,6*4(sp) place sine into saved d6
|
||
move.l (sp)+,d6 restore cosine derivitive
|
||
bra.s fssine and continue restoring sine on the sly
|
||
|
||
* fsfloat - float internal precision but truncate to zero if < 2**-21
|
||
fsfloat move.l d6,d4 copy internal precision value
|
||
bmi.s fsfneg branch negative
|
||
cmp.l #$000000ff,d6 ? test magnitude
|
||
bhi dobranch ffptnorm normalize if not too small
|
||
fsfzro move.l #0,d6 return a zero
|
||
rts return to caller
|
||
fsfneg asr.l #8,d4 see if all ones bits 8-31
|
||
add.l #1,d4 ? goes to zero
|
||
bne dobranch ffptnorm normalize if not too small
|
||
bra.s fsfzro return zero
|
||
dobranch:
|
||
jmp ffptnorm for loader problem
|
||
|
||
|
||
end
|
||
|
||
|