mirror of
				https://github.com/SEPPDROID/Digital-Research-Source-Code.git
				synced 2025-10-25 01:14:21 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			286 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			286 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
|          ttl       ffp sine cosine tangent (ffpsin/ffpcos/ffptan/ffpsincs)
 | |
| ***************************************
 | |
| * (c) copyright 1981 by motorola inc. *
 | |
| ***************************************
 | |
|  
 | |
| *************************************************
 | |
| *        ffpsin ffpcos ffptan ffpsincs          *
 | |
| *     fast floating point sine/cosine/tangent   *
 | |
| *                                               *
 | |
| *  input:   d7 - input argument (radian)        *
 | |
| *                                               *
 | |
| *  output:  d7 - function result                *
 | |
| *           (ffpsincs also returns d6)          *
 | |
| *                                               *
 | |
| *     all other registers totally transparent   *
 | |
| *                                               *
 | |
| *  code size: 334 bytes   stack work: 38 bytes  *
 | |
| *                                               *
 | |
| *  condition codes:                             *
 | |
| *        z - set if result in d7 is zero        *
 | |
| *        n - set if result in d7 is negative    *
 | |
| *        c - undefined                          *
 | |
| *        v - set if result is meaningless       *
 | |
| *            (input magnitude too large)        *
 | |
| *        x - undefined                          *
 | |
| *                                               *
 | |
| *  functions:                                   *
 | |
| *             ffpsin   -  sine result           *
 | |
| *             ffpcos   -  cosine result         *
 | |
| *             ffptan   -  tangent result        *
 | |
| *             ffpsincs -  both sine and cosine  *
 | |
| *                         d6 - sin, d7 - cosine *
 | |
| *                                               *
 | |
| *  notes:                                       *
 | |
| *    1) input values are in radians.            *
 | |
| *    2) function ffpsincs returns both sine     *
 | |
| *       and cosine twice as fast as calculating *
 | |
| *       the two functions independently for     *
 | |
| *       the same value.  this is handy for      *
 | |
| *       graphics processing.                    *
 | |
| *    2) input arguments larger than two pi      *
 | |
| *       suffer reduced precision.  the larger   *
 | |
| *       the argument, the smaller the precision.*
 | |
| *       excessively large arguments which have  *
 | |
| *       less than 5 bits of precision are       *
 | |
| *       returned unchanged with the "v" bit set.*
 | |
| *    3) for tangent angles of infinite value    *
 | |
| *       the largest possible positive number    *
 | |
| *       is returned ($ffffff7f). this still     *
 | |
| *       gives results well within single        *
 | |
| *       precision calculation.                  *
 | |
| *    4) spot checks show errors bounded by      *
 | |
| *       4 x 10**-7 but for arguments close to   *
 | |
| *       pi/2 intervals where 10**-5 is seen.    *
 | |
| *                                               *
 | |
| *  time: (8mhz no wait states and argument      *
 | |
| *         assumed within +-pi)                  *
 | |
| *                                               *
 | |
| *           ffpsin       413 microseconds       *
 | |
| *           ffpcos       409 microseconds       *
 | |
| *           ffptan       501 microseconds       *
 | |
| *           ffpsincs     420 microseconds       *
 | |
| *************************************************
 | |
|          page
 | |
| ffpsin   idnt  1,2 ffp sine cosine tangent
 | |
|  
 | |
|          opt       pcs
 | |
|  
 | |
|          section   9
 | |
|  
 | |
|          xdef      ffpsin,ffpcos,ffptan,ffpsincs entry points
 | |
|  
 | |
|          xref      ffptheta                    inverse tangent table
 | |
|  
 | |
|          xref      ffpmul2,ffpdiv,ffpsub    multiply, divide and subtract
 | |
|          xref      ffptnorm          transcendental normalize routine
 | |
|          xref      ffpcpyrt            copyright stub
 | |
|  
 | |
| pi       equ       $c90fdb42          floating constant pi
 | |
| fixedpi  equ       $c90fdaa2          pi skeleton to 32 bits precision
 | |
| inv2pi   equ       $a2f9833e          inverse of two-pi
 | |
| kinv     equ       $9b74ee40          floating k inverse
 | |
| nkfact   equ       $ec916240          negative k inverse
 | |
|  
 | |
| ********************************************
 | |
| * entry for returning both sine and cosine *
 | |
| ********************************************
 | |
| ffpsincs move.w    #-2,-(sp)           flag both sine and cosine wanted
 | |
|          bra.s     fpscom              enter common code
 | |
|  
 | |
| **********************
 | |
| * tangent entry point*
 | |
| **********************
 | |
| ffptan   move.w    #-1,-(sp)           flag tangent with minus value
 | |
|          bra.s     fpschl              check very small values
 | |
|  
 | |
| **************************
 | |
| * cosine only entry point*
 | |
| **************************
 | |
| ffpcos   move.w    #1,-(sp)            flag cosine with positive value
 | |
|          bra.s     fpscom              enter common code
 | |
|  
 | |
| * negative sine/tangent small value check
 | |
| fpschm   cmp.b     #$80+$40-8,d7       ? less or same as -2**-9
 | |
|          bhi.s     fpscom              continue if not too small
 | |
| * return argument
 | |
| fpsrti   add.l     #2,sp               rid internal parameter
 | |
|          tst.b     d7                  set condition codes
 | |
|          rts                           return to caller
 | |
|  
 | |
| ************************
 | |
| * sine only entry point*
 | |
| ************************
 | |
| ffpsin   clr.w     -(sp)               flag sine with zero
 | |
| * sine and tangent values < 2**-9 return identities
 | |
| fpschl   tst.b     d7                  test sign
 | |
|          bmi.s     fpschm              branch minus
 | |
|          cmp.b     #$40-8,d7           ? less or same than 2**-9
 | |
|          bls.s     fpsrti              return identity
 | |
|  
 | |
| * save registers and insure input within + or - pi range
 | |
| fpscom   movem.l   d1-d6/a0,-(sp)      save all work registers
 | |
|          move.l    d7,d2               copy input over
 | |
|          add.b     d7,d7               rid sign bit
 | |
|          cmp.b     #(64+5)<<1,d7       ? abs(arg) < 2**6 (32)
 | |
|          bls.s     fpsnlr              branch yes, not too large
 | |
| * argument is too large to subtract to within range
 | |
|          cmp.b     #(64+20)<<1,d7      ? test excessive size (>2**20)
 | |
|          bls.s     fpsgpr              no, go ahead and use
 | |
| * error - argument so large result has no precision
 | |
| *        or.b      #$02,ccr            force v bit on
 | |
|          dc.l      $003c0002           *****assembler error*****
 | |
|          movem.l   (sp)+,d1-d6/a0      restore registers
 | |
|          add.l     #2,sp               clean internal argument off stack
 | |
|          rts                           return to caller
 | |
|  
 | |
| * we must find mod(arg,twopi) since argument is too large for subtractions
 | |
| fpsgpr   move.l    #inv2pi,d6          load up 2*pi inverse constant
 | |
|          move.l    d2,d7               copy over input argument
 | |
|          jsr       ffpmul2             divide by 2pi (via multiply inverse)
 | |
| * convert quotient to float integer
 | |
|          move.b    d7,d5               copy exponent over
 | |
|          and.b     #$7f,d5             rid sign from exponent
 | |
|          sub.b     #64+24,d5           find fractional precision
 | |
|          neg.b     d5                  make positive
 | |
|          move.l    #-1,d4              setup mask of all ones
 | |
|          clr.b     d4                  start zeroes at low byte
 | |
|          lsl.l     d5,d4               shift zeroes into fractional part
 | |
|          or.b      #$ff,d4             do not remove sign and exponent
 | |
|          and.l     d4,d7               strip fractional bits entirely
 | |
|          move.l    #pi+1,d6            load up 2*pi constant
 | |
|          jsr       ffpmul2             multiply back out
 | |
|          move.l    d7,d6               setup to subtract multiple of twopi
 | |
|          move.l    d2,d7               move argument in
 | |
|          jsr       ffpsub              find remainder of twopi divide
 | |
|          move.l    d7,d2               use it as new input argument
 | |
|  
 | |
| * convert argument to binary(31,26) precision for reduction within +-pi
 | |
| fpsnlr   move.l    #$0c90fdaa,d4      fixedpi>>4 load pi
 | |
|          move.l    d2,d7               copy float argument
 | |
|          clr.b     d7                  clear sign and exponent
 | |
|          tst.b     d2                  test sign
 | |
|          bmi.s     fpsnmi              branch negative
 | |
|          sub.b     #64+6,d2            obtain shift value
 | |
|          neg.b     d2                  for 5 bit non-fraction bits
 | |
|          cmp.b     #31,d2              ? very small number
 | |
|          bls.s     fpssh1              no, go ahead and shift
 | |
|          move.l    #0,d7               force to zero
 | |
| fpssh1   lsr.l     d2,d7               convert to fixed point
 | |
| * force to +pi or below
 | |
| fpspck   cmp.l     d4,d7               ? greater than pi
 | |
|          ble.s     fpsckm              branch not
 | |
|          sub.l     d4,d7               subtract
 | |
|          sub.l     d4,d7               .  twopi
 | |
|          bra.s     fpspck              and check again
 | |
|  
 | |
| fpsnmi   sub.b     #$80+64+6,d2        rid sign and get shift value
 | |
|          neg.b     d2                  for 5 non-fractional bits
 | |
|          cmp.b     #31,d2              ? very small number
 | |
|          bls.s     fpssh2              no, go ahead and shift
 | |
|          move.l    #0,d7               force to zero
 | |
| fpssh2   lsr.l     d2,d7               convert to fixed point
 | |
|          neg.l     d7                  make negative
 | |
|          neg.l     d4                  make -pi
 | |
| * force to -pi or above
 | |
| fpsnck   cmp.l     d4,d7               ? less than -pi
 | |
|          bge.s     fpsckm              branch not
 | |
|          sub.l     d4,d7               add
 | |
|          sub.l     d4,d7               .  twopi
 | |
|          bra.s     fpsnck              and check again
 | |
|  
 | |
| *****************************************
 | |
| * cordic calculation registers:         *
 | |
| * d1 - loop count   a0 - table pointer  *
 | |
| * d2 - shift count                      *
 | |
| * d3 - x'   d5 - x                      *
 | |
| * d4 - y'   d6 - y                      *
 | |
| * d7 - test argument                    *
 | |
| *****************************************
 | |
|  
 | |
| * input within range, now start cordic setup
 | |
| fpsckm   move.l    #0,d5               x=0
 | |
|          move.l    #nkfact,d6          y=negative inverse k factor seed
 | |
|          move.l    #$3243f6a8,d4       fixedpi>>2, setup fixed pi/2 constant
 | |
|          asl.l     #3,d7               now to binary(31,29) precision
 | |
|          bmi.s     fpsap2              branch if minus to add pi/2
 | |
|          neg.l     d6                  y=positive inverse k factor seed
 | |
|          neg.l     d4                  subtract pi/2 for positive argument
 | |
| fpsap2   add.l     d4,d7               add constant
 | |
|          lea       ffptheta,a0         load arctangent table
 | |
|          move.l    #23,d1              loop 24 times
 | |
|          move.l    #-1,d2              prime shift counter
 | |
| * cordic loop
 | |
| fsinlp   add.w     #1,d2               increment shift count
 | |
|          move.l    d5,d3               copy x
 | |
|          move.l    d6,d4               copy y
 | |
|          asr.l     d2,d3               shift for x'
 | |
|          asr.l     d2,d4               shift for y'
 | |
|          tst.l     d7                  test arg value
 | |
|          bmi.s     fsbmi               branch minus test
 | |
|          sub.l     d4,d5               x=x-y'
 | |
|          add.l     d3,d6               y=y+x'
 | |
|          sub.l     (a0)+,d7            arg=arg-table(n)
 | |
|          dbra      d1,fsinlp           loop until done
 | |
|          bra.s     fscom               enter common code
 | |
| fsbmi    add.l     d4,d5               x=x+y'
 | |
|          sub.l     d3,d6               y=y-x'
 | |
|          add.l     (a0)+,d7            arg=arg+table(n)
 | |
|          dbra      d1,fsinlp           loop until done
 | |
|  
 | |
| * now split up tangent and ffpsincs from sine and cosine
 | |
| fscom    move.w    7*4(sp),d1          reload internal parameter
 | |
|          bpl.s     fssincos            branch for sine or cosine
 | |
|  
 | |
|          add.b     #1,d1               see if was -1 for tangent
 | |
|          bne.s     fsdual              no, must be both sin and cosine
 | |
| * tangent finish
 | |
|          bsr.s     fsfloat             float y (sin)
 | |
|          move.l    d6,d7               setup for divide into
 | |
|          move.l    d5,d6               prepare x
 | |
|          bsr.s     fsfloat             float x (cos)
 | |
|          beq.s     fstinf              branch infinite result
 | |
|          jsr       ffpdiv              tangent = sin/cos
 | |
| fsinfrt  movem.l   (sp)+,d1-d6/a0      restore registers
 | |
|          add.l     #2,sp               delete internal parameter
 | |
|          rts                           return to caller
 | |
| * tangent is infinite. return maximum positive number.
 | |
| fstinf   move.l    #$ffffff7f,d7       largest ffp number
 | |
|          bra.s     fsinfrt             and clean up
 | |
|  
 | |
| * sine and cosine
 | |
| fssincos beq.s     fssine              branch if sine
 | |
|          move.l    d5,d6               use x for cosine
 | |
| fssine   bsr.s     fsfloat             convert to float
 | |
|          move.l    d6,d7               return result
 | |
|          tst.b     d7                  and condition code test
 | |
|          movem.l   (sp)+,d1-d6/a0      restore registers
 | |
|          add.l     #2,sp               delete internal parameter
 | |
|          rts                           return to caller
 | |
|  
 | |
| * both sine and cosine
 | |
| fsdual   move.l    d5,-(sp)            save cosine derivitive
 | |
|          bsr.s     fsfloat             convert sine derivitive to float
 | |
|          move.l    d6,6*4(sp)          place sine into saved d6
 | |
|          move.l    (sp)+,d6            restore cosine derivitive
 | |
|          bra.s     fssine              and continue restoring sine on the sly
 | |
|  
 | |
| * fsfloat - float internal precision but truncate to zero if < 2**-21
 | |
| fsfloat  move.l    d6,d4               copy internal precision value
 | |
|          bmi.s     fsfneg              branch negative
 | |
|          cmp.l     #$000000ff,d6       ? test magnitude
 | |
| *        bhi       ffptnorm            normalize if not too small
 | |
| 		bhi			dobranch
 | |
| fsfzro   move.l    #0,d6               return a zero
 | |
|          rts                           return to caller
 | |
| fsfneg   asr.l     #8,d4               see if all ones bits 8-31
 | |
|          add.l     #1,d4               ? goes to zero
 | |
| *        bne       ffptnorm            normalize if not too small
 | |
| 		bne			dobranch
 | |
|          bra.s     fsfzro              return zero
 | |
|  
 | |
| dobranch:
 | |
| 		jmp		ffptnorm			16-bit no-MMU problem
 | |
|  
 | |
|          end
 |