mirror of
				https://github.com/SEPPDROID/Digital-Research-Source-Code.git
				synced 2025-10-25 09:24:19 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			167 lines
		
	
	
		
			6.9 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			167 lines
		
	
	
		
			6.9 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
|          ttl       fast floating point divide (ffpdiv)
 | |
| *****************************************
 | |
| *  (c) copyright 1980 by motorola inc.  *
 | |
| *****************************************
 | |
|  
 | |
| ********************************************
 | |
| *           ffpdiv subroutine              *
 | |
| *                                          *
 | |
| * input:                                   *
 | |
| *        d6 - floating point divisor       *
 | |
| *        d7 - floating point dividend      *
 | |
| *                                          *
 | |
| * output:                                  *
 | |
| *        d7 - floating point quotient      *
 | |
| *                                          *
 | |
| * condition codes:                         *
 | |
| *        n - set if result negative        *
 | |
| *        z - set if result zero            *
 | |
| *        v - set if result overflowed      *
 | |
| *        c - undefined                     *
 | |
| *        x - undefined                     *
 | |
| *                                          *
 | |
| * registers d3 thru d5 volatile            *
 | |
| *                                          *
 | |
| * code: 150 bytes     stack work: 0 bytes  *
 | |
| *                                          *
 | |
| * notes:                                   *
 | |
| *   1) divisor is unaltered (d6).          *
 | |
| *   2) underflows return zero without      *
 | |
| *      any indicators set.                 *
 | |
| *   3) overflows return the highest value  *
 | |
| *      with the proper sign and the 'v'    *
 | |
| *      bit set in the ccr.                 *
 | |
| *   4) if a divide by zero is attempted    *
 | |
| *      the divide by zero exception trap   *
 | |
| *      is forced by this code with the     *
 | |
| *      original arguments intact.  if the  *
 | |
| *      exception returns with the denom-   *
 | |
| *      inator altered the divide operation *
 | |
| *      continues, otherwise an overflow    *
 | |
| *      is forced with the proper sign.     *
 | |
| *      the floating divide by zero can be  *
 | |
| *      distinguished from true zero divide *
 | |
| *      by the fact that it is an immediate *
 | |
| *      zero dividing into register d7.     *
 | |
| *                                          *
 | |
| * time: (8 mhz no wait states assumed)     *
 | |
| * dividend zero         5.250 microseconds *
 | |
| * minimum time others  72.750 microseconds *
 | |
| * maximum time others  85.000 microseconds *
 | |
| * average others       76.687 microseconds *
 | |
| *                                          *
 | |
| ********************************************
 | |
|          page
 | |
| ffpdiv   idnt      1,1  ffp divide
 | |
|  
 | |
|          xdef      ffpdiv     entry point
 | |
|          xref      ffpcpyrt   copyright notice
 | |
|  
 | |
|          section    9
 | |
|  
 | |
| * divide by zero exit
 | |
| fpddzr divu.w #0,d7     **force divide by zero **
 | |
|  
 | |
| * if the exception returns with altered denominator - continue divide
 | |
|          tst.l     d6        ? exception alter the zero
 | |
|          bne.s     ffpdiv    branch if so to continue
 | |
| * setup maximum number for divide overflow
 | |
| fpdovf or.l   #$ffffff7f,d7 maximize with proper sign
 | |
|        tst.b  d7        set condition code for sign
 | |
| *      or.w   #$02,ccr  set overflow bit
 | |
|        dc.l   $003c0002 ******sick assembler******
 | |
| fpdrtn rts              return to caller
 | |
|  
 | |
| * over or underflow detected
 | |
| fpdov2   swap.w    d6        restore arg1
 | |
|          swap.w    d7        restore arg2 for sign
 | |
| fpdovfs eor.b  d6,d7     setup correct sign
 | |
|        bra.s  fpdovf    and enter overflow handling
 | |
| fpdouf bmi.s  fpdovfs   branch if overflow
 | |
| fpdund move.l #0,d7     underflow to zero
 | |
|        rts              and return to caller
 | |
|  
 | |
| ***************
 | |
| * entry point *
 | |
| ***************
 | |
|  
 | |
| * first subtract exponents
 | |
| ffpdiv move.b d6,d5     copy arg1 (divisor)
 | |
|        beq.s  fpddzr    branch if divide by zero
 | |
|        move.l d7,d4     copy arg2 (dividend)
 | |
|        beq.s  fpdrtn    return zero if dividend zero
 | |
|        moveq  #-128,d3  setup sign mask
 | |
|        add.w  d5,d5     isolate arg1 sign from exponent
 | |
|        add.w  d4,d4     isolate arg2 sign from exponent
 | |
|        eor.b  d3,d5     adjust arg1 exponent to binary
 | |
|        eor.b  d3,d4     adjust arg2 exponent to binary
 | |
|        sub.b  d5,d4     subtract exponents
 | |
|        bvs.s  fpdouf    branch if overflow/underflow
 | |
|        clr.b  d7        clear arg2 s+exp
 | |
|        swap.w d7        prepare high 16 bit compare
 | |
|        swap.w d6        against arg1 and arg2
 | |
|        cmp.w  d6,d7     ? check if overflow will occur
 | |
|        bmi.s  fpdnov    branch if not
 | |
| * adjust for fixed point divide overflow
 | |
|        add.b  #2,d4     adjust exponent up one
 | |
|        bvs.s  fpdov2    branch overflow here
 | |
|        ror.l  #1,d7     shift down by power of two
 | |
| fpdnov swap.w d7        correct arg2
 | |
|        move.b d3,d5     move $80 into d5.b
 | |
|        eor.w  d5,d4     create sign and absolutize exponent
 | |
|        lsr.w  #1,d4     d4.b now has sign+exponent of result
 | |
|  
 | |
| * now divide just using 16 bits into 24
 | |
|        move.l d7,d3     copy arg1 for initial divide
 | |
|        divu.w d6,d3     obtain test quotient
 | |
|        move.w d3,d5     save test quotient
 | |
|  
 | |
| * now multiply 16-bit divide result times full 24 bit divisor and compare
 | |
| * with the dividend.  multiplying back out with the full 24-bits allows
 | |
| * us to see if the result was too large due to the 8 missing divisor bits
 | |
| * used in the hardware divide.  the result can only be too large by 1 unit.
 | |
|        mulu.w d6,d3     high divisor x quotient
 | |
|        sub.l  d3,d7     d7=partial subtraction
 | |
|        swap.w d7        to low divisor
 | |
|        swap.w d6        rebuild arg1 to normal
 | |
|        move.w d6,d3     setup arg1 for product
 | |
|        clr.b  d3        zero low byte
 | |
|        mulu.w d5,d3     find remaining product
 | |
|        sub.l  d3,d7     now have full subtraction
 | |
|        bcc.s  fpdqok    branch first 16 bits correct
 | |
|  
 | |
| * estimate too high, decrement quotient by one
 | |
|        move.l d6,d3     rebuild divisor
 | |
|        clr.b  d3        reverse halves
 | |
|        add.l  d3,d7     add another divisor
 | |
|        sub.w  #1,d5     decrement quotient
 | |
|  
 | |
| * compute last 8 bits with another divide.  the exact remainder from the
 | |
| * multiply and compare above is divided again by a 16-bit only divisor.
 | |
| * however, this time we require only 9 bits of accuracy in the result
 | |
| * (8 to make 24 bits total and 1 extra bit for rounding purposes) and this
 | |
| * divide always returns a precision of at least 9 bits.
 | |
| fpdqok move.l d6,d3     copy arg1 again
 | |
|        swap.w d3        first 16 bits divisor in d3.w
 | |
|        clr.w  d7        into first 16 bits of dividend
 | |
|        divu.w d3,d7     obtain final 16 bit result
 | |
|        swap.w d5        first 16 quotient to high half
 | |
|        bmi.s  fpdisn    branch if normalized
 | |
| * rare occurrance - unnormalized
 | |
| * happends when mantissa arg1 < arg2 and they differ only in last 8 bits
 | |
|        move.w d7,d5     insert low word of quotient
 | |
|        add.l  d5,d5     shift mantissa left one
 | |
|        sub.b  #1,d4     adjust exponent down (cannot zero)
 | |
|        move.w d5,d7     cancel next instruction
 | |
|  
 | |
| * rebuild our final result and return
 | |
| fpdisn move.w d7,d5     append next 16 bits
 | |
|        add.l  #$80,d5   round to 24 bits (cannot overflow)
 | |
|        move.l d5,d7     return in d7
 | |
|        move.b d4,d7     finish result with sign+exponent
 | |
|        beq.s  fpdund    underflow if zero exponent
 | |
|        rts              return result to caller
 | |
|  
 | |
|  
 | |
|        end
 |