div64.S 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. /*
  2. * linux/arch/arm/lib/div64.S
  3. *
  4. * Optimized computation of 64-bit dividend / 32-bit divisor
  5. *
  6. * Author: Nicolas Pitre
  7. * Created: Oct 5, 2003
  8. * Copyright: Monta Vista Software, Inc.
  9. *
  10. * This program is free software; you can redistribute it and/or modify
  11. * it under the terms of the GNU General Public License version 2 as
  12. * published by the Free Software Foundation.
  13. */
  /* Register names used by the 64-bit division routines in this file. */
#define xl      r0
#define xh      r1
#define yl      r2
#define yh      r3

/* UNWIND() discards its argument, so unwind directives are compiled out. */
#define UNWIND(args...)

/* ARM() emits its argument unchanged; THUMB() discards it. */
#define ARM(args...)    args
#define THUMB(args...)

/*
 * ENTRY(sym): apply ".align 3", declare sym as a global symbol of
 * type %function, and place the label sym at the current location.
 */
#define ENTRY(sym)                      \
        .align  3                       ;\
        .globl  sym                     ;\
        .type   sym,%function           ;\
sym:

/* ENDPROC(sym): set the size of sym to the distance from its label to here. */
#define ENDPROC(sym)                    ;\
        .size   sym, . - sym
  /*
   * UINT64
   * DivU64x32 (
   *   IN  UINT64  Dividend,
   *   IN  UINTN   Divisor,
   *   OUT UINTN   *Remainder OPTIONAL
   *   )
   *
   * Divide a 64-bit value by a 32-bit value and return the 64-bit quotient.
   * N.B. (original author's note) only works for 31-bit divisors.
   *
   * Register use on entry, assuming the standard AAPCS argument layout
   * for the prototype above (little-endian register pairing):
   *   r1:r0 = Dividend   (xh:xl)
   *   r2    = Divisor
   *   r3    = Remainder pointer, may be NULL
   *
   * On return:
   *   r1:r0 = quotient
   *   *Remainder = remainder, written only when Remainder is non-NULL
   *
   * r4-r6 and lr are saved and restored. r6 is not used by this routine;
   * saving four registers makes the frame 16 bytes.
   */
ENTRY(__DivU64x32)
        stmfd   sp!, {r4-r6, lr}
        mov     r5, r3                  @ keep Remainder: r3 is yh and gets overwritten by __do_div64
        mov     r4, r2                  @ __do_div64 expects the divisor in r4
        bl      __do_div64              @ yh:yl = quotient, xh = remainder
        teq     r5, #0                  @ Remainder == NULL ?
        strne   xh, [r5]                @ store remainder before r1 (xh) is reused below
        mov     r0, yl                  @ quotient, low word
        mov     r1, yh                  @ quotient, high word
        ldmfd   sp!, {r4-r6, pc}
ENDPROC(__DivU64x32)
  /*
   * __do_div64: divide a 64-bit dividend by a 32-bit divisor.
   *
   * The register interface is deliberately non-standard (it does not
   * follow the C calling convention), so it must only be reached from
   * callers written for it, such as do_div() in include/asm/div64.h.
   *
   * Inputs:
   *   xh:xl = dividend (clobbered)
   *   r4    = divisor  (preserved)
   *
   * Outputs:
   *   yh:yl = quotient
   *   xh    = remainder
   *
   * Clobbered registers: xl, ip
   */
ENTRY(__do_div64)
UNWIND(.fnstart)

        @ Cheap cases first: ip = divisor - 1.
        subs    ip, r4, #1
        bls     .Ldiv64_zero_or_one     @ divisor is 0 or 1
        tst     ip, r4
        beq     .Ldiv64_pow2            @ divisor & (divisor - 1) == 0

        @ The high quotient word is non-zero only when xh >= divisor.
        cmp     xh, r4
        mov     yh, #0
        blo     .Ldiv64_low

        @ Shift a copy of the divisor up to the top set bit of xh.
        @ yl holds the shifted copy (r4 keeps the original value) and
        @ ip holds the quotient bit that corresponds to that shift.
        clz     yl, r4
        clz     ip, xh
        sub     yl, yl, ip
        mov     ip, #1
        mov     ip, ip, lsl yl
        mov     yl, r4, lsl yl

        @ Shift-and-subtract loop producing the high quotient word.
        @ It ends when xh reaches zero or the quotient bit is shifted out.
.Ldiv64_high_loop:
        cmp     xh, yl
        orrcs   yh, yh, ip
        subcss  xh, xh, yl
        movnes  ip, ip, lsr #1
        mov     yl, yl, lsr #1
        bne     .Ldiv64_high_loop

        @ Low quotient word. When xh is zero and xl is below the divisor,
        @ the low word is 0 and xl is already the remainder.
.Ldiv64_low:
        cmp     xh, #0
        mov     yl, #0
        cmpeq   xl, r4
        movlo   xh, xl
        movlo   pc, lr

        @ Loop for the low bit positions. Here the remainder is shifted
        @ left one bit at a time (instead of shifting the divisor right),
        @ and the bit carried out of xh is taken into account.
        mov     ip, #0x80000000
.Ldiv64_low_loop:
        movs    xl, xl, lsl #1
        adcs    xh, xh, xh
        beq     .Ldiv64_high_zero
        cmpcc   xh, r4
.Ldiv64_low_sub:
        orrcs   yl, yl, ip
        subcs   xh, xh, r4
        movs    ip, ip, lsr #1
        bne     .Ldiv64_low_loop
        mov     pc, lr

        @ xh became zero after the shift. With carry set, the remainder
        @ really has a 33rd bit, so go back and subtract as usual.
        @ Otherwise, if xl is zero as well, nothing is left to divide.
.Ldiv64_high_zero:
        bcs     .Ldiv64_low_sub
        cmp     xl, #0
        moveq   pc, lr

        @ Remainder bits are still pending in xl: skip the zero bits in
        @ one step. xh is known to be zero here, so it is free to hold
        @ the shift count.
        clz     xh, xl
        add     xh, xh, #1
        mov     xl, xl, lsl xh
        mov     ip, ip, lsr xh

        @ The remainder is now 1. Comparing it with the divisor is
        @ pointless because the divisor cannot be smaller than 3 here.
        @ Continue with the next bit position if one is left, else return.
        movs    ip, ip, lsr #1
        mov     xh, #1
        bne     .Ldiv64_low_loop
        mov     pc, lr

        @ Divisor is a power of two: compute its order and shift.
.Ldiv64_pow2:
        clz     ip, r4
        rsb     ip, ip, #31             @ ip = bit index of the divisor
        mov     yh, xh, lsr ip
        mov     yl, xl, lsr ip
        rsb     ip, ip, #32             @ ip = 32 - bit index
 ARM(   orr     yl, yl, xh, lsl ip      )
 THUMB( lsl     xh, xh, ip              )
 THUMB( orr     yl, yl, xh              )
        mov     xh, xl, lsl ip          @ remainder = low bits of xl
        mov     xh, xh, lsr ip
        mov     pc, lr

        @ Reached with eq for a divisor of 1: quotient = dividend and
        @ remainder = 0. A divisor of 0 falls through to Ldiv0_64.
.Ldiv64_zero_or_one:
        moveq   yl, xl
        moveq   yh, xh
        moveq   xh, #0
        moveq   pc, lr
UNWIND(.fnend)

UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
        @ Division by zero: call __div0, then return zero for both
        @ quotient and remainder (as wrong as it could be...).
        str     lr, [sp, #-8]!
        bl      __div0
        mov     yl, #0
        mov     yh, #0
        mov     xh, #0
        ldr     pc, [sp], #8
UNWIND(.fnend)
ENDPROC(__do_div64)