lib1funcs.S 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279
  1. /*
  2. * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
  3. *
  4. * Author: Nicolas Pitre <[email protected]>
  5. * - contributed to gcc-3.4 on Sep 30, 2003
  6. * - adapted for the Linux kernel on Oct 2, 2003
  7. */
  8. /* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
  9. This file is free software; you can redistribute it and/or modify it
  10. under the terms of the GNU General Public License as published by the
  11. Free Software Foundation; either version 2, or (at your option) any
  12. later version.
  13. In addition to the permissions in the GNU General Public License, the
  14. Free Software Foundation gives you unlimited permission to link the
  15. compiled version of this file into combinations with other programs,
  16. and to distribute those combinations without any restriction coming
  17. from the use of this file. (The General Public License restrictions
  18. do apply in other respects; for example, they cover modification of
  19. the file, and distribution when not linked into a combine
  20. executable.)
  21. This file is distributed in the hope that it will be useful, but
  22. WITHOUT ANY WARRANTY; without even the implied warranty of
  23. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  24. General Public License for more details.
  25. You should have received a copy of the GNU General Public License
  26. along with this program; see the file COPYING. If not, write to
  27. the Free Software Foundation, 59 Temple Place - Suite 330,
  28. Boston, MA 02111-1307, USA. */
  /*
   * Local stand-ins for the usual assembler helper macros.
   *
   * UNWIND()  - takes any argument list and expands to nothing, so the
   *             unwind annotations used below emit no directives.
   * ENTRY()   - aligns to 2^3 bytes, exports the symbol, marks it as a
   *             function and defines its label.
   * ENDPROC() - records the symbol size as the distance from its label to
   *             the current location.
   *
   * Keep comments out of the continued lines: the preprocessor joins them
   * into a single line before the assembler sees them.
   */
  #define UNWIND(directive...)
  #define ENTRY(sym) \
          .p2align 3 ;\
          .global sym ;\
          .type sym, %function ;\
  sym:
  #define ENDPROC(sym) ;\
          .size sym, . - sym
  /*
   * ARM_DIV_BODY - unsigned shift-and-subtract division core.
   *
   *   dividend: in  = numerator; out = what is left after all subtractions
   *   divisor:  in  = denominator; clobbered
   *   result:   out = quotient
   *   curbit:   scratch, the quotient bit currently being tried
   *
   * Both users in this file enter only with dividend > divisor > 1 and pass
   * four distinct registers.  Flags are clobbered.
   */
  .macro ARM_DIV_BODY dividend, divisor, result, curbit

          @ Line the divisor up under the top set bit of the dividend and
          @ start curbit at the matching quotient position.
          clz     \curbit, \divisor
          clz     \result, \dividend
          sub     \result, \curbit, \result       @ shift = clz(divisor) - clz(dividend)
          mov     \curbit, #1
          mov     \divisor, \divisor, lsl \result
          mov     \curbit, \curbit, lsl \result
          mov     \result, #0                     @ quotient accumulates from zero

          @ Division loop: four quotient bits per pass.  Try the divisor at
          @ shifts 0 to 3 below its current position; wherever it fits,
          @ subtract it and set the corresponding quotient bit.
  1:      cmp     \dividend, \divisor
          subhs   \dividend, \dividend, \divisor
          orrhs   \result, \result, \curbit
          cmp     \dividend, \divisor, lsr #1
          subhs   \dividend, \dividend, \divisor, lsr #1
          orrhs   \result, \result, \curbit, lsr #1
          cmp     \dividend, \divisor, lsr #2
          subhs   \dividend, \dividend, \divisor, lsr #2
          orrhs   \result, \result, \curbit, lsr #2
          cmp     \dividend, \divisor, lsr #3
          subhs   \dividend, \dividend, \divisor, lsr #3
          orrhs   \result, \result, \curbit, lsr #3

          cmp     \dividend, #0                   @ Early termination?
          movnes  \curbit, \curbit, lsr #4        @ No, any more bits to do?
          movne   \divisor, \divisor, lsr #4      @ Yes, move down four positions
          bne     1b
  .endm
  /*
   * ARM_DIV2_ORDER - order = 31 - clz(divisor), the index of the highest set
   * bit of the divisor.  The users in this file invoke it only after checking
   * that the divisor is a power of two, and then shift right by the result.
   * The divisor register is left unchanged.
   */
  .macro ARM_DIV2_ORDER divisor, order
          clz     \order, \divisor
          rsb     \order, \order, #31
  .endm
  /*
   * ARM_MOD_BODY - unsigned remainder by shift-and-subtract.
   *
   *   dividend: in = numerator; out = remainder
   *   divisor:  in = denominator; clobbered
   *   order:    scratch, tracks the compare/subtract steps still owed
   *   spare:    scratch
   *
   * Both users in this file reach it only with dividend > divisor > 1.
   * Flags are clobbered.
   */
  .macro ARM_MOD_BODY dividend, divisor, order, spare

          @ Shift the divisor left until its top set bit lines up with the
          @ top set bit of the dividend.
          clz     \order, \divisor
          clz     \spare, \dividend
          sub     \order, \order, \spare
          mov     \divisor, \divisor, lsl \order

          @ Perform all needed subtractions to keep only the remainder.
          @ order + 1 steps are owed; do them in batches of 4 first.
          subs    \order, \order, #3              @ yes, 3 is intended here
          blt     2f                              @ fewer than four steps owed

  1:      cmp     \dividend, \divisor
          subhs   \dividend, \dividend, \divisor
          cmp     \dividend, \divisor, lsr #1
          subhs   \dividend, \dividend, \divisor, lsr #1
          cmp     \dividend, \divisor, lsr #2
          subhs   \dividend, \dividend, \divisor, lsr #2
          cmp     \dividend, \divisor, lsr #3
          subhs   \dividend, \dividend, \divisor, lsr #3
          cmp     \dividend, #1                   @ anything left to reduce?
          mov     \divisor, \divisor, lsr #4
          subges  \order, \order, #4              @ if so, four fewer steps owed
          bge     1b                              @ and loop while a full batch remains

          tst     \order, #3                      @ no partial batch owed,
          teqne   \dividend, #0                   @ or dividend already zero?
          beq     5f                              @ then the remainder is final

          @ Either 1, 2 or 3 comparison/subtractions are left;
          @ order is -3, -2 or -1 respectively.
  2:      cmn     \order, #2
          blt     4f                              @ one left
          beq     3f                              @ two left
          cmp     \dividend, \divisor             @ three left
          subhs   \dividend, \dividend, \divisor
          mov     \divisor, \divisor, lsr #1
  3:      cmp     \dividend, \divisor
          subhs   \dividend, \dividend, \divisor
          mov     \divisor, \divisor, lsr #1
  4:      cmp     \dividend, \divisor
          subhs   \dividend, \dividend, \divisor
  5:
  .endm
  /*
   * unsigned __udivsi3(unsigned dividend, unsigned divisor)
   * __aeabi_uidiv is a second name for the same code.
   *
   *   In:  r0 = dividend, r1 = divisor
   *   Out: r0 = quotient
   *   Writes r1, r2, r3 and the flags.  A zero divisor is handed to Ldiv0.
   *
   * The unwind region is now opened with .fnstart so that it pairs with the
   * .fnend at the bottom, as in every other routine in this file.
   */
  ENTRY(__aeabi_uidiv)
  ENTRY(__udivsi3)
  UNWIND(.fnstart)
          subs    r2, r1, #1              @ r2 = divisor - 1
          moveq   pc, lr                  @ divisor == 1: r0 is already the quotient
          bcc     Ldiv0                   @ borrow: divisor == 0
          cmp     r0, r1
          bls     .Ludivsi3_small         @ dividend <= divisor: quotient is 1 or 0
          tst     r1, r2                  @ divisor & (divisor - 1)
          beq     .Ludivsi3_pow2          @ zero: divisor is a power of two

          ARM_DIV_BODY r0, r1, r2, r3     @ general case, quotient lands in r2
          mov     r0, r2
          mov     pc, lr

          @ Flags still come from cmp r0, r1: EQ means dividend == divisor.
  .Ludivsi3_small:
          moveq   r0, #1
          movne   r0, #0
          mov     pc, lr

          @ Power-of-two divisor: shift right by the index of its set bit.
  .Ludivsi3_pow2:
          ARM_DIV2_ORDER r1, r2
          mov     r0, r0, lsr r2
          mov     pc, lr
  UNWIND(.fnend)
  ENDPROC(__udivsi3)
  ENDPROC(__aeabi_uidiv)
  /*
   * unsigned __umodsi3(unsigned dividend, unsigned divisor)
   *
   *   In:  r0 = dividend, r1 = divisor
   *   Out: r0 = remainder
   *   Writes r1, r2, r3 and the flags.  A zero divisor is handed to Ldiv0.
   *
   * The easy cases are resolved by one chain of conditional instructions
   * that all key off the flags of the subs/cmp pair, so the order of the
   * first seven instructions must not change.
   */
  ENTRY(__umodsi3)
  UNWIND(.fnstart)
          subs    r2, r1, #1              @ r2 = divisor - 1; Z set when divisor is 1
          blo     Ldiv0                   @ divisor was 0
          cmpne   r0, r1                  @ divisor != 1: dividend against divisor
          moveq   r0, #0                  @ divisor is 1, or operands equal: result 0
          tsthi   r1, r2                  @ dividend > divisor: power-of-two divisor?
          andeq   r0, r0, r2              @ if so mask; no effect when r0 is already 0
          movls   pc, lr                  @ done unless dividend > non-power-of-two divisor

          ARM_MOD_BODY r0, r1, r2, r3     @ general case, remainder stays in r0
          mov     pc, lr
  UNWIND(.fnend)
  ENDPROC(__umodsi3)
  /*
   * int __divsi3(int dividend, int divisor)
   * __aeabi_idiv is a second name for the same code.
   *
   *   In:  r0 = dividend, r1 = divisor
   *   Out: r0 = quotient
   *   Writes r1, r2, r3, ip and the flags.  A zero divisor is handed to Ldiv0.
   *
   * The magnitudes are divided as unsigned values; bit 31 of ip, which holds
   * dividend EOR divisor, then decides whether the quotient is negated.
   */
  ENTRY(__divsi3)
  ENTRY(__aeabi_idiv)
  UNWIND(.fnstart)
          cmp     r1, #0
          eor     ip, r0, r1              @ save the sign of the result
          beq     Ldiv0
          rsbmi   r1, r1, #0              @ r1 = |divisor|, the loops are unsigned
          subs    r2, r1, #1              @ r2 = |divisor| - 1
          beq     .Ldivsi3_unit           @ dividing by 1 or -1
          movs    r3, r0
          rsbmi   r3, r0, #0              @ r3 = |dividend|
          cmp     r3, r1
          bls     .Ldivsi3_small          @ |dividend| <= |divisor|
          tst     r1, r2                  @ |divisor| & (|divisor| - 1)
          beq     .Ldivsi3_pow2           @ zero: power of two

          ARM_DIV_BODY r3, r1, r0, r2     @ general case, unsigned quotient in r0
          cmp     ip, #0
          rsbmi   r0, r0, #0              @ signs differed: negate
          mov     pc, lr

          @ ip EOR r0 gives back the original divisor, so N is set exactly
          @ when the divisor was -1.
  .Ldivsi3_unit:
          teq     ip, r0
          rsbmi   r0, r0, #0
          mov     pc, lr

          @ Flags still come from cmp r3, r1.
  .Ldivsi3_small:
          movlo   r0, #0                  @ |dividend| < |divisor|
          moveq   r0, ip, asr #31         @ equal magnitudes: 0 or -1 from the sign
          orreq   r0, r0, #1              @ which becomes +1 or -1
          mov     pc, lr

          @ Power-of-two divisor: shift the magnitude, then apply the sign.
  .Ldivsi3_pow2:
          ARM_DIV2_ORDER r1, r2
          cmp     ip, #0
          mov     r0, r3, lsr r2
          rsbmi   r0, r0, #0
          mov     pc, lr
  UNWIND(.fnend)
  ENDPROC(__divsi3)
  ENDPROC(__aeabi_idiv)
  /*
   * int __modsi3(int dividend, int divisor)
   *
   *   In:  r0 = dividend, r1 = divisor
   *   Out: r0 = remainder, negated when the dividend was negative
   *   Writes r1, r2, r3, ip and the flags.  A zero divisor is handed to Ldiv0.
   *
   * After both operands are made positive, the easy cases are resolved by
   * the same flag-driven conditional chain as in __umodsi3.
   */
  ENTRY(__modsi3)
  UNWIND(.fnstart)
          cmp     r1, #0
          beq     Ldiv0
          rsbmi   r1, r1, #0              @ r1 = |divisor|, the loops are unsigned
          movs    ip, r0                  @ keep the signed dividend for later
          rsbmi   r0, r0, #0              @ r0 = |dividend|
          subs    r2, r1, #1              @ r2 = |divisor| - 1; Z set when it is 1
          cmpne   r0, r1                  @ otherwise compare the magnitudes
          moveq   r0, #0                  @ divisor magnitude 1, or equal: result 0
          tsthi   r1, r2                  @ |dividend| larger: power-of-two divisor?
          andeq   r0, r0, r2              @ if so mask; no effect when r0 is already 0
          bls     .Lmodsi3_sign           @ easy case handled, skip the loop

          ARM_MOD_BODY r0, r1, r2, r3     @ general case, remainder stays in r0

  .Lmodsi3_sign:
          cmp     ip, #0
          rsbmi   r0, r0, #0              @ dividend was negative: negate
          mov     pc, lr
  UNWIND(.fnend)
  ENDPROC(__modsi3)
  /*
   * __aeabi_uidivmod - unsigned quotient and remainder from a single call.
   *
   *   In:  r0 = dividend, r1 = divisor
   *   Out: r0 = quotient as returned by __aeabi_uidiv
   *        r1 = dividend - quotient * divisor
   *   Also writes r2 and r3.
   *
   * NOTE(review): ip is stacked along with the operands and lr, presumably
   * only to round the frame up to four words - confirm.
   */
  ENTRY(__aeabi_uidivmod)
  UNWIND(.fnstart)
  UNWIND(.save {r0, r1, ip, lr} )
          stmdb   sp!, {r0, r1, ip, lr}   @ push both operands and the return address
          bl      __aeabi_uidiv
          ldmia   sp!, {r1, r2, ip, lr}   @ pop: r1 = dividend, r2 = divisor
          mul     r3, r0, r2              @ r3 = quotient * divisor
          sub     r1, r1, r3
          mov     pc, lr
  UNWIND(.fnend)
  ENDPROC(__aeabi_uidivmod)
  /*
   * __aeabi_idivmod - signed quotient and remainder from a single call.
   *
   *   In:  r0 = dividend, r1 = divisor
   *   Out: r0 = quotient as returned by __aeabi_idiv
   *        r1 = dividend - quotient * divisor
   *   Also writes r2 and r3.
   *
   * NOTE(review): ip is stacked along with the operands and lr, presumably
   * only to round the frame up to four words - confirm.
   */
  ENTRY(__aeabi_idivmod)
  UNWIND(.fnstart)
  UNWIND(.save {r0, r1, ip, lr} )
          stmdb   sp!, {r0, r1, ip, lr}   @ push both operands and the return address
          bl      __aeabi_idiv
          ldmia   sp!, {r1, r2, ip, lr}   @ pop: r1 = dividend, r2 = divisor
          mul     r3, r0, r2              @ r3 = quotient * divisor
          sub     r1, r1, r3
          mov     pc, lr
  UNWIND(.fnend)
  ENDPROC(__aeabi_idivmod)
  /*
   * Ldiv0 - shared divide-by-zero exit, reached by a plain branch from the
   * routines in this file, so lr still holds their return address.
   * Calls __div0 and, if that returns, hands 0 back in r0.
   */
  Ldiv0:
  UNWIND(.fnstart)
  UNWIND(.pad #4)
  UNWIND(.save {lr})
          str     lr, [sp, #-8]!          @ drop sp by 8 and store lr at the new top
          bl      __div0
          mov     r0, #0                  @ About as wrong as it could be.
          ldr     pc, [sp], #8            @ reload the saved lr into pc, release the 8 bytes
  UNWIND(.fnend)
  ENDPROC(Ldiv0)