! memmove.S

! Copyright (C) 2008-2012 Imagination Technologies Ltd.

        .text
        .global _memmove
        .type   _memmove,function

! D1Ar1 dst
! D0Ar2 src
! D1Ar3 cnt
! D0Re0 dst (return value)
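!
! Summary (inferred from the code below): copy D1Ar3 bytes from src to
! dst and return dst. If the regions overlap with dst inside the source
! range, copy backwards, otherwise forwards. Both directions use 64-bit
! GETL/SETL loops where possible, realigning an unaligned source by
! shifting and merging adjacent aligned words.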
_memmove:
        CMP  D1Ar3, #0
        MOV  D0Re0, D1Ar1
        BZ   $LEND2

        MSETL [A0StP], D0.5, D0.6, D0.7

        MOV  D1Ar5, D0Ar2
        CMP  D1Ar1, D1Ar5
        BLT  $Lforwards_copy
        SUB  D0Ar4, D1Ar1, D1Ar3
        ADD  D0Ar4, D0Ar4, #1
        CMP  D0Ar2, D0Ar4
        BLT  $Lforwards_copy
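        ! Direction choice (as encoded above): forward copy is safe when
        ! dst < src, or when src < dst - cnt + 1, i.e. src + cnt <= dst.
        ! Otherwise the regions overlap with dst above src, so the copy
        ! must run backwards.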
        ! should copy backwards
        MOV  D1Re0, D0Ar2
        ! adjust both pointers to the end of their regions
        ADD  D0Ar2, D1Re0, D1Ar3
        ADD  D1Ar1, D1Ar1, D1Ar3

        MOV  A1.2, D0Ar2
        MOV  A0.2, D1Ar1
        CMP  D1Ar3, #8
        BLT  $Lbbyte_loop

        MOV  D0Ar4, D0Ar2
        MOV  D1Ar5, D1Ar1

        ! test 8 byte alignment
        ANDS D1Ar5, D1Ar5, #7
        BNE  $Lbdest_unaligned

        ANDS D0Ar4, D0Ar4, #7
        BNE  $Lbsrc_unaligned

        LSR  D1Ar5, D1Ar3, #3
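        ! D1Ar5 now holds the number of whole 8-byte chunks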
$Lbaligned_loop:
        GETL D0Re0, D1Re0, [--A1.2]
        SETL [--A0.2], D0Re0, D1Re0
        SUBS D1Ar5, D1Ar5, #1
        BNE  $Lbaligned_loop

        ANDS D1Ar3, D1Ar3, #7
        BZ   $Lbbyte_loop_exit
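        ! copy the remaining 0-7 tail bytes one at a time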
$Lbbyte_loop:
        GETB D1Re0, [--A1.2]
        SETB [--A0.2], D1Re0
        SUBS D1Ar3, D1Ar3, #1
        BNE  $Lbbyte_loop
$Lbbyte_loop_exit:
        MOV  D0Re0, A0.2
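
        ! restore the registers spilled by the MSETL at entry (three
        ! 64-bit transfers, hence the 24-byte stack adjustment), then
        ! return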
$LEND:
        SUB  A0.2, A0StP, #24
        MGETL D0.5, D0.6, D0.7, [A0.2]
        SUB  A0StP, A0StP, #24
$LEND2:
        MOV  PC, D1RtP
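
        ! backward dest-unaligned: copy single bytes from the end until
        ! the pre-decremented destination pointer is 8-byte aligned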
$Lbdest_unaligned:
        GETB D0Re0, [--A1.2]
        SETB [--A0.2], D0Re0
        SUBS D1Ar5, D1Ar5, #1
        SUB  D1Ar3, D1Ar3, #1
        BNE  $Lbdest_unaligned
        CMP  D1Ar3, #8
        BLT  $Lbbyte_loop
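
        ! backward src-unaligned: dst is now 8-byte aligned but src is
        ! not. Round the source pointer up to the next 8-byte boundary,
        ! remember the gap, and rebuild each 64-bit store by shifting
        ! and merging adjacent aligned loads.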
$Lbsrc_unaligned:
        LSR  D1Ar5, D1Ar3, #3
        ! adjust A1.2
        MOV  D0Ar4, A1.2
        ! save original address
        MOV  D0Ar6, A1.2
        ADD  D0Ar4, D0Ar4, #7
        ANDMB D0Ar4, D0Ar4, #0xfff8
        ! new address is the 8-byte aligned one above the original
        MOV  A1.2, D0Ar4

        ! A0.2 (dst) is already 64-bit aligned
        ! measure the gap size
        SUB  D0Ar6, D0Ar4, D0Ar6
        MOVS D0Ar4, D0Ar6
        ! keep this information for the later adjustment
        ! both aligned
        BZ   $Lbaligned_loop
        ! preload the first aligned 8 bytes
        GETL D0Re0, D1Re0, [--A1.2]

        CMP  D0Ar6, #4
        BLT  $Lbunaligned_1_2_3

        ! gap is exactly 4: 32-bit aligned
        BZ   $Lbaligned_4
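        ! gap is 5, 6 or 7: fall through; on top of the 32-bit word
        ! swap, the data must be shifted by (gap - 4) bytes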
        SUB  D0Ar6, D0Ar6, #4
        ! D1.6 stores the gap size in bits
        MULW D1.6, D0Ar6, #8
        MOV  D0.6, #32
        ! D0.6 stores the complement of the gap size
        SUB  D0.6, D0.6, D1.6
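        ! e.g. gap = 6: D1.6 = (6 - 4) * 8 = 16 bits, D0.6 = 32 - 16 = 16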
$Lbunaligned_5_6_7:
        GETL D0.7, D1.7, [--A1.2]
        ! form 64-bit data in D0Re0, D1Re0
        MOV  D1Re0, D0Re0
        ! D1Re0 << gap-size
        LSL  D1Re0, D1Re0, D1.6
        MOV  D0Re0, D1.7
        ! D0Re0 >> complement
        LSR  D0Re0, D0Re0, D0.6
        MOV  D1.5, D0Re0
        ! combine the two halves
        ADD  D1Re0, D1Re0, D1.5
        MOV  D1.5, D1.7
        LSL  D1.5, D1.5, D1.6
        MOV  D0Re0, D0.7
        LSR  D0Re0, D0Re0, D0.6
        MOV  D0.5, D1.5
        ADD  D0Re0, D0Re0, D0.5
        SETL [--A0.2], D0Re0, D1Re0
        MOV  D0Re0, D0.7
        MOV  D1Re0, D1.7
        SUBS D1Ar5, D1Ar5, #1
        BNE  $Lbunaligned_5_6_7

        ANDS D1Ar3, D1Ar3, #7
        BZ   $Lbbyte_loop_exit
        ! Adjust A1.2
        ! A1.2 <- A1.2 + 8 - gap size
        ADD  A1.2, A1.2, #8
        SUB  A1.2, A1.2, D0Ar4
        B    $Lbbyte_loop
$Lbunaligned_1_2_3:
        MULW D1.6, D0Ar6, #8
        MOV  D0.6, #32
        SUB  D0.6, D0.6, D1.6
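        ! the gap here is 1-3 bytes, so the shift computed above is
        ! gap * 8 bits directly (e.g. gap = 3: D1.6 = 24, D0.6 = 8)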
$Lbunaligned_1_2_3_loop:
        GETL D0.7, D1.7, [--A1.2]
        ! form 64-bit data in D0Re0, D1Re0
        LSL  D1Re0, D1Re0, D1.6
        ! save D0Re0 for later use
        MOV  D0.5, D0Re0
        LSR  D0Re0, D0Re0, D0.6
        MOV  D1.5, D0Re0
        ADD  D1Re0, D1Re0, D1.5

        ! original data in D0Re0
        MOV  D1.5, D0.5
        LSL  D1.5, D1.5, D1.6

        MOV  D0Re0, D1.7
        LSR  D0Re0, D0Re0, D0.6
        MOV  D0.5, D1.5
        ADD  D0Re0, D0Re0, D0.5
        SETL [--A0.2], D0Re0, D1Re0
        MOV  D0Re0, D0.7
        MOV  D1Re0, D1.7
        SUBS D1Ar5, D1Ar5, #1
        BNE  $Lbunaligned_1_2_3_loop
        ANDS D1Ar3, D1Ar3, #7
        BZ   $Lbbyte_loop_exit
        ! Adjust A1.2
        ADD  A1.2, A1.2, #8
        SUB  A1.2, A1.2, D0Ar4
        B    $Lbbyte_loop
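
        ! gap of exactly 4 bytes: realignment is a plain 32-bit word
        ! swap, no shifting needed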
$Lbaligned_4:
        GETL D0.7, D1.7, [--A1.2]
        MOV  D1Re0, D0Re0
        MOV  D0Re0, D1.7
        SETL [--A0.2], D0Re0, D1Re0
        MOV  D0Re0, D0.7
        MOV  D1Re0, D1.7
        SUBS D1Ar5, D1Ar5, #1
        BNE  $Lbaligned_4
        ANDS D1Ar3, D1Ar3, #7
        BZ   $Lbbyte_loop_exit
        ! Adjust A1.2
        ADD  A1.2, A1.2, #8
        SUB  A1.2, A1.2, D0Ar4
        B    $Lbbyte_loop
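
        ! forward copy: mirrors the backward path with post-increment
        ! addressing; the unaligned shifts run in the opposite direction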
$Lforwards_copy:
        MOV  A1.2, D0Ar2
        MOV  A0.2, D1Ar1

        CMP  D1Ar3, #8
        BLT  $Lfbyte_loop

        MOV  D0Ar4, D0Ar2
        MOV  D1Ar5, D1Ar1

        ANDS D1Ar5, D1Ar5, #7
        BNE  $Lfdest_unaligned

        ANDS D0Ar4, D0Ar4, #7
        BNE  $Lfsrc_unaligned

        LSR  D1Ar5, D1Ar3, #3

$Lfaligned_loop:
        GETL D0Re0, D1Re0, [A1.2++]
        SUBS D1Ar5, D1Ar5, #1
        SETL [A0.2++], D0Re0, D1Re0
        BNE  $Lfaligned_loop

        ANDS D1Ar3, D1Ar3, #7
        BZ   $Lfbyte_loop_exit
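        ! copy the remaining 0-7 tail bytes one at a time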
$Lfbyte_loop:
        GETB D1Re0, [A1.2++]
        SETB [A0.2++], D1Re0
        SUBS D1Ar3, D1Ar3, #1
        BNE  $Lfbyte_loop
$Lfbyte_loop_exit:
        MOV  D0Re0, D1Ar1
        B    $LEND
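
        ! forward dest-unaligned: copy single bytes until dst is 8-byte
        ! aligned (D1Ar5 counts up from dst & 7 to 8)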
$Lfdest_unaligned:
        GETB D0Re0, [A1.2++]
        ADD  D1Ar5, D1Ar5, #1
        SUB  D1Ar3, D1Ar3, #1
        SETB [A0.2++], D0Re0
        CMP  D1Ar5, #8
        BNE  $Lfdest_unaligned
        CMP  D1Ar3, #8
        BLT  $Lfbyte_loop
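
        ! forward src-unaligned: round the source pointer down to an
        ! 8-byte boundary and remember the gap (src - aligned src)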
$Lfsrc_unaligned:
        ! adjust A1.2
        LSR  D1Ar5, D1Ar3, #3
        MOV  D0Ar4, A1.2
        MOV  D0Ar6, A1.2
        ANDMB D0Ar4, D0Ar4, #0xfff8
        MOV  A1.2, D0Ar4

        ! A0.2 (dst) is already 64-bit aligned
        SUB  D0Ar6, D0Ar6, D0Ar4
        ! keep the gap for the later adjustment
        MOVS D0Ar4, D0Ar6

        ! both aligned
        BZ   $Lfaligned_loop

        ! preload the first aligned 8 bytes
        GETL D0Re0, D1Re0, [A1.2]

        CMP  D0Ar6, #4
        BLT  $Lfunaligned_1_2_3
        BZ   $Lfaligned_4
        SUB  D0Ar6, D0Ar6, #4
        MULW D0.6, D0Ar6, #8
        MOV  D1.6, #32
        SUB  D1.6, D1.6, D0.6
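        ! shift amounts mirror the backward path: D0.6 = (gap - 4) * 8
        ! bits, D1.6 = 32 - D0.6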
$Lfunaligned_5_6_7:
        GETL D0.7, D1.7, [++A1.2]
        ! form 64-bit data in D0Re0, D1Re0
        MOV  D0Re0, D1Re0
        LSR  D0Re0, D0Re0, D0.6
        MOV  D1Re0, D0.7
        LSL  D1Re0, D1Re0, D1.6
        MOV  D0.5, D1Re0
        ADD  D0Re0, D0Re0, D0.5

        MOV  D0.5, D0.7
        LSR  D0.5, D0.5, D0.6
        MOV  D1Re0, D1.7
        LSL  D1Re0, D1Re0, D1.6
        MOV  D1.5, D0.5
        ADD  D1Re0, D1Re0, D1.5
        SETL [A0.2++], D0Re0, D1Re0
        MOV  D0Re0, D0.7
        MOV  D1Re0, D1.7
        SUBS D1Ar5, D1Ar5, #1
        BNE  $Lfunaligned_5_6_7
        ANDS D1Ar3, D1Ar3, #7
        BZ   $Lfbyte_loop_exit
        ! Adjust A1.2
        ADD  A1.2, A1.2, D0Ar4
        B    $Lfbyte_loop
$Lfunaligned_1_2_3:
        MULW D0.6, D0Ar6, #8
        MOV  D1.6, #32
        SUB  D1.6, D1.6, D0.6
$Lfunaligned_1_2_3_loop:
        GETL D0.7, D1.7, [++A1.2]
        ! form 64-bit data in D0Re0, D1Re0
        LSR  D0Re0, D0Re0, D0.6
        MOV  D1.5, D1Re0
        LSL  D1Re0, D1Re0, D1.6
        MOV  D0.5, D1Re0
        ADD  D0Re0, D0Re0, D0.5
        MOV  D0.5, D1.5
        LSR  D0.5, D0.5, D0.6
        MOV  D1Re0, D0.7
        LSL  D1Re0, D1Re0, D1.6
        MOV  D1.5, D0.5
        ADD  D1Re0, D1Re0, D1.5

        SETL [A0.2++], D0Re0, D1Re0
        MOV  D0Re0, D0.7
        MOV  D1Re0, D1.7
        SUBS D1Ar5, D1Ar5, #1
        BNE  $Lfunaligned_1_2_3_loop
        ANDS D1Ar3, D1Ar3, #7
        BZ   $Lfbyte_loop_exit
        ! Adjust A1.2
        ADD  A1.2, A1.2, D0Ar4
        B    $Lfbyte_loop
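
        ! gap of exactly 4: 32-bit word swap, as in the backward path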
$Lfaligned_4:
        GETL D0.7, D1.7, [++A1.2]
        MOV  D0Re0, D1Re0
        MOV  D1Re0, D0.7
        SETL [A0.2++], D0Re0, D1Re0
        MOV  D0Re0, D0.7
        MOV  D1Re0, D1.7
        SUBS D1Ar5, D1Ar5, #1
        BNE  $Lfaligned_4
        ANDS D1Ar3, D1Ar3, #7
        BZ   $Lfbyte_loop_exit
        ! Adjust A1.2
        ADD  A1.2, A1.2, D0Ar4
        B    $Lfbyte_loop

        .size _memmove,.-_memmove