! memcpy.S
!
! Copyright (C) 2008-2012 Imagination Technologies Ltd.
  .text
  .global _memcpy
  .type   _memcpy,function

!-----------------------------------------------------------------------
! _memcpy: copy D1Ar3 bytes from the buffer at D0Ar2 to the buffer at D1Ar1.
!
! In:     D1Ar1 = dst pointer
!         D0Ar2 = src pointer
!         D1Ar3 = cnt, number of bytes to copy
! Out:    D0Re0 = dst (the value D1Ar1 held on entry)
! Writes: D0Re0, D1Re0, D1Ar1, D0Ar2, D1Ar3, D0Ar4, D1Ar5, D0Ar6, D0FrT,
!         A0.2, A0.3, A1.2, TXRPT and the condition flags.
!
! Working registers:
!         A1.2 = running source pointer
!         A0.2 = running destination pointer
!         A0.3 = copy of dst kept for the return value (D1Ar1 itself is
!                reused as scratch by the skewed-source loops)
!
! Strategy:
!   cnt below 16 ......... byte loop only
!   otherwise ............ single bytes until dst is 8-byte aligned, then
!     src aligned too .... 32 bytes per iteration, byte loop for the rest
!     src skewed ......... 8 bytes per iteration, built by shifting and
!                          merging aligned 64-bit loads; byte loop for
!                          the rest
!
! The loops that end in BR have TXRPT loaded with (iterations - 1) just
! before they start.
! NOTE(review): BR is presumably the repeat branch driven by TXRPT -
! confirm against the Meta ISA manual.
!-----------------------------------------------------------------------
_memcpy:
  CMP     D1Ar3, #16              ! flags are consumed by the BGE below
  MOV     A1.2, D0Ar2             ! A1.2 = source cursor
  MOV     A0.3, D1Ar1             ! A0.3 = dst, held for the return value
  MOV     A0.2, D1Ar1             ! A0.2 = destination cursor
  BGE     $Lbulk_copy             ! 16 bytes or more: take the block paths

! Copy D1Ar3 bytes one at a time. The block paths also come here to copy
! whatever is left once their last whole block has been stored.
$Lcopy_bytes:
  SUBS    TXRPT, D1Ar3, #1        ! TXRPT = remaining - 1
  BLT     $Ldone                  ! result negative: no bytes left
$Lnext_byte:
  GETB    D1Re0, [A1.2++]
  SETB    [A0.2++], D1Re0
  BR      $Lnext_byte

! Common exit: hand back the original destination pointer.
$Ldone:
  MOV     D0Re0, A0.3
  MOV     PC, D1RtP

$Lbulk_copy:
  ANDS    D1Ar5, D1Ar1, #7        ! D1Ar5 = dst & 7
  BZ      $Ldst_ok                ! dst already on an 8-byte boundary

! dst is not 8-byte aligned. Move single bytes, counting D1Ar5 up to 8, so
! that the destination cursor ends on an 8-byte boundary. No more than 7
! bytes are copied here.
$Lfix_dst:
  GETB    D0Re0, [A1.2++]
  ADD     D1Ar5, D1Ar5, #1        ! reaches 8 once dst is aligned
  SUB     D1Ar3, D1Ar3, #1        ! one byte fewer left to copy
  SETB    [A0.2++], D0Re0
  CMP     D1Ar5, #8
  BNE     $Lfix_dst

! The destination cursor is 8-byte aligned and at least 16 - 7 = 9 bytes
! remain. Count the 8-byte blocks, then choose the loop from the source
! alignment: branch away if the source is skewed, fall through if not.
$Ldst_ok:
  MOV     D0Ar4, A1.2
  LSR     D1Ar5, D1Ar3, #3        ! D1Ar5 = remaining / 8 (for the skewed path)
  ANDS    D0Ar4, D0Ar4, #7        ! src & 7
  BNZ     $Lsrc_skewed

! Source and destination are both 8-byte aligned.
$Lboth_aligned:
  LSRS    D1Ar5, D1Ar3, #5        ! D1Ar5 = remaining / 32
  BZ      $Lcopy_bytes            ! under 32 bytes: the byte loop copies it all
  SUB     TXRPT, D1Ar5, #1

! 32 bytes per iteration as four 64-bit load/store pairs.
$Lblock_32:
  GETL    D0Re0, D1Re0, [A1.2++]
  GETL    D0Ar6, D1Ar5, [A1.2++]
  SETL    [A0.2++], D0Re0, D1Re0
  SETL    [A0.2++], D0Ar6, D1Ar5
  GETL    D0Re0, D1Re0, [A1.2++]
  GETL    D0Ar6, D1Ar5, [A1.2++]
  SETL    [A0.2++], D0Re0, D1Re0
  SETL    [A0.2++], D0Ar6, D1Ar5
  BR      $Lblock_32

! Anything left after the 32-byte blocks goes through the byte loop.
  ANDS    D1Ar3, D1Ar3, #0x1f     ! remaining = remaining % 32
  BNZ     $Lcopy_bytes
  B       $Ldone

! The destination is 8-byte aligned, the source is not, and at least one
! 8-byte block remains.
$Lsrc_skewed:
! Pull the source cursor back to the 8-byte boundary at or below it and
! record how far it moved.
  MOV     D0Ar6, A1.2
  MOV     D0Ar4, A1.2
  ANDMB   D0Ar4, D0Ar4, #0xfff8   ! round the address down to a multiple of 8
  MOV     A1.2, D0Ar4             ! A1.2 = aligned source cursor
  SUBS    D0Ar6, D0Ar6, D0Ar4     ! D0Ar6 = skew in bytes
  MOV     D0Ar4, D0Ar6            ! D0Ar4 keeps the skew for $Lskew_tail
  BZ      $Lboth_aligned          ! flags from the SUBS: zero skew, aligned loop

  GETL    D0Re0, D1Re0, [A1.2]    ! preload the aligned 8 bytes that hold src
  SUB     TXRPT, D1Ar5, #1        ! TXRPT = (8-byte blocks) - 1

! Three cases by skew: 1-3 bytes, exactly 4 bytes, 5-7 bytes.
  CMP     D0Ar6, #4
  BLT     $Lskew_123
  BZ      $Lskew_4

! Skew of 5, 6 or 7 bytes.
$Lskew_567:
  SUB     D0Ar6, D0Ar6, #4        ! skew measured from the second preloaded word
! Shift amounts necessary to line the data up:
! D0Ar6 = bit offset, D1Ar5 = 32 - bit offset.
  MULW    D0Ar6, D0Ar6, #8
  MOV     D1Ar5, #32
  SUB     D1Ar5, D1Ar5, D0Ar6
  MOV     D0Re0, D1Re0            ! start 4 bytes in: carry word = second preloaded word
$Lloop_567:
  GETL    D0Ar2, D1Ar1, [++A1.2]  ! step to the next aligned 8 bytes and load them
! Build the 64-bit output in D0Re0, D1Re0. The shifted parts occupy
! disjoint bits, so ADD merges them.
  LSR     D0Re0, D0Re0, D0Ar6     ! carry word >> offset
  MOV     D1Re0, D0Ar2
  LSL     D1Re0, D1Re0, D1Ar5     ! new D0 word << (32 - offset)
  ADD     D0Re0, D0Re0, D1Re0     ! first output word
  LSR     D0Ar2, D0Ar2, D0Ar6     ! new D0 word >> offset
  LSL     D1Re0, D1Ar1, D1Ar5     ! new D1 word << (32 - offset)
  ADD     D1Re0, D1Re0, D0Ar2     ! second output word
  SETL    [A0.2++], D0Re0, D1Re0
  MOV     D0Re0, D1Ar1            ! new D1 word is the next carry word
  BR      $Lloop_567
  B       $Lskew_tail

! Skew of 1, 2 or 3 bytes.
$Lskew_123:
! Shift amounts necessary to line the data up:
! D0Ar6 = bit offset, D1Ar5 = 32 - bit offset.
  MULW    D0Ar6, D0Ar6, #8
  MOV     D1Ar5, #32
  SUB     D1Ar5, D1Ar5, D0Ar6
$Lloop_123:
! Build the 64-bit output in D0Re0, D1Re0 from the pair already held in
! D0Re0, D1Re0 plus the first word of the next aligned load.
  LSR     D0Re0, D0Re0, D0Ar6     ! held D0 word >> offset
  LSL     D1Ar1, D1Re0, D1Ar5     ! held D1 word << (32 - offset)
  ADD     D0Re0, D0Re0, D1Ar1     ! first output word
  MOV     D0Ar2, D1Re0
  LSR     D0FrT, D0Ar2, D0Ar6     ! held D1 word >> offset, parked in D0FrT
  GETL    D0Ar2, D1Ar1, [++A1.2]  ! step to the next aligned 8 bytes and load them
  MOV     D1Re0, D0Ar2
  LSL     D1Re0, D1Re0, D1Ar5     ! new D0 word << (32 - offset)
  ADD     D1Re0, D1Re0, D0FrT     ! second output word
  SETL    [A0.2++], D0Re0, D1Re0
  MOV     D0Re0, D0Ar2            ! the new pair becomes the held pair
  MOV     D1Re0, D1Ar1
  BR      $Lloop_123
  B       $Lskew_tail

! Skew of exactly 4 bytes: no shifting, the words only change registers.
$Lskew_4:
  MOV     D0Re0, D1Re0            ! second preloaded word is the first output word
$Lloop_skew4:
  GETL    D0Ar2, D1Ar1, [++A1.2]  ! step to the next aligned 8 bytes and load them
  MOV     D1Re0, D0Ar2            ! new D0 word is the second output word
  SETL    [A0.2++], D0Re0, D1Re0
  MOV     D0Re0, D1Ar1            ! new D1 word starts the next output
  BR      $Lloop_skew4

! All three skewed loops finish here.
$Lskew_tail:
  ANDS    D1Ar3, D1Ar3, #7        ! remaining = remaining % 8
  BZ      $Ldone                  ! nothing left over
! Add the skew back on so the source cursor is the true byte address of
! the data still to copy, then let the byte loop finish the job.
  MOV     D0Ar6, A1.2
  ADD     D1Ar5, D0Ar4, D0Ar6     ! aligned cursor + skew
  MOV     A1.2, D1Ar5
  B       $Lcopy_bytes

  .size   _memcpy,.-_memcpy