/*
 *  arch/xtensa/lib/strncpy_user.S
 *
 *  This file is subject to the terms and conditions of the GNU General
 *  Public License.  See the file "COPYING" in the main directory of
 *  this archive for more details.
 *
 *  Returns: -EFAULT if exception before terminator, N if the entire
 *  buffer filled, else strlen.
 *
 *  Copyright (C) 2002 Tensilica Inc.
 */
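
/*
 * Illustrative only (not part of the original file): a C-level view of
 * the return contract stated above, as a caller might use it.  The C
 * prototype and the wrapper name are assumptions for illustration; the
 * actual kernel wrapper may differ.
 *
 *      // Assumed prototype: the routine returns a count or -EFAULT in a2.
 *      extern long __strncpy_user(char *dst, const char *src, long len);
 *
 *      // Hypothetical wrapper, for illustration only.
 *      long example_strncpy_from_user(char *dst, const char *src, long len)
 *      {
 *              long n = __strncpy_user(dst, src, len);
 *
 *              if (n < 0)
 *                      return n;       // fault before the terminator (-EFAULT)
 *              // 0 <= n < len: the NUL was found and copied, n == strlen(dst)
 *              // n == len:     the buffer was filled; dst may be unterminated
 *              return n;
 *      }
 */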

#include <variant/core.h>
#include <linux/errno.h>

/* Load or store instructions that may cause exceptions use the EX macro. */

#define EX(insn,reg1,reg2,offset,handler)       \
9:      insn    reg1, reg2, offset;             \
        .section __ex_table, "a";               \
        .word   9b, handler;                    \
        .previous
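
/*
 * Conceptual sketch (an assumption for illustration, not the kernel's
 * actual fixup code): each EX() use emits a pair of words into the
 * __ex_table section -- the address of the guarded instruction and the
 * address of its handler (fixup_l or fixup_s here).  On a fault during
 * such an access, the exception path conceptually resolves the handler
 * with a lookup like this and resumes execution there:
 *
 *      struct ex_entry {
 *              unsigned long insn;     // address recorded by ".word 9b"
 *              unsigned long fixup;    // address recorded by ".word handler"
 *      };
 *
 *      // find_fixup() is a hypothetical helper, not a kernel API.
 *      static unsigned long find_fixup(const struct ex_entry *tbl,
 *                                      unsigned long n, unsigned long pc)
 *      {
 *              for (unsigned long i = 0; i < n; i++)
 *                      if (tbl[i].insn == pc)
 *                              return tbl[i].fixup;
 *              return 0;       // no entry: the fault is genuine
 *      }
 */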

/*
 * char *__strncpy_user(char *dst, const char *src, size_t len)
 */

#ifdef __XTENSA_EB__
# define MASK0 0xff000000
# define MASK1 0x00ff0000
# define MASK2 0x0000ff00
# define MASK3 0x000000ff
#else
# define MASK0 0x000000ff
# define MASK1 0x0000ff00
# define MASK2 0x00ff0000
# define MASK3 0xff000000
#endif
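
/*
 * Illustrative sketch (mirrors the "bnone" tests in .Loop1 below; not
 * part of the original file): for either endianness, MASKn isolates the
 * byte that sits at memory offset n of a word loaded from src, so a
 * zero result means that byte is the NUL terminator.
 *
 *      // Returns the offset (0..3) of the first NUL byte in 'word',
 *      // or 4 if the word contains no NUL.  Uses the MASKn values
 *      // defined above; the helper name is hypothetical.
 *      static int first_nul_byte(unsigned int word)
 *      {
 *              const unsigned int mask[4] = { MASK0, MASK1, MASK2, MASK3 };
 *              int i;
 *
 *              for (i = 0; i < 4; i++)
 *                      if ((word & mask[i]) == 0)  // cf. "bnone a9, a5..a8"
 *                              return i;
 *              return 4;
 *      }
 */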

# Register use
#   a0/ return address
#   a1/ stack pointer
#   a2/ return value
#   a3/ src
#   a4/ len
#   a5/ mask0
#   a6/ mask1
#   a7/ mask2
#   a8/ mask3
#   a9/ tmp
#   a10/ tmp
#   a11/ dst
#   a12/ tmp

        .text
        .align  4
        .global __strncpy_user
        .type   __strncpy_user, @function
__strncpy_user:
        entry   sp, 16                  # minimal stack frame
        # a2/ dst, a3/ src, a4/ len
        mov     a11, a2                 # leave dst in return value register
        beqz    a4, .Lret               # if len is zero
        movi    a5, MASK0               # mask for byte 0
        movi    a6, MASK1               # mask for byte 1
        movi    a7, MASK2               # mask for byte 2
        movi    a8, MASK3               # mask for byte 3
        bbsi.l  a3, 0, .Lsrc1mod2       # if only  8-bit aligned
        bbsi.l  a3, 1, .Lsrc2mod4       # if only 16-bit aligned

.Lsrcaligned:                           # return here when src is word-aligned
        srli    a12, a4, 2              # number of loop iterations with 4B per loop
        movi    a9, 3
        bnone   a11, a9, .Laligned
        j       .Ldstunaligned

.Lsrc1mod2:                             # src address is odd
        EX(l8ui, a9, a3, 0, fixup_l)    # get byte 0
        addi    a3, a3, 1               # advance src pointer
        EX(s8i, a9, a11, 0, fixup_s)    # store byte 0
        beqz    a9, .Lret               # if byte 0 is zero
        addi    a11, a11, 1             # advance dst pointer
        addi    a4, a4, -1              # decrement len
        beqz    a4, .Lret               # if len is zero
        bbci.l  a3, 1, .Lsrcaligned     # if src is now word-aligned

.Lsrc2mod4:                             # src address is 2 mod 4
        EX(l8ui, a9, a3, 0, fixup_l)    # get byte 0
        /* 1-cycle interlock */
        EX(s8i, a9, a11, 0, fixup_s)    # store byte 0
        beqz    a9, .Lret               # if byte 0 is zero
        addi    a11, a11, 1             # advance dst pointer
        addi    a4, a4, -1              # decrement len
        beqz    a4, .Lret               # if len is zero
        EX(l8ui, a9, a3, 1, fixup_l)    # get byte 1
        addi    a3, a3, 2               # advance src pointer
        EX(s8i, a9, a11, 0, fixup_s)    # store byte 1
        beqz    a9, .Lret               # if byte 1 is zero
        addi    a11, a11, 1             # advance dst pointer
        addi    a4, a4, -1              # decrement len
        bnez    a4, .Lsrcaligned        # if len is nonzero

.Lret:
        sub     a2, a11, a2             # compute strlen
        retw
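
/*
 * Rough C counterpart of the aligned fast path that follows (a sketch
 * under assumptions, not a drop-in equivalent): copy whole words while
 * no byte in the word is NUL; the word containing the terminator and
 * the remaining 1-3 tail bytes are then finished byte-wise, as the
 * .Loop1done code and the .Lz0-.Lz3 exits do below.
 *
 *      #include <string.h>
 *
 *      // Hypothetical helper for illustration; both pointers are
 *      // assumed word-aligned, as on entry to .Laligned.
 *      static unsigned long copy_aligned_words(char *dst, const char *src,
 *                                              unsigned long len)
 *      {
 *              unsigned long copied = 0;
 *              unsigned int w;
 *
 *              while (len - copied >= 4) {
 *                      memcpy(&w, src + copied, 4);            // like l32i
 *                      if ((w & MASK0) == 0 || (w & MASK1) == 0 ||
 *                          (w & MASK2) == 0 || (w & MASK3) == 0)
 *                              break;  // NUL in this word: finish byte-wise
 *                      memcpy(dst + copied, &w, 4);            // like s32i
 *                      copied += 4;
 *              }
 *              return copied;  // bytes copied so far
 *      }
 */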

/*
 * dst is word-aligned, src is word-aligned
 */
        .align  4                       # 1 mod 4 alignment for LOOPNEZ
        .byte   0                       # (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
        loopnez a12, .Loop1done
#else
        beqz    a12, .Loop1done
        slli    a12, a12, 2
        add     a12, a12, a11           # a12 = end of last 4B chunk
#endif
.Loop1:
        EX(l32i, a9, a3, 0, fixup_l)    # get word from src
        addi    a3, a3, 4               # advance src pointer
        bnone   a9, a5, .Lz0            # if byte 0 is zero
        bnone   a9, a6, .Lz1            # if byte 1 is zero
        bnone   a9, a7, .Lz2            # if byte 2 is zero
        EX(s32i, a9, a11, 0, fixup_s)   # store word to dst
        bnone   a9, a8, .Lz3            # if byte 3 is zero
        addi    a11, a11, 4             # advance dst pointer
#if !XCHAL_HAVE_LOOPS
        blt     a11, a12, .Loop1
#endif

.Loop1done:
        bbci.l  a4, 1, .L100
        # copy 2 bytes
        EX(l16ui, a9, a3, 0, fixup_l)
        addi    a3, a3, 2               # advance src pointer
#ifdef __XTENSA_EB__
        bnone   a9, a7, .Lz0            # if byte 2 (first of the pair) is zero
        bnone   a9, a8, .Lz1            # if byte 3 (second of the pair) is zero
#else
        bnone   a9, a5, .Lz0            # if byte 0 (first of the pair) is zero
        bnone   a9, a6, .Lz1            # if byte 1 (second of the pair) is zero
#endif
        EX(s16i, a9, a11, 0, fixup_s)
        addi    a11, a11, 2             # advance dst pointer
.L100:
        bbci.l  a4, 0, .Lret
        EX(l8ui, a9, a3, 0, fixup_l)
        /* slot */
        EX(s8i, a9, a11, 0, fixup_s)
        beqz    a9, .Lret               # if byte is zero
        addi    a11, a11, 1-3           # advance dst ptr 1, but also cancel
                                        # the effect of adding 3 in .Lz3 code
        /* fall thru to .Lz3 and "retw" */

.Lz3:                                   # byte 3 is zero
        addi    a11, a11, 3             # advance dst pointer
        sub     a2, a11, a2             # compute strlen
        retw

.Lz0:                                   # byte 0 is zero
#ifdef __XTENSA_EB__
        movi    a9, 0
#endif /* __XTENSA_EB__ */
        EX(s8i, a9, a11, 0, fixup_s)
        sub     a2, a11, a2             # compute strlen
        retw

.Lz1:                                   # byte 1 is zero
#ifdef __XTENSA_EB__
        extui   a9, a9, 16, 16
#endif /* __XTENSA_EB__ */
        EX(s16i, a9, a11, 0, fixup_s)
        addi    a11, a11, 1             # advance dst pointer
        sub     a2, a11, a2             # compute strlen
        retw

.Lz2:                                   # byte 2 is zero
#ifdef __XTENSA_EB__
        extui   a9, a9, 16, 16
#endif /* __XTENSA_EB__ */
        EX(s16i, a9, a11, 0, fixup_s)
        movi    a9, 0
        EX(s8i, a9, a11, 2, fixup_s)
        addi    a11, a11, 2             # advance dst pointer
        sub     a2, a11, a2             # compute strlen
        retw

        .align  4                       # 1 mod 4 alignment for LOOPNEZ
        .byte   0                       # (0 mod 4 alignment for LBEG)
.Ldstunaligned:
/*
 * for now just use byte copy loop
 */
#if XCHAL_HAVE_LOOPS
        loopnez a4, .Lunalignedend
#else
        beqz    a4, .Lunalignedend
        add     a12, a11, a4            # a12 = ending address
#endif /* XCHAL_HAVE_LOOPS */
.Lnextbyte:
        EX(l8ui, a9, a3, 0, fixup_l)
        addi    a3, a3, 1
        EX(s8i, a9, a11, 0, fixup_s)
        beqz    a9, .Lunalignedend
        addi    a11, a11, 1
#if !XCHAL_HAVE_LOOPS
        blt     a11, a12, .Lnextbyte
#endif

.Lunalignedend:
        sub     a2, a11, a2             # compute strlen
        retw

        .section .fixup, "ax"
        .align  4

        /* For now, just return -EFAULT.  Future implementations might
         * like to clear remaining kernel space, like the fixup
         * implementation in memset().  Thus, we differentiate between
         * load/store fixups. */

fixup_s:
fixup_l:
        movi    a2, -EFAULT
        retw