copyuser_power7.S

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2011
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/ppc_asm.h>

#define STACKFRAMESIZE	256
#define STK_REG(i)	(112 + ((i)-14)*8)
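
/*
 * Each errN macro marks the user access that follows it on the same
 * line: it plants a numbered label in front of that instruction and
 * emits an __ex_table entry pairing the label with the matching
 * .Ldo_errN fixup, so a fault in the access is redirected there.
 */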
	.macro err1
100:
	.section __ex_table,"a"
	.align 3
	.llong 100b,.Ldo_err1
	.previous
	.endm

	.macro err2
200:
	.section __ex_table,"a"
	.align 3
	.llong 200b,.Ldo_err2
	.previous
	.endm

#ifdef CONFIG_ALTIVEC
	.macro err3
300:
	.section __ex_table,"a"
	.align 3
	.llong 300b,.Ldo_err3
	.previous
	.endm

	.macro err4
400:
	.section __ex_table,"a"
	.align 3
	.llong 400b,.Ldo_err4
	.previous
	.endm
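
/*
 * Fault fixup targets.  Each handler unwinds whatever state its copy
 * loop had set up: saved non-volatile GPRs are restored, VMX is exited
 * where it was in use, the stack frame is popped if one was allocated,
 * and the original destination, source and length are reloaded before
 * handing the copy over to __copy_tofrom_user_base.
 */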
.Ldo_err4:
	ld	r16,STK_REG(r16)(r1)
	ld	r15,STK_REG(r15)(r1)
	ld	r14,STK_REG(r14)(r1)
.Ldo_err3:
	bl	.exit_vmx_copy
	ld	r0,STACKFRAMESIZE+16(r1)
	mtlr	r0
	b	.Lexit
#endif /* CONFIG_ALTIVEC */

.Ldo_err2:
	ld	r22,STK_REG(r22)(r1)
	ld	r21,STK_REG(r21)(r1)
	ld	r20,STK_REG(r20)(r1)
	ld	r19,STK_REG(r19)(r1)
	ld	r18,STK_REG(r18)(r1)
	ld	r17,STK_REG(r17)(r1)
	ld	r16,STK_REG(r16)(r1)
	ld	r15,STK_REG(r15)(r1)
	ld	r14,STK_REG(r14)(r1)
.Lexit:
	addi	r1,r1,STACKFRAMESIZE
.Ldo_err1:
	ld	r3,48(r1)
	ld	r4,56(r1)
	ld	r5,64(r1)
	b	__copy_tofrom_user_base
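
/*
 * __copy_tofrom_user_power7(to=r3, from=r4, len=r5)
 *
 * Stores go to r3, loads come from r4 and r5 holds the byte count; the
 * scalar path returns 0 in r3 on success.  On a fault the fixup code
 * above reloads the original arguments and falls back to
 * __copy_tofrom_user_base, leaving that routine to report the number
 * of bytes not copied.  Copies of 4096 bytes or less, and all copies
 * when CONFIG_ALTIVEC is off, use the scalar loops; larger copies
 * attempt the VMX path.
 */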
_GLOBAL(__copy_tofrom_user_power7)
#ifdef CONFIG_ALTIVEC
	cmpldi	r5,16
	cmpldi	cr1,r5,4096

	std	r3,48(r1)
	std	r4,56(r1)
	std	r5,64(r1)

	blt	.Lshort_copy
	bgt	cr1,.Lvmx_copy
#else
	cmpldi	r5,16

	std	r3,48(r1)
	std	r4,56(r1)
	std	r5,64(r1)

	blt	.Lshort_copy
#endif
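
/*
 * Scalar (non-VMX) copy: align the source to 8 bytes, stream full 128B
 * cachelines through the GPRs, then mop up the remainder in 64/32/16
 * byte steps and a final sub-16 byte tail.
 */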
.Lnonvmx_copy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-3)

	bf	cr7*4+3,1f
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r5,r5,r6
	cmpldi	r5,128
	blt	5f

	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(r14)(r1)
	std	r15,STK_REG(r15)(r1)
	std	r16,STK_REG(r16)(r1)
	std	r17,STK_REG(r17)(r1)
	std	r18,STK_REG(r18)(r1)
	std	r19,STK_REG(r19)(r1)
	std	r20,STK_REG(r20)(r1)
	std	r21,STK_REG(r21)(r1)
	std	r22,STK_REG(r22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	srdi	r6,r5,7
	mtctr	r6

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r7,16(r4)
err2;	ld	r8,24(r4)
err2;	ld	r9,32(r4)
err2;	ld	r10,40(r4)
err2;	ld	r11,48(r4)
err2;	ld	r12,56(r4)
err2;	ld	r14,64(r4)
err2;	ld	r15,72(r4)
err2;	ld	r16,80(r4)
err2;	ld	r17,88(r4)
err2;	ld	r18,96(r4)
err2;	ld	r19,104(r4)
err2;	ld	r20,112(r4)
err2;	ld	r21,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r7,16(r3)
err2;	std	r8,24(r3)
err2;	std	r9,32(r3)
err2;	std	r10,40(r3)
err2;	std	r11,48(r3)
err2;	std	r12,56(r3)
err2;	std	r14,64(r3)
err2;	std	r15,72(r3)
err2;	std	r16,80(r3)
err2;	std	r17,88(r3)
err2;	std	r18,96(r3)
err2;	std	r19,104(r3)
err2;	std	r20,112(r3)
err2;	std	r21,120(r3)
	addi	r3,r3,128
	bdnz	4b

	clrldi	r5,r5,(64-7)

	ld	r14,STK_REG(r14)(r1)
	ld	r15,STK_REG(r15)(r1)
	ld	r16,STK_REG(r16)(r1)
	ld	r17,STK_REG(r17)(r1)
	ld	r18,STK_REG(r18)(r1)
	ld	r19,STK_REG(r19)(r1)
	ld	r20,STK_REG(r20)(r1)
	ld	r21,STK_REG(r21)(r1)
	ld	r22,STK_REG(r22)(r1)
	addi	r1,r1,STACKFRAMESIZE
	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6

6:	bf	cr7*4+1,7f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
err1;	ld	r9,32(r4)
err1;	ld	r10,40(r4)
err1;	ld	r11,48(r4)
err1;	ld	r12,56(r4)
	addi	r4,r4,64
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
err1;	std	r9,32(r3)
err1;	std	r10,40(r3)
err1;	std	r11,48(r3)
err1;	std	r12,56(r3)
	addi	r3,r3,64

	/* Up to 63B to go */
7:	bf	cr7*4+2,8f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
	addi	r3,r3,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16

9:	clrldi	r5,r5,(64-4)

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0
	blr
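
/*
 * Reached when enter_vmx_copy decides against using Altivec after the
 * stack frame has already been allocated: pop the frame and use the
 * scalar loops instead.
 */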
.Lunwind_stack_nonvmx_copy:
	addi	r1,r1,STACKFRAMESIZE
	b	.Lnonvmx_copy
#ifdef CONFIG_ALTIVEC
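/*
 * VMX path: save the link register, allocate a stack frame and call
 * enter_vmx_copy to see whether Altivec can be used here.  If it
 * returns 0 the frame is dropped and the scalar loops are used instead.
 * When source and destination share the same alignment within a 16 byte
 * block the copy runs with plain lvx/stvx; otherwise it branches to the
 * vperm-based unaligned variant further down.
 */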
.Lvmx_copy:
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	.enter_vmx_copy
	cmpwi	r3,0
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STACKFRAMESIZE+48(r1)
	ld	r4,STACKFRAMESIZE+56(r1)
	ld	r5,STACKFRAMESIZE+64(r1)
	mtlr	r0
	beq	.Lunwind_stack_nonvmx_copy

	/*
	 * If source and destination are not relatively aligned we use a
	 * slower permute loop.
	 */
	xor	r6,r4,r3
	rldicl.	r6,r6,0,(64-4)
	bne	.Lvmx_unaligned_copy

	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6
	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)
	li	r9,16
	li	r10,32
	li	r11,48

	bf	cr7*4+3,5f
err3;	lvx	vr1,r0,r4
	addi	r4,r4,16
err3;	stvx	vr1,r0,r3
	addi	r3,r3,16

5:	bf	cr7*4+2,6f
err3;	lvx	vr1,r0,r4
err3;	lvx	vr0,r4,r9
	addi	r4,r4,32
err3;	stvx	vr1,r0,r3
err3;	stvx	vr0,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	vr3,r0,r4
err3;	lvx	vr2,r4,r9
err3;	lvx	vr1,r4,r10
err3;	lvx	vr0,r4,r11
	addi	r4,r4,64
err3;	stvx	vr3,r0,r3
err3;	stvx	vr2,r3,r9
err3;	stvx	vr1,r3,r10
err3;	stvx	vr0,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(r14)(r1)
	std	r15,STK_REG(r15)(r1)
	std	r16,STK_REG(r16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	vr7,r0,r4
err4;	lvx	vr6,r4,r9
err4;	lvx	vr5,r4,r10
err4;	lvx	vr4,r4,r11
err4;	lvx	vr3,r4,r12
err4;	lvx	vr2,r4,r14
err4;	lvx	vr1,r4,r15
err4;	lvx	vr0,r4,r16
	addi	r4,r4,128
err4;	stvx	vr7,r0,r3
err4;	stvx	vr6,r3,r9
err4;	stvx	vr5,r3,r10
err4;	stvx	vr4,r3,r11
err4;	stvx	vr3,r3,r12
err4;	stvx	vr2,r3,r14
err4;	stvx	vr1,r3,r15
err4;	stvx	vr0,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(r14)(r1)
	ld	r15,STK_REG(r15)(r1)
	ld	r16,STK_REG(r16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	vr3,r0,r4
err3;	lvx	vr2,r4,r9
err3;	lvx	vr1,r4,r10
err3;	lvx	vr0,r4,r11
	addi	r4,r4,64
err3;	stvx	vr3,r0,r3
err3;	stvx	vr2,r3,r9
err3;	stvx	vr1,r3,r10
err3;	stvx	vr0,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	vr1,r0,r4
err3;	lvx	vr0,r4,r9
	addi	r4,r4,32
err3;	stvx	vr1,r0,r3
err3;	stvx	vr0,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	vr1,r0,r4
	addi	r4,r4,16
err3;	stvx	vr1,r0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	.exit_vmx_copy		/* tail call optimise */
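
/*
 * Unaligned VMX copy: source and destination are misaligned with
 * respect to each other, so quadword loads do not line up with the
 * quadword stores.  lvsl builds a permute control vector from the
 * source address and each 16 byte store is assembled from two
 * neighbouring source quadwords with vperm, with vr0 carrying the
 * leftover quadword between steps.
 */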
.Lvmx_unaligned_copy:
	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r7,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r7,4(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6
	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)
	li	r9,16
	li	r10,32
	li	r11,48

	lvsl	vr16,0,r4	/* Setup permute control vector */
err3;	lvx	vr0,0,r4
	addi	r4,r4,16

	bf	cr7*4+3,5f
err3;	lvx	vr1,r0,r4
	vperm	vr8,vr0,vr1,vr16
	addi	r4,r4,16
err3;	stvx	vr8,r0,r3
	addi	r3,r3,16
	vor	vr0,vr1,vr1

5:	bf	cr7*4+2,6f
err3;	lvx	vr1,r0,r4
	vperm	vr8,vr0,vr1,vr16
err3;	lvx	vr0,r4,r9
	vperm	vr9,vr1,vr0,vr16
	addi	r4,r4,32
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	vr3,r0,r4
	vperm	vr8,vr0,vr3,vr16
err3;	lvx	vr2,r4,r9
	vperm	vr9,vr3,vr2,vr16
err3;	lvx	vr1,r4,r10
	vperm	vr10,vr2,vr1,vr16
err3;	lvx	vr0,r4,r11
	vperm	vr11,vr1,vr0,vr16
	addi	r4,r4,64
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
err3;	stvx	vr10,r3,r10
err3;	stvx	vr11,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(r14)(r1)
	std	r15,STK_REG(r15)(r1)
	std	r16,STK_REG(r16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	vr7,r0,r4
	vperm	vr8,vr0,vr7,vr16
err4;	lvx	vr6,r4,r9
	vperm	vr9,vr7,vr6,vr16
err4;	lvx	vr5,r4,r10
	vperm	vr10,vr6,vr5,vr16
err4;	lvx	vr4,r4,r11
	vperm	vr11,vr5,vr4,vr16
err4;	lvx	vr3,r4,r12
	vperm	vr12,vr4,vr3,vr16
err4;	lvx	vr2,r4,r14
	vperm	vr13,vr3,vr2,vr16
err4;	lvx	vr1,r4,r15
	vperm	vr14,vr2,vr1,vr16
err4;	lvx	vr0,r4,r16
	vperm	vr15,vr1,vr0,vr16
	addi	r4,r4,128
err4;	stvx	vr8,r0,r3
err4;	stvx	vr9,r3,r9
err4;	stvx	vr10,r3,r10
err4;	stvx	vr11,r3,r11
err4;	stvx	vr12,r3,r12
err4;	stvx	vr13,r3,r14
err4;	stvx	vr14,r3,r15
err4;	stvx	vr15,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(r14)(r1)
	ld	r15,STK_REG(r15)(r1)
	ld	r16,STK_REG(r16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	vr3,r0,r4
	vperm	vr8,vr0,vr3,vr16
err3;	lvx	vr2,r4,r9
	vperm	vr9,vr3,vr2,vr16
err3;	lvx	vr1,r4,r10
	vperm	vr10,vr2,vr1,vr16
err3;	lvx	vr0,r4,r11
	vperm	vr11,vr1,vr0,vr16
	addi	r4,r4,64
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
err3;	stvx	vr10,r3,r10
err3;	stvx	vr11,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	vr1,r0,r4
	vperm	vr8,vr0,vr1,vr16
err3;	lvx	vr0,r4,r9
	vperm	vr9,vr1,vr0,vr16
	addi	r4,r4,32
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	vr1,r0,r4
	vperm	vr8,vr0,vr1,vr16
	addi	r4,r4,16
err3;	stvx	vr8,r0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	addi	r4,r4,-16	/* Unwind the +16 load offset */
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r6,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	.exit_vmx_copy		/* tail call optimise */
#endif /* CONFIG_ALTIVEC */