0001-Fix-for-issue-1114-compile-error.patch

From 7361ef732b432e153496c30da66081d7e530c7f6 Mon Sep 17 00:00:00 2001
From: Peter de Rivaz <peter.derivaz@argondesign.com>
Date: Mon, 14 Dec 2015 16:35:29 +0000
Subject: [PATCH] Fix for issue 1114 compile error

In a 32-bit build with --enable-shared there is a lot of register
pressure, and the register src_strideq is reused. The code needs to
use the stack-based version of src_stride, but that does not compile
when used in an lea instruction.

This patch also fixes a related segmentation fault caused by the
implementation using src_strideq even though it has been reused.

This patch also fixes the HBD subpel variance tests that fail when
compiled without --disable-optimizations. These failures were caused
by local variables in the assembler routines colliding with the
caller's stack frame.

Change-Id: Ice9d4dafdcbdc6038ad5ee7c1c09a8f06deca362
---
 vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm | 18 +++----
 vpx_dsp/x86/highbd_variance_sse2.c               | 64 ++++++++++++++----------
 2 files changed, 44 insertions(+), 38 deletions(-)
diff --git a/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm b/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
index 22d52a2..30ee81b 100644
--- a/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
+++ b/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
@@ -79,20 +79,13 @@ SECTION .text
 
 %macro INC_SRC_BY_SRC_STRIDE 0
 %if ARCH_X86=1 && CONFIG_PIC=1
-  lea                srcq, [srcq + src_stridemp*2]
+  add                srcq, src_stridemp
+  add                srcq, src_stridemp
 %else
   lea                srcq, [srcq + src_strideq*2]
 %endif
 %endmacro
 
-%macro INC_SRC_BY_SRC_2STRIDE 0
-%if ARCH_X86=1 && CONFIG_PIC=1
-  lea                srcq, [srcq + src_stridemp*4]
-%else
-  lea                srcq, [srcq + src_strideq*4]
-%endif
-%endmacro
-
 %macro SUBPEL_VARIANCE 1-2 0 ; W
 %define bilin_filter_m bilin_filter_m_sse2
 %define filter_idx_shift 5
@@ -984,8 +977,9 @@ SECTION .text
 .x_other_y_other_loop:
   movu               m2, [srcq]
   movu               m4, [srcq+2]
-  movu               m3, [srcq+src_strideq*2]
-  movu               m5, [srcq+src_strideq*2+2]
+  INC_SRC_BY_SRC_STRIDE
+  movu               m3, [srcq]
+  movu               m5, [srcq+2]
   pmullw             m2, filter_x_a
   pmullw             m4, filter_x_b
   paddw              m2, filter_rnd
@@ -1018,7 +1012,7 @@ SECTION .text
   SUM_SSE            m0, m2, m4, m3, m6, m7
   mova               m0, m5
 
-  INC_SRC_BY_SRC_2STRIDE
+  INC_SRC_BY_SRC_STRIDE
   lea                dstq, [dstq + dst_strideq * 4]
 %if %2 == 1 ; avg
   add                secq, sec_str
diff --git a/vpx_dsp/x86/highbd_variance_sse2.c b/vpx_dsp/x86/highbd_variance_sse2.c
index b45331c..81ec5db 100644
--- a/vpx_dsp/x86/highbd_variance_sse2.c
+++ b/vpx_dsp/x86/highbd_variance_sse2.c
@@ -243,13 +243,18 @@ unsigned int vpx_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride,
 }
 
 #if CONFIG_USE_X86INC
+// The two unused parameters are placeholders for the PIC-enabled build.
+// These definitions are for functions defined in
+// highbd_subpel_variance_impl_sse2.asm
 #define DECL(w, opt) \
   int vpx_highbd_sub_pixel_variance##w##xh_##opt(const uint16_t *src, \
                                                  ptrdiff_t src_stride, \
                                                  int x_offset, int y_offset, \
                                                  const uint16_t *dst, \
                                                  ptrdiff_t dst_stride, \
-                                                 int height, unsigned int *sse);
+                                                 int height, \
+                                                 unsigned int *sse, \
+                                                 void *unused0, void *unused);
 #define DECLS(opt1, opt2) \
   DECL(8, opt1); \
   DECL(16, opt1)
@@ -274,7 +279,7 @@ uint32_t vpx_highbd_8_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src8, \
   int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src, src_stride, \
                                                        x_offset, y_offset, \
                                                        dst, dst_stride, h, \
-                                                       &sse); \
+                                                       &sse, NULL, NULL); \
   if (w > wf) { \
     unsigned int sse2; \
     int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 16, \
@@ -282,19 +287,20 @@ uint32_t vpx_highbd_8_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src8, \
                                                           x_offset, y_offset, \
                                                           dst + 16, \
                                                           dst_stride, \
-                                                          h, &sse2); \
+                                                          h, &sse2, \
+                                                          NULL, NULL); \
     se += se2; \
     sse += sse2; \
     if (w > wf * 2) { \
       se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \
                                                         x_offset, y_offset, \
                                                         dst + 32, dst_stride, \
-                                                        h, &sse2); \
+                                                        h, &sse2, NULL, NULL); \
       se += se2; \
       sse += sse2; \
       se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
               src + 48, src_stride, x_offset, y_offset, \
-              dst + 48, dst_stride, h, &sse2); \
+              dst + 48, dst_stride, h, &sse2, NULL, NULL); \
       se += se2; \
       sse += sse2; \
     } \
@@ -312,7 +318,7 @@ uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \
   int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src, src_stride, \
                                                        x_offset, y_offset, \
                                                        dst, dst_stride, \
-                                                       h, &sse); \
+                                                       h, &sse, NULL, NULL); \
   if (w > wf) { \
     uint32_t sse2; \
     int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 16, \
@@ -320,20 +326,21 @@ uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \
                                                           x_offset, y_offset, \
                                                           dst + 16, \
                                                           dst_stride, \
-                                                          h, &sse2); \
+                                                          h, &sse2, \
+                                                          NULL, NULL); \
     se += se2; \
     sse += sse2; \
     if (w > wf * 2) { \
       se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \
                                                         x_offset, y_offset, \
                                                         dst + 32, dst_stride, \
-                                                        h, &sse2); \
+                                                        h, &sse2, NULL, NULL); \
       se += se2; \
       sse += sse2; \
       se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \
                                                         x_offset, y_offset, \
                                                         dst + 48, dst_stride, \
-                                                        h, &sse2); \
+                                                        h, &sse2, NULL, NULL); \
       se += se2; \
       sse += sse2; \
     } \
@@ -359,27 +366,27 @@ uint32_t vpx_highbd_12_sub_pixel_variance##w##x##h##_##opt( \
     int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
                 src + (start_row * src_stride), src_stride, \
                 x_offset, y_offset, dst + (start_row * dst_stride), \
-                dst_stride, height, &sse2); \
+                dst_stride, height, &sse2, NULL, NULL); \
     se += se2; \
     long_sse += sse2; \
     if (w > wf) { \
       se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
              src + 16 + (start_row * src_stride), src_stride, \
              x_offset, y_offset, dst + 16 + (start_row * dst_stride), \
-             dst_stride, height, &sse2); \
+             dst_stride, height, &sse2, NULL, NULL); \
       se += se2; \
       long_sse += sse2; \
       if (w > wf * 2) { \
         se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
                src + 32 + (start_row * src_stride), src_stride, \
                x_offset, y_offset, dst + 32 + (start_row * dst_stride), \
-               dst_stride, height, &sse2); \
+               dst_stride, height, &sse2, NULL, NULL); \
         se += se2; \
         long_sse += sse2; \
         se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
               src + 48 + (start_row * src_stride), src_stride, \
               x_offset, y_offset, dst + 48 + (start_row * dst_stride), \
-              dst_stride, height, &sse2); \
+              dst_stride, height, &sse2, NULL, NULL); \
         se += se2; \
         long_sse += sse2; \
       }\
@@ -410,6 +417,7 @@ FNS(sse2, sse);
 #undef FNS
 #undef FN
 
+// The two unused parameters are placeholders for the PIC-enabled build.
 #define DECL(w, opt) \
   int vpx_highbd_sub_pixel_avg_variance##w##xh_##opt(const uint16_t *src, \
                                                      ptrdiff_t src_stride, \
@@ -419,7 +427,8 @@ int vpx_highbd_sub_pixel_avg_variance##w##xh_##opt(const uint16_t *src, \
                                                    const uint16_t *sec, \
                                                    ptrdiff_t sec_stride, \
                                                    int height, \
-                                                   unsigned int *sse);
+                                                   unsigned int *sse, \
+                                                   void *unused0, void *unused);
 #define DECLS(opt1) \
   DECL(16, opt1) \
   DECL(8, opt1)
@@ -439,23 +448,23 @@ uint32_t vpx_highbd_8_sub_pixel_avg_variance##w##x##h##_##opt( \
   uint16_t *sec = CONVERT_TO_SHORTPTR(sec8); \
   int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
              src, src_stride, x_offset, \
-             y_offset, dst, dst_stride, sec, w, h, &sse); \
+             y_offset, dst, dst_stride, sec, w, h, &sse, NULL, NULL); \
   if (w > wf) { \
     uint32_t sse2; \
     int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
                 src + 16, src_stride, x_offset, y_offset, \
-                dst + 16, dst_stride, sec + 16, w, h, &sse2); \
+                dst + 16, dst_stride, sec + 16, w, h, &sse2, NULL, NULL); \
     se += se2; \
     sse += sse2; \
     if (w > wf * 2) { \
       se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
               src + 32, src_stride, x_offset, y_offset, \
-              dst + 32, dst_stride, sec + 32, w, h, &sse2); \
+              dst + 32, dst_stride, sec + 32, w, h, &sse2, NULL, NULL); \
       se += se2; \
       sse += sse2; \
       se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
               src + 48, src_stride, x_offset, y_offset, \
-              dst + 48, dst_stride, sec + 48, w, h, &sse2); \
+              dst + 48, dst_stride, sec + 48, w, h, &sse2, NULL, NULL); \
       se += se2; \
       sse += sse2; \
     } \
@@ -475,14 +484,15 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \
   int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
              src, src_stride, x_offset, \
             y_offset, dst, dst_stride, \
-             sec, w, h, &sse); \
+             sec, w, h, &sse, NULL, NULL); \
   if (w > wf) { \
     uint32_t sse2; \
     int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
                 src + 16, src_stride, \
                 x_offset, y_offset, \
                 dst + 16, dst_stride, \
-                sec + 16, w, h, &sse2); \
+                sec + 16, w, h, &sse2, \
+                NULL, NULL); \
     se += se2; \
     sse += sse2; \
     if (w > wf * 2) { \
@@ -490,14 +500,16 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \
               src + 32, src_stride, \
               x_offset, y_offset, \
               dst + 32, dst_stride, \
-              sec + 32, w, h, &sse2); \
+              sec + 32, w, h, &sse2, \
+              NULL, NULL); \
       se += se2; \
       sse += sse2; \
       se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
              src + 48, src_stride, \
              x_offset, y_offset, \
              dst + 48, dst_stride, \
-             sec + 48, w, h, &sse2); \
+             sec + 48, w, h, &sse2, \
+             NULL, NULL); \
       se += se2; \
       sse += sse2; \
     } \
@@ -525,7 +537,7 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
     int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
                 src + (start_row * src_stride), src_stride, x_offset, \
                 y_offset, dst + (start_row * dst_stride), dst_stride, \
-                sec + (start_row * w), w, height, &sse2); \
+                sec + (start_row * w), w, height, &sse2, NULL, NULL); \
     se += se2; \
     long_sse += sse2; \
     if (w > wf) { \
@@ -533,7 +545,7 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
               src + 16 + (start_row * src_stride), src_stride, \
               x_offset, y_offset, \
               dst + 16 + (start_row * dst_stride), dst_stride, \
-              sec + 16 + (start_row * w), w, height, &sse2); \
+              sec + 16 + (start_row * w), w, height, &sse2, NULL, NULL); \
       se += se2; \
       long_sse += sse2; \
       if (w > wf * 2) { \
@@ -541,14 +553,14 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
                 src + 32 + (start_row * src_stride), src_stride, \
                 x_offset, y_offset, \
                 dst + 32 + (start_row * dst_stride), dst_stride, \
-                sec + 32 + (start_row * w), w, height, &sse2); \
+                sec + 32 + (start_row * w), w, height, &sse2, NULL, NULL); \
         se += se2; \
         long_sse += sse2; \
         se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
                 src + 48 + (start_row * src_stride), src_stride, \
                 x_offset, y_offset, \
                 dst + 48 + (start_row * dst_stride), dst_stride, \
-                sec + 48 + (start_row * w), w, height, &sse2); \
+                sec + 48 + (start_row * w), w, height, &sse2, NULL, NULL); \
         se += se2; \
         long_sse += sse2; \
       } \
-- 
2.7.0
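
For reference, a minimal sketch in plain NASM of the addressing-mode
restriction the commit message describes; the label and stack offset
are hypothetical and sit outside the x86inc framework. With
ARCH_X86=1 && CONFIG_PIC=1, x86inc leaves src_stride in a stack slot
(src_stridemp), and lea accepts only registers and constants inside
its address expression, so the old one-instruction form fails to
assemble, while add happily takes a memory source operand.

        BITS 32
        SECTION .text
        ; Hypothetical stand-in for x86inc's stack-based stride slot.
        %define src_stridemp dword [esp+4]

inc_src_by_src_stride:                    ; sketch of the macro; esi = srcq
        ; lea esi, [esi + src_stridemp*2] ; rejected by the assembler: a
        ;                                 ; memory operand cannot appear in
        ;                                 ; an effective-address expression
        add esi, src_stridemp             ; add allows a memory source,
        add esi, src_stridemp             ; so two adds supply the *2
        ret

Since src points at uint16_t pixels and the stride is counted in
elements, srcq += src_stride*2 advances exactly one row, matching the
non-PIC path's lea srcq, [srcq + src_strideq*2].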
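
A second sketch, equally hypothetical, for the NULL, NULL placeholder
arguments added on the C side. Under the 32-bit cdecl convention the
pushed argument area belongs to the call, so declaring two extra
pointer parameters and passing NULL for them makes every caller
reserve two stack slots the assembler routines can use for their
locals, instead of scratch locations that collided with the caller's
frame when built without --disable-optimizations.

        BITS 32
        SECTION .text

        ; Hypothetical cdecl callee: the caller pushed (arg, NULL, NULL),
        ; so [esp+8] and [esp+12] are the caller-reserved placeholder
        ; slots this routine may overwrite as scratch space.
callee_with_reserved_scratch:
        mov  eax, [esp+4]                 ; the real argument
        mov  [esp+8], eax                 ; safe: unused0's slot
        mov  [esp+12], eax                ; safe: unused's slot
        ret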