scale_common.cc 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160
  1. /*
  2. * Copyright 2013 The LibYuv Project Authors. All rights reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "libyuv/scale.h"
  11. #include <assert.h>
  12. #include <string.h>
  13. #include "libyuv/cpu_id.h"
  14. #include "libyuv/planar_functions.h" // For CopyARGB
  15. #include "libyuv/row.h"
  16. #include "libyuv/scale_row.h"
  17. #ifdef __cplusplus
  18. namespace libyuv {
  19. extern "C" {
  20. #endif
  21. static __inline int Abs(int v) {
  22. return v >= 0 ? v : -v;
  23. }
  24. // CPU agnostic row functions
  25. void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
  26. uint8* dst, int dst_width) {
  27. int x;
  28. for (x = 0; x < dst_width - 1; x += 2) {
  29. dst[0] = src_ptr[1];
  30. dst[1] = src_ptr[3];
  31. dst += 2;
  32. src_ptr += 4;
  33. }
  34. if (dst_width & 1) {
  35. dst[0] = src_ptr[1];
  36. }
  37. }
  38. void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
  39. uint16* dst, int dst_width) {
  40. int x;
  41. for (x = 0; x < dst_width - 1; x += 2) {
  42. dst[0] = src_ptr[1];
  43. dst[1] = src_ptr[3];
  44. dst += 2;
  45. src_ptr += 4;
  46. }
  47. if (dst_width & 1) {
  48. dst[0] = src_ptr[1];
  49. }
  50. }
  51. void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
  52. uint8* dst, int dst_width) {
  53. const uint8* s = src_ptr;
  54. int x;
  55. for (x = 0; x < dst_width - 1; x += 2) {
  56. dst[0] = (s[0] + s[1] + 1) >> 1;
  57. dst[1] = (s[2] + s[3] + 1) >> 1;
  58. dst += 2;
  59. s += 4;
  60. }
  61. if (dst_width & 1) {
  62. dst[0] = (s[0] + s[1] + 1) >> 1;
  63. }
  64. }
  65. void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
  66. uint16* dst, int dst_width) {
  67. const uint16* s = src_ptr;
  68. int x;
  69. for (x = 0; x < dst_width - 1; x += 2) {
  70. dst[0] = (s[0] + s[1] + 1) >> 1;
  71. dst[1] = (s[2] + s[3] + 1) >> 1;
  72. dst += 2;
  73. s += 4;
  74. }
  75. if (dst_width & 1) {
  76. dst[0] = (s[0] + s[1] + 1) >> 1;
  77. }
  78. }
  79. void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
  80. uint8* dst, int dst_width) {
  81. const uint8* s = src_ptr;
  82. const uint8* t = src_ptr + src_stride;
  83. int x;
  84. for (x = 0; x < dst_width - 1; x += 2) {
  85. dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
  86. dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
  87. dst += 2;
  88. s += 4;
  89. t += 4;
  90. }
  91. if (dst_width & 1) {
  92. dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
  93. }
  94. }
  95. void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, ptrdiff_t src_stride,
  96. uint8* dst, int dst_width) {
  97. const uint8* s = src_ptr;
  98. const uint8* t = src_ptr + src_stride;
  99. int x;
  100. dst_width -= 1;
  101. for (x = 0; x < dst_width - 1; x += 2) {
  102. dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
  103. dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
  104. dst += 2;
  105. s += 4;
  106. t += 4;
  107. }
  108. if (dst_width & 1) {
  109. dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
  110. dst += 1;
  111. s += 2;
  112. t += 2;
  113. }
  114. dst[0] = (s[0] + t[0] + 1) >> 1;
  115. }
  116. void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
  117. uint16* dst, int dst_width) {
  118. const uint16* s = src_ptr;
  119. const uint16* t = src_ptr + src_stride;
  120. int x;
  121. for (x = 0; x < dst_width - 1; x += 2) {
  122. dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
  123. dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
  124. dst += 2;
  125. s += 4;
  126. t += 4;
  127. }
  128. if (dst_width & 1) {
  129. dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
  130. }
  131. }
  132. void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
  133. uint8* dst, int dst_width) {
  134. int x;
  135. for (x = 0; x < dst_width - 1; x += 2) {
  136. dst[0] = src_ptr[2];
  137. dst[1] = src_ptr[6];
  138. dst += 2;
  139. src_ptr += 8;
  140. }
  141. if (dst_width & 1) {
  142. dst[0] = src_ptr[2];
  143. }
  144. }
  145. void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
  146. uint16* dst, int dst_width) {
  147. int x;
  148. for (x = 0; x < dst_width - 1; x += 2) {
  149. dst[0] = src_ptr[2];
  150. dst[1] = src_ptr[6];
  151. dst += 2;
  152. src_ptr += 8;
  153. }
  154. if (dst_width & 1) {
  155. dst[0] = src_ptr[2];
  156. }
  157. }
  158. void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
  159. uint8* dst, int dst_width) {
  160. intptr_t stride = src_stride;
  161. int x;
  162. for (x = 0; x < dst_width - 1; x += 2) {
  163. dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
  164. src_ptr[stride + 0] + src_ptr[stride + 1] +
  165. src_ptr[stride + 2] + src_ptr[stride + 3] +
  166. src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
  167. src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
  168. src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
  169. src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
  170. 8) >> 4;
  171. dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
  172. src_ptr[stride + 4] + src_ptr[stride + 5] +
  173. src_ptr[stride + 6] + src_ptr[stride + 7] +
  174. src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
  175. src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
  176. src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
  177. src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
  178. 8) >> 4;
  179. dst += 2;
  180. src_ptr += 8;
  181. }
  182. if (dst_width & 1) {
  183. dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
  184. src_ptr[stride + 0] + src_ptr[stride + 1] +
  185. src_ptr[stride + 2] + src_ptr[stride + 3] +
  186. src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
  187. src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
  188. src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
  189. src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
  190. 8) >> 4;
  191. }
  192. }
  193. void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
  194. uint16* dst, int dst_width) {
  195. intptr_t stride = src_stride;
  196. int x;
  197. for (x = 0; x < dst_width - 1; x += 2) {
  198. dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
  199. src_ptr[stride + 0] + src_ptr[stride + 1] +
  200. src_ptr[stride + 2] + src_ptr[stride + 3] +
  201. src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
  202. src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
  203. src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
  204. src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
  205. 8) >> 4;
  206. dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
  207. src_ptr[stride + 4] + src_ptr[stride + 5] +
  208. src_ptr[stride + 6] + src_ptr[stride + 7] +
  209. src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
  210. src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
  211. src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
  212. src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
  213. 8) >> 4;
  214. dst += 2;
  215. src_ptr += 8;
  216. }
  217. if (dst_width & 1) {
  218. dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
  219. src_ptr[stride + 0] + src_ptr[stride + 1] +
  220. src_ptr[stride + 2] + src_ptr[stride + 3] +
  221. src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
  222. src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
  223. src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
  224. src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
  225. 8) >> 4;
  226. }
  227. }
  228. void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
  229. uint8* dst, int dst_width) {
  230. int x;
  231. assert((dst_width % 3 == 0) && (dst_width > 0));
  232. for (x = 0; x < dst_width; x += 3) {
  233. dst[0] = src_ptr[0];
  234. dst[1] = src_ptr[1];
  235. dst[2] = src_ptr[3];
  236. dst += 3;
  237. src_ptr += 4;
  238. }
  239. }
  240. void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
  241. uint16* dst, int dst_width) {
  242. int x;
  243. assert((dst_width % 3 == 0) && (dst_width > 0));
  244. for (x = 0; x < dst_width; x += 3) {
  245. dst[0] = src_ptr[0];
  246. dst[1] = src_ptr[1];
  247. dst[2] = src_ptr[3];
  248. dst += 3;
  249. src_ptr += 4;
  250. }
  251. }
  252. // Filter rows 0 and 1 together, 3 : 1
  253. void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
  254. uint8* d, int dst_width) {
  255. const uint8* s = src_ptr;
  256. const uint8* t = src_ptr + src_stride;
  257. int x;
  258. assert((dst_width % 3 == 0) && (dst_width > 0));
  259. for (x = 0; x < dst_width; x += 3) {
  260. uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
  261. uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
  262. uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
  263. uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
  264. uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
  265. uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
  266. d[0] = (a0 * 3 + b0 + 2) >> 2;
  267. d[1] = (a1 * 3 + b1 + 2) >> 2;
  268. d[2] = (a2 * 3 + b2 + 2) >> 2;
  269. d += 3;
  270. s += 4;
  271. t += 4;
  272. }
  273. }
  274. void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
  275. uint16* d, int dst_width) {
  276. const uint16* s = src_ptr;
  277. const uint16* t = src_ptr + src_stride;
  278. int x;
  279. assert((dst_width % 3 == 0) && (dst_width > 0));
  280. for (x = 0; x < dst_width; x += 3) {
  281. uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
  282. uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
  283. uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
  284. uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
  285. uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
  286. uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
  287. d[0] = (a0 * 3 + b0 + 2) >> 2;
  288. d[1] = (a1 * 3 + b1 + 2) >> 2;
  289. d[2] = (a2 * 3 + b2 + 2) >> 2;
  290. d += 3;
  291. s += 4;
  292. t += 4;
  293. }
  294. }
  295. // Filter rows 1 and 2 together, 1 : 1
  296. void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
  297. uint8* d, int dst_width) {
  298. const uint8* s = src_ptr;
  299. const uint8* t = src_ptr + src_stride;
  300. int x;
  301. assert((dst_width % 3 == 0) && (dst_width > 0));
  302. for (x = 0; x < dst_width; x += 3) {
  303. uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
  304. uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
  305. uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
  306. uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
  307. uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
  308. uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
  309. d[0] = (a0 + b0 + 1) >> 1;
  310. d[1] = (a1 + b1 + 1) >> 1;
  311. d[2] = (a2 + b2 + 1) >> 1;
  312. d += 3;
  313. s += 4;
  314. t += 4;
  315. }
  316. }
  317. void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
  318. uint16* d, int dst_width) {
  319. const uint16* s = src_ptr;
  320. const uint16* t = src_ptr + src_stride;
  321. int x;
  322. assert((dst_width % 3 == 0) && (dst_width > 0));
  323. for (x = 0; x < dst_width; x += 3) {
  324. uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
  325. uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
  326. uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
  327. uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
  328. uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
  329. uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
  330. d[0] = (a0 + b0 + 1) >> 1;
  331. d[1] = (a1 + b1 + 1) >> 1;
  332. d[2] = (a2 + b2 + 1) >> 1;
  333. d += 3;
  334. s += 4;
  335. t += 4;
  336. }
  337. }
  338. // Scales a single row of pixels using point sampling.
  339. void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
  340. int dst_width, int x, int dx) {
  341. int j;
  342. for (j = 0; j < dst_width - 1; j += 2) {
  343. dst_ptr[0] = src_ptr[x >> 16];
  344. x += dx;
  345. dst_ptr[1] = src_ptr[x >> 16];
  346. x += dx;
  347. dst_ptr += 2;
  348. }
  349. if (dst_width & 1) {
  350. dst_ptr[0] = src_ptr[x >> 16];
  351. }
  352. }
  353. void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
  354. int dst_width, int x, int dx) {
  355. int j;
  356. for (j = 0; j < dst_width - 1; j += 2) {
  357. dst_ptr[0] = src_ptr[x >> 16];
  358. x += dx;
  359. dst_ptr[1] = src_ptr[x >> 16];
  360. x += dx;
  361. dst_ptr += 2;
  362. }
  363. if (dst_width & 1) {
  364. dst_ptr[0] = src_ptr[x >> 16];
  365. }
  366. }
  367. // Scales a single row of pixels up by 2x using point sampling.
  368. void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
  369. int dst_width, int x, int dx) {
  370. int j;
  371. for (j = 0; j < dst_width - 1; j += 2) {
  372. dst_ptr[1] = dst_ptr[0] = src_ptr[0];
  373. src_ptr += 1;
  374. dst_ptr += 2;
  375. }
  376. if (dst_width & 1) {
  377. dst_ptr[0] = src_ptr[0];
  378. }
  379. }
  380. void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
  381. int dst_width, int x, int dx) {
  382. int j;
  383. for (j = 0; j < dst_width - 1; j += 2) {
  384. dst_ptr[1] = dst_ptr[0] = src_ptr[0];
  385. src_ptr += 1;
  386. dst_ptr += 2;
  387. }
  388. if (dst_width & 1) {
  389. dst_ptr[0] = src_ptr[0];
  390. }
  391. }
  392. // (1-f)a + fb can be replaced with a + f(b-a)
  393. #if defined(__arm__) || defined(__aarch64__)
  394. #define BLENDER(a, b, f) (uint8)((int)(a) + \
  395. ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
  396. #else
  397. // inteluses 7 bit math with rounding.
  398. #define BLENDER(a, b, f) (uint8)((int)(a) + \
  399. (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
  400. #endif
  401. void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
  402. int dst_width, int x, int dx) {
  403. int j;
  404. for (j = 0; j < dst_width - 1; j += 2) {
  405. int xi = x >> 16;
  406. int a = src_ptr[xi];
  407. int b = src_ptr[xi + 1];
  408. dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  409. x += dx;
  410. xi = x >> 16;
  411. a = src_ptr[xi];
  412. b = src_ptr[xi + 1];
  413. dst_ptr[1] = BLENDER(a, b, x & 0xffff);
  414. x += dx;
  415. dst_ptr += 2;
  416. }
  417. if (dst_width & 1) {
  418. int xi = x >> 16;
  419. int a = src_ptr[xi];
  420. int b = src_ptr[xi + 1];
  421. dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  422. }
  423. }
  424. void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
  425. int dst_width, int x32, int dx) {
  426. int64 x = (int64)(x32);
  427. int j;
  428. for (j = 0; j < dst_width - 1; j += 2) {
  429. int64 xi = x >> 16;
  430. int a = src_ptr[xi];
  431. int b = src_ptr[xi + 1];
  432. dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  433. x += dx;
  434. xi = x >> 16;
  435. a = src_ptr[xi];
  436. b = src_ptr[xi + 1];
  437. dst_ptr[1] = BLENDER(a, b, x & 0xffff);
  438. x += dx;
  439. dst_ptr += 2;
  440. }
  441. if (dst_width & 1) {
  442. int64 xi = x >> 16;
  443. int a = src_ptr[xi];
  444. int b = src_ptr[xi + 1];
  445. dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  446. }
  447. }
  448. #undef BLENDER
  449. // Same as 8 bit arm blender but return is cast to uint16
  450. #define BLENDER(a, b, f) (uint16)((int)(a) + \
  451. ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
  452. void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
  453. int dst_width, int x, int dx) {
  454. int j;
  455. for (j = 0; j < dst_width - 1; j += 2) {
  456. int xi = x >> 16;
  457. int a = src_ptr[xi];
  458. int b = src_ptr[xi + 1];
  459. dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  460. x += dx;
  461. xi = x >> 16;
  462. a = src_ptr[xi];
  463. b = src_ptr[xi + 1];
  464. dst_ptr[1] = BLENDER(a, b, x & 0xffff);
  465. x += dx;
  466. dst_ptr += 2;
  467. }
  468. if (dst_width & 1) {
  469. int xi = x >> 16;
  470. int a = src_ptr[xi];
  471. int b = src_ptr[xi + 1];
  472. dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  473. }
  474. }
  475. void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr,
  476. int dst_width, int x32, int dx) {
  477. int64 x = (int64)(x32);
  478. int j;
  479. for (j = 0; j < dst_width - 1; j += 2) {
  480. int64 xi = x >> 16;
  481. int a = src_ptr[xi];
  482. int b = src_ptr[xi + 1];
  483. dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  484. x += dx;
  485. xi = x >> 16;
  486. a = src_ptr[xi];
  487. b = src_ptr[xi + 1];
  488. dst_ptr[1] = BLENDER(a, b, x & 0xffff);
  489. x += dx;
  490. dst_ptr += 2;
  491. }
  492. if (dst_width & 1) {
  493. int64 xi = x >> 16;
  494. int a = src_ptr[xi];
  495. int b = src_ptr[xi + 1];
  496. dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  497. }
  498. }
  499. #undef BLENDER
  500. void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
  501. uint8* dst, int dst_width) {
  502. int x;
  503. assert(dst_width % 3 == 0);
  504. for (x = 0; x < dst_width; x += 3) {
  505. dst[0] = src_ptr[0];
  506. dst[1] = src_ptr[3];
  507. dst[2] = src_ptr[6];
  508. dst += 3;
  509. src_ptr += 8;
  510. }
  511. }
  512. void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
  513. uint16* dst, int dst_width) {
  514. int x;
  515. assert(dst_width % 3 == 0);
  516. for (x = 0; x < dst_width; x += 3) {
  517. dst[0] = src_ptr[0];
  518. dst[1] = src_ptr[3];
  519. dst[2] = src_ptr[6];
  520. dst += 3;
  521. src_ptr += 8;
  522. }
  523. }
  524. // 8x3 -> 3x1
  525. void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
  526. ptrdiff_t src_stride,
  527. uint8* dst_ptr, int dst_width) {
  528. intptr_t stride = src_stride;
  529. int i;
  530. assert((dst_width % 3 == 0) && (dst_width > 0));
  531. for (i = 0; i < dst_width; i += 3) {
  532. dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
  533. src_ptr[stride + 0] + src_ptr[stride + 1] +
  534. src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
  535. src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
  536. (65536 / 9) >> 16;
  537. dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
  538. src_ptr[stride + 3] + src_ptr[stride + 4] +
  539. src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
  540. src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
  541. (65536 / 9) >> 16;
  542. dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
  543. src_ptr[stride + 6] + src_ptr[stride + 7] +
  544. src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
  545. (65536 / 6) >> 16;
  546. src_ptr += 8;
  547. dst_ptr += 3;
  548. }
  549. }
  550. void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
  551. ptrdiff_t src_stride,
  552. uint16* dst_ptr, int dst_width) {
  553. intptr_t stride = src_stride;
  554. int i;
  555. assert((dst_width % 3 == 0) && (dst_width > 0));
  556. for (i = 0; i < dst_width; i += 3) {
  557. dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
  558. src_ptr[stride + 0] + src_ptr[stride + 1] +
  559. src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
  560. src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
  561. (65536 / 9) >> 16;
  562. dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
  563. src_ptr[stride + 3] + src_ptr[stride + 4] +
  564. src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
  565. src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
  566. (65536 / 9) >> 16;
  567. dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
  568. src_ptr[stride + 6] + src_ptr[stride + 7] +
  569. src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
  570. (65536 / 6) >> 16;
  571. src_ptr += 8;
  572. dst_ptr += 3;
  573. }
  574. }
  575. // 8x2 -> 3x1
  576. void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
  577. uint8* dst_ptr, int dst_width) {
  578. intptr_t stride = src_stride;
  579. int i;
  580. assert((dst_width % 3 == 0) && (dst_width > 0));
  581. for (i = 0; i < dst_width; i += 3) {
  582. dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
  583. src_ptr[stride + 0] + src_ptr[stride + 1] +
  584. src_ptr[stride + 2]) * (65536 / 6) >> 16;
  585. dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
  586. src_ptr[stride + 3] + src_ptr[stride + 4] +
  587. src_ptr[stride + 5]) * (65536 / 6) >> 16;
  588. dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
  589. src_ptr[stride + 6] + src_ptr[stride + 7]) *
  590. (65536 / 4) >> 16;
  591. src_ptr += 8;
  592. dst_ptr += 3;
  593. }
  594. }
  595. void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
  596. uint16* dst_ptr, int dst_width) {
  597. intptr_t stride = src_stride;
  598. int i;
  599. assert((dst_width % 3 == 0) && (dst_width > 0));
  600. for (i = 0; i < dst_width; i += 3) {
  601. dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
  602. src_ptr[stride + 0] + src_ptr[stride + 1] +
  603. src_ptr[stride + 2]) * (65536 / 6) >> 16;
  604. dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
  605. src_ptr[stride + 3] + src_ptr[stride + 4] +
  606. src_ptr[stride + 5]) * (65536 / 6) >> 16;
  607. dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
  608. src_ptr[stride + 6] + src_ptr[stride + 7]) *
  609. (65536 / 4) >> 16;
  610. src_ptr += 8;
  611. dst_ptr += 3;
  612. }
  613. }
  614. void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
  615. int x;
  616. assert(src_width > 0);
  617. for (x = 0; x < src_width - 1; x += 2) {
  618. dst_ptr[0] += src_ptr[0];
  619. dst_ptr[1] += src_ptr[1];
  620. src_ptr += 2;
  621. dst_ptr += 2;
  622. }
  623. if (src_width & 1) {
  624. dst_ptr[0] += src_ptr[0];
  625. }
  626. }
  627. void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) {
  628. int x;
  629. assert(src_width > 0);
  630. for (x = 0; x < src_width - 1; x += 2) {
  631. dst_ptr[0] += src_ptr[0];
  632. dst_ptr[1] += src_ptr[1];
  633. src_ptr += 2;
  634. dst_ptr += 2;
  635. }
  636. if (src_width & 1) {
  637. dst_ptr[0] += src_ptr[0];
  638. }
  639. }
  640. void ScaleARGBRowDown2_C(const uint8* src_argb,
  641. ptrdiff_t src_stride,
  642. uint8* dst_argb, int dst_width) {
  643. const uint32* src = (const uint32*)(src_argb);
  644. uint32* dst = (uint32*)(dst_argb);
  645. int x;
  646. for (x = 0; x < dst_width - 1; x += 2) {
  647. dst[0] = src[1];
  648. dst[1] = src[3];
  649. src += 4;
  650. dst += 2;
  651. }
  652. if (dst_width & 1) {
  653. dst[0] = src[1];
  654. }
  655. }
  656. void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
  657. ptrdiff_t src_stride,
  658. uint8* dst_argb, int dst_width) {
  659. int x;
  660. for (x = 0; x < dst_width; ++x) {
  661. dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;
  662. dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1;
  663. dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1;
  664. dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1;
  665. src_argb += 8;
  666. dst_argb += 4;
  667. }
  668. }
  669. void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
  670. uint8* dst_argb, int dst_width) {
  671. int x;
  672. for (x = 0; x < dst_width; ++x) {
  673. dst_argb[0] = (src_argb[0] + src_argb[4] +
  674. src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
  675. dst_argb[1] = (src_argb[1] + src_argb[5] +
  676. src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
  677. dst_argb[2] = (src_argb[2] + src_argb[6] +
  678. src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
  679. dst_argb[3] = (src_argb[3] + src_argb[7] +
  680. src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
  681. src_argb += 8;
  682. dst_argb += 4;
  683. }
  684. }
  685. void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
  686. int src_stepx,
  687. uint8* dst_argb, int dst_width) {
  688. const uint32* src = (const uint32*)(src_argb);
  689. uint32* dst = (uint32*)(dst_argb);
  690. int x;
  691. for (x = 0; x < dst_width - 1; x += 2) {
  692. dst[0] = src[0];
  693. dst[1] = src[src_stepx];
  694. src += src_stepx * 2;
  695. dst += 2;
  696. }
  697. if (dst_width & 1) {
  698. dst[0] = src[0];
  699. }
  700. }
  701. void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
  702. ptrdiff_t src_stride,
  703. int src_stepx,
  704. uint8* dst_argb, int dst_width) {
  705. int x;
  706. for (x = 0; x < dst_width; ++x) {
  707. dst_argb[0] = (src_argb[0] + src_argb[4] +
  708. src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
  709. dst_argb[1] = (src_argb[1] + src_argb[5] +
  710. src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
  711. dst_argb[2] = (src_argb[2] + src_argb[6] +
  712. src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
  713. dst_argb[3] = (src_argb[3] + src_argb[7] +
  714. src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
  715. src_argb += src_stepx * 4;
  716. dst_argb += 4;
  717. }
  718. }
  719. // Scales a single row of pixels using point sampling.
  720. void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
  721. int dst_width, int x, int dx) {
  722. const uint32* src = (const uint32*)(src_argb);
  723. uint32* dst = (uint32*)(dst_argb);
  724. int j;
  725. for (j = 0; j < dst_width - 1; j += 2) {
  726. dst[0] = src[x >> 16];
  727. x += dx;
  728. dst[1] = src[x >> 16];
  729. x += dx;
  730. dst += 2;
  731. }
  732. if (dst_width & 1) {
  733. dst[0] = src[x >> 16];
  734. }
  735. }
  736. void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
  737. int dst_width, int x32, int dx) {
  738. int64 x = (int64)(x32);
  739. const uint32* src = (const uint32*)(src_argb);
  740. uint32* dst = (uint32*)(dst_argb);
  741. int j;
  742. for (j = 0; j < dst_width - 1; j += 2) {
  743. dst[0] = src[x >> 16];
  744. x += dx;
  745. dst[1] = src[x >> 16];
  746. x += dx;
  747. dst += 2;
  748. }
  749. if (dst_width & 1) {
  750. dst[0] = src[x >> 16];
  751. }
  752. }
  753. // Scales a single row of pixels up by 2x using point sampling.
  754. void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
  755. int dst_width, int x, int dx) {
  756. const uint32* src = (const uint32*)(src_argb);
  757. uint32* dst = (uint32*)(dst_argb);
  758. int j;
  759. for (j = 0; j < dst_width - 1; j += 2) {
  760. dst[1] = dst[0] = src[0];
  761. src += 1;
  762. dst += 2;
  763. }
  764. if (dst_width & 1) {
  765. dst[0] = src[0];
  766. }
  767. }
  768. // TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
  769. // Mimics SSSE3 blender
  770. #define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7
  771. #define BLENDERC(a, b, f, s) (uint32)( \
  772. BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
  773. #define BLENDER(a, b, f) \
  774. BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \
  775. BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0)
  776. void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
  777. int dst_width, int x, int dx) {
  778. const uint32* src = (const uint32*)(src_argb);
  779. uint32* dst = (uint32*)(dst_argb);
  780. int j;
  781. for (j = 0; j < dst_width - 1; j += 2) {
  782. int xi = x >> 16;
  783. int xf = (x >> 9) & 0x7f;
  784. uint32 a = src[xi];
  785. uint32 b = src[xi + 1];
  786. dst[0] = BLENDER(a, b, xf);
  787. x += dx;
  788. xi = x >> 16;
  789. xf = (x >> 9) & 0x7f;
  790. a = src[xi];
  791. b = src[xi + 1];
  792. dst[1] = BLENDER(a, b, xf);
  793. x += dx;
  794. dst += 2;
  795. }
  796. if (dst_width & 1) {
  797. int xi = x >> 16;
  798. int xf = (x >> 9) & 0x7f;
  799. uint32 a = src[xi];
  800. uint32 b = src[xi + 1];
  801. dst[0] = BLENDER(a, b, xf);
  802. }
  803. }
  804. void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
  805. int dst_width, int x32, int dx) {
  806. int64 x = (int64)(x32);
  807. const uint32* src = (const uint32*)(src_argb);
  808. uint32* dst = (uint32*)(dst_argb);
  809. int j;
  810. for (j = 0; j < dst_width - 1; j += 2) {
  811. int64 xi = x >> 16;
  812. int xf = (x >> 9) & 0x7f;
  813. uint32 a = src[xi];
  814. uint32 b = src[xi + 1];
  815. dst[0] = BLENDER(a, b, xf);
  816. x += dx;
  817. xi = x >> 16;
  818. xf = (x >> 9) & 0x7f;
  819. a = src[xi];
  820. b = src[xi + 1];
  821. dst[1] = BLENDER(a, b, xf);
  822. x += dx;
  823. dst += 2;
  824. }
  825. if (dst_width & 1) {
  826. int64 xi = x >> 16;
  827. int xf = (x >> 9) & 0x7f;
  828. uint32 a = src[xi];
  829. uint32 b = src[xi + 1];
  830. dst[0] = BLENDER(a, b, xf);
  831. }
  832. }
  833. #undef BLENDER1
  834. #undef BLENDERC
  835. #undef BLENDER
  836. // Scale plane vertically with bilinear interpolation.
  837. void ScalePlaneVertical(int src_height,
  838. int dst_width, int dst_height,
  839. int src_stride, int dst_stride,
  840. const uint8* src_argb, uint8* dst_argb,
  841. int x, int y, int dy,
  842. int bpp, enum FilterMode filtering) {
  843. // TODO(fbarchard): Allow higher bpp.
  844. int dst_width_bytes = dst_width * bpp;
  845. void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
  846. ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
  847. InterpolateRow_C;
  848. const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
  849. int j;
  850. assert(bpp >= 1 && bpp <= 4);
  851. assert(src_height != 0);
  852. assert(dst_width > 0);
  853. assert(dst_height > 0);
  854. src_argb += (x >> 16) * bpp;
  855. #if defined(HAS_INTERPOLATEROW_SSSE3)
  856. if (TestCpuFlag(kCpuHasSSSE3)) {
  857. InterpolateRow = InterpolateRow_Any_SSSE3;
  858. if (IS_ALIGNED(dst_width_bytes, 16)) {
  859. InterpolateRow = InterpolateRow_SSSE3;
  860. }
  861. }
  862. #endif
  863. #if defined(HAS_INTERPOLATEROW_AVX2)
  864. if (TestCpuFlag(kCpuHasAVX2)) {
  865. InterpolateRow = InterpolateRow_Any_AVX2;
  866. if (IS_ALIGNED(dst_width_bytes, 32)) {
  867. InterpolateRow = InterpolateRow_AVX2;
  868. }
  869. }
  870. #endif
  871. #if defined(HAS_INTERPOLATEROW_NEON)
  872. if (TestCpuFlag(kCpuHasNEON)) {
  873. InterpolateRow = InterpolateRow_Any_NEON;
  874. if (IS_ALIGNED(dst_width_bytes, 16)) {
  875. InterpolateRow = InterpolateRow_NEON;
  876. }
  877. }
  878. #endif
  879. #if defined(HAS_INTERPOLATEROW_DSPR2)
  880. if (TestCpuFlag(kCpuHasDSPR2) &&
  881. IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
  882. IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
  883. InterpolateRow = InterpolateRow_Any_DSPR2;
  884. if (IS_ALIGNED(dst_width_bytes, 4)) {
  885. InterpolateRow = InterpolateRow_DSPR2;
  886. }
  887. }
  888. #endif
  889. for (j = 0; j < dst_height; ++j) {
  890. int yi;
  891. int yf;
  892. if (y > max_y) {
  893. y = max_y;
  894. }
  895. yi = y >> 16;
  896. yf = filtering ? ((y >> 8) & 255) : 0;
  897. InterpolateRow(dst_argb, src_argb + yi * src_stride,
  898. src_stride, dst_width_bytes, yf);
  899. dst_argb += dst_stride;
  900. y += dy;
  901. }
  902. }
  903. void ScalePlaneVertical_16(int src_height,
  904. int dst_width, int dst_height,
  905. int src_stride, int dst_stride,
  906. const uint16* src_argb, uint16* dst_argb,
  907. int x, int y, int dy,
  908. int wpp, enum FilterMode filtering) {
  909. // TODO(fbarchard): Allow higher wpp.
  910. int dst_width_words = dst_width * wpp;
  911. void (*InterpolateRow)(uint16* dst_argb, const uint16* src_argb,
  912. ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
  913. InterpolateRow_16_C;
  914. const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
  915. int j;
  916. assert(wpp >= 1 && wpp <= 2);
  917. assert(src_height != 0);
  918. assert(dst_width > 0);
  919. assert(dst_height > 0);
  920. src_argb += (x >> 16) * wpp;
  921. #if defined(HAS_INTERPOLATEROW_16_SSE2)
  922. if (TestCpuFlag(kCpuHasSSE2)) {
  923. InterpolateRow = InterpolateRow_Any_16_SSE2;
  924. if (IS_ALIGNED(dst_width_bytes, 16)) {
  925. InterpolateRow = InterpolateRow_16_SSE2;
  926. }
  927. }
  928. #endif
  929. #if defined(HAS_INTERPOLATEROW_16_SSSE3)
  930. if (TestCpuFlag(kCpuHasSSSE3)) {
  931. InterpolateRow = InterpolateRow_Any_16_SSSE3;
  932. if (IS_ALIGNED(dst_width_bytes, 16)) {
  933. InterpolateRow = InterpolateRow_16_SSSE3;
  934. }
  935. }
  936. #endif
  937. #if defined(HAS_INTERPOLATEROW_16_AVX2)
  938. if (TestCpuFlag(kCpuHasAVX2)) {
  939. InterpolateRow = InterpolateRow_Any_16_AVX2;
  940. if (IS_ALIGNED(dst_width_bytes, 32)) {
  941. InterpolateRow = InterpolateRow_16_AVX2;
  942. }
  943. }
  944. #endif
  945. #if defined(HAS_INTERPOLATEROW_16_NEON)
  946. if (TestCpuFlag(kCpuHasNEON)) {
  947. InterpolateRow = InterpolateRow_Any_16_NEON;
  948. if (IS_ALIGNED(dst_width_bytes, 16)) {
  949. InterpolateRow = InterpolateRow_16_NEON;
  950. }
  951. }
  952. #endif
  953. #if defined(HAS_INTERPOLATEROW_16_DSPR2)
  954. if (TestCpuFlag(kCpuHasDSPR2) &&
  955. IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
  956. IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
  957. InterpolateRow = InterpolateRow_Any_16_DSPR2;
  958. if (IS_ALIGNED(dst_width_bytes, 4)) {
  959. InterpolateRow = InterpolateRow_16_DSPR2;
  960. }
  961. }
  962. #endif
  963. for (j = 0; j < dst_height; ++j) {
  964. int yi;
  965. int yf;
  966. if (y > max_y) {
  967. y = max_y;
  968. }
  969. yi = y >> 16;
  970. yf = filtering ? ((y >> 8) & 255) : 0;
  971. InterpolateRow(dst_argb, src_argb + yi * src_stride,
  972. src_stride, dst_width_words, yf);
  973. dst_argb += dst_stride;
  974. y += dy;
  975. }
  976. }
  977. // Simplify the filtering based on scale factors.
  978. enum FilterMode ScaleFilterReduce(int src_width, int src_height,
  979. int dst_width, int dst_height,
  980. enum FilterMode filtering) {
  981. if (src_width < 0) {
  982. src_width = -src_width;
  983. }
  984. if (src_height < 0) {
  985. src_height = -src_height;
  986. }
  987. if (filtering == kFilterBox) {
  988. // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
  989. if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
  990. filtering = kFilterBilinear;
  991. }
  992. }
  993. if (filtering == kFilterBilinear) {
  994. if (src_height == 1) {
  995. filtering = kFilterLinear;
  996. }
  997. // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
  998. if (dst_height == src_height || dst_height * 3 == src_height) {
  999. filtering = kFilterLinear;
  1000. }
  1001. // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
  1002. // avoid reading 2 pixels horizontally that causes memory exception.
  1003. if (src_width == 1) {
  1004. filtering = kFilterNone;
  1005. }
  1006. }
  1007. if (filtering == kFilterLinear) {
  1008. if (src_width == 1) {
  1009. filtering = kFilterNone;
  1010. }
  1011. // TODO(fbarchard): Detect any odd scale factor and reduce to None.
  1012. if (dst_width == src_width || dst_width * 3 == src_width) {
  1013. filtering = kFilterNone;
  1014. }
  1015. }
  1016. return filtering;
  1017. }
  1018. // Divide num by div and return as 16.16 fixed point result.
  1019. int FixedDiv_C(int num, int div) {
  1020. return (int)(((int64)(num) << 16) / div);
  1021. }
  1022. // Divide num by div and return as 16.16 fixed point result.
  1023. int FixedDiv1_C(int num, int div) {
  1024. return (int)((((int64)(num) << 16) - 0x00010001) /
  1025. (div - 1));
  1026. }
  // Half of step dx plus offset s, mirrored around zero for negative dx:
  //   dx >= 0:   (dx >> 1) + s
  //   dx <  0: -((-dx >> 1) + s)
  // Arguments and the whole expansion are parenthesized so the macro keeps its
  // meaning inside larger expressions. dx is evaluated more than once, so pass
  // only side-effect-free expressions.
  #define CENTERSTART(dx, s) \
    (((dx) < 0) ? -((-(dx) >> 1) + (s)) : (((dx) >> 1) + (s)))
  1028. // Compute slope values for stepping.
  1029. void ScaleSlope(int src_width, int src_height,
  1030. int dst_width, int dst_height,
  1031. enum FilterMode filtering,
  1032. int* x, int* y, int* dx, int* dy) {
  1033. assert(x != NULL);
  1034. assert(y != NULL);
  1035. assert(dx != NULL);
  1036. assert(dy != NULL);
  1037. assert(src_width != 0);
  1038. assert(src_height != 0);
  1039. assert(dst_width > 0);
  1040. assert(dst_height > 0);
  1041. // Check for 1 pixel and avoid FixedDiv overflow.
  1042. if (dst_width == 1 && src_width >= 32768) {
  1043. dst_width = src_width;
  1044. }
  1045. if (dst_height == 1 && src_height >= 32768) {
  1046. dst_height = src_height;
  1047. }
  1048. if (filtering == kFilterBox) {
  1049. // Scale step for point sampling duplicates all pixels equally.
  1050. *dx = FixedDiv(Abs(src_width), dst_width);
  1051. *dy = FixedDiv(src_height, dst_height);
  1052. *x = 0;
  1053. *y = 0;
  1054. } else if (filtering == kFilterBilinear) {
  1055. // Scale step for bilinear sampling renders last pixel once for upsample.
  1056. if (dst_width <= Abs(src_width)) {
  1057. *dx = FixedDiv(Abs(src_width), dst_width);
  1058. *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
  1059. } else if (dst_width > 1) {
  1060. *dx = FixedDiv1(Abs(src_width), dst_width);
  1061. *x = 0;
  1062. }
  1063. if (dst_height <= src_height) {
  1064. *dy = FixedDiv(src_height, dst_height);
  1065. *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
  1066. } else if (dst_height > 1) {
  1067. *dy = FixedDiv1(src_height, dst_height);
  1068. *y = 0;
  1069. }
  1070. } else if (filtering == kFilterLinear) {
  1071. // Scale step for bilinear sampling renders last pixel once for upsample.
  1072. if (dst_width <= Abs(src_width)) {
  1073. *dx = FixedDiv(Abs(src_width), dst_width);
  1074. *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
  1075. } else if (dst_width > 1) {
  1076. *dx = FixedDiv1(Abs(src_width), dst_width);
  1077. *x = 0;
  1078. }
  1079. *dy = FixedDiv(src_height, dst_height);
  1080. *y = *dy >> 1;
  1081. } else {
  1082. // Scale step for point sampling duplicates all pixels equally.
  1083. *dx = FixedDiv(Abs(src_width), dst_width);
  1084. *dy = FixedDiv(src_height, dst_height);
  1085. *x = CENTERSTART(*dx, 0);
  1086. *y = CENTERSTART(*dy, 0);
  1087. }
  1088. // Negative src_width means horizontally mirror.
  1089. if (src_width < 0) {
  1090. *x += (dst_width - 1) * *dx;
  1091. *dx = -*dx;
  1092. // src_width = -src_width; // Caller must do this.
  1093. }
  1094. }
  1095. #undef CENTERSTART
  1096. #ifdef __cplusplus
  1097. } // extern "C"
  1098. } // namespace libyuv
  1099. #endif