/*
 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
#include "libyuv/scale.h"

#include <assert.h>
#include <string.h>

#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"  // For CopyPlane
#include "libyuv/row.h"
#include "libyuv/scale_row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
  21. static __inline int Abs(int v) {
  22. return v >= 0 ? v : -v;
  23. }
  24. #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
  25. // Scale plane, 1/2
  26. // This is an optimized version for scaling down a plane to 1/2 of
  27. // its original size.
  28. static void ScalePlaneDown2(int src_width, int src_height,
  29. int dst_width, int dst_height,
  30. int src_stride, int dst_stride,
  31. const uint8* src_ptr, uint8* dst_ptr,
  32. enum FilterMode filtering) {
  33. int y;
  34. void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
  35. uint8* dst_ptr, int dst_width) =
  36. filtering == kFilterNone ? ScaleRowDown2_C :
  37. (filtering == kFilterLinear ? ScaleRowDown2Linear_C : ScaleRowDown2Box_C);
  38. int row_stride = src_stride << 1;
  39. if (!filtering) {
  40. src_ptr += src_stride; // Point to odd rows.
  41. src_stride = 0;
  42. }
  43. #if defined(HAS_SCALEROWDOWN2_NEON)
  44. if (TestCpuFlag(kCpuHasNEON)) {
  45. ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_NEON :
  46. (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON :
  47. ScaleRowDown2Box_Any_NEON);
  48. if (IS_ALIGNED(dst_width, 16)) {
  49. ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON :
  50. (filtering == kFilterLinear ? ScaleRowDown2Linear_NEON :
  51. ScaleRowDown2Box_NEON);
  52. }
  53. }
  54. #endif
  55. #if defined(HAS_SCALEROWDOWN2_SSSE3)
  56. if (TestCpuFlag(kCpuHasSSSE3)) {
  57. ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_SSSE3 :
  58. (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3 :
  59. ScaleRowDown2Box_Any_SSSE3);
  60. if (IS_ALIGNED(dst_width, 16)) {
  61. ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSSE3 :
  62. (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3 :
  63. ScaleRowDown2Box_SSSE3);
  64. }
  65. }
  66. #endif
  67. #if defined(HAS_SCALEROWDOWN2_AVX2)
  68. if (TestCpuFlag(kCpuHasAVX2)) {
  69. ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_AVX2 :
  70. (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2 :
  71. ScaleRowDown2Box_Any_AVX2);
  72. if (IS_ALIGNED(dst_width, 32)) {
  73. ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2 :
  74. (filtering == kFilterLinear ? ScaleRowDown2Linear_AVX2 :
  75. ScaleRowDown2Box_AVX2);
  76. }
  77. }
  78. #endif
  79. #if defined(HAS_SCALEROWDOWN2_DSPR2)
  80. if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) &&
  81. IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
  82. IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
  83. ScaleRowDown2 = filtering ?
  84. ScaleRowDown2Box_DSPR2 : ScaleRowDown2_DSPR2;
  85. }
  86. #endif
  87. if (filtering == kFilterLinear) {
  88. src_stride = 0;
  89. }
  90. // TODO(fbarchard): Loop through source height to allow odd height.
  91. for (y = 0; y < dst_height; ++y) {
  92. ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
  93. src_ptr += row_stride;
  94. dst_ptr += dst_stride;
  95. }
  96. }
  97. static void ScalePlaneDown2_16(int src_width, int src_height,
  98. int dst_width, int dst_height,
  99. int src_stride, int dst_stride,
  100. const uint16* src_ptr, uint16* dst_ptr,
  101. enum FilterMode filtering) {
  102. int y;
  103. void (*ScaleRowDown2)(const uint16* src_ptr, ptrdiff_t src_stride,
  104. uint16* dst_ptr, int dst_width) =
  105. filtering == kFilterNone ? ScaleRowDown2_16_C :
  106. (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C :
  107. ScaleRowDown2Box_16_C);
  108. int row_stride = src_stride << 1;
  109. if (!filtering) {
  110. src_ptr += src_stride; // Point to odd rows.
  111. src_stride = 0;
  112. }
  113. #if defined(HAS_SCALEROWDOWN2_16_NEON)
  114. if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
  115. ScaleRowDown2 = filtering ? ScaleRowDown2Box_16_NEON :
  116. ScaleRowDown2_16_NEON;
  117. }
  118. #endif
  119. #if defined(HAS_SCALEROWDOWN2_16_SSE2)
  120. if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
  121. ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 :
  122. (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 :
  123. ScaleRowDown2Box_16_SSE2);
  124. }
  125. #endif
  126. #if defined(HAS_SCALEROWDOWN2_16_DSPR2)
  127. if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_ptr, 4) &&
  128. IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
  129. IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
  130. ScaleRowDown2 = filtering ?
  131. ScaleRowDown2Box_16_DSPR2 : ScaleRowDown2_16_DSPR2;
  132. }
  133. #endif
  134. if (filtering == kFilterLinear) {
  135. src_stride = 0;
  136. }
  137. // TODO(fbarchard): Loop through source height to allow odd height.
  138. for (y = 0; y < dst_height; ++y) {
  139. ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
  140. src_ptr += row_stride;
  141. dst_ptr += dst_stride;
  142. }
  143. }
  144. // Scale plane, 1/4
  145. // This is an optimized version for scaling down a plane to 1/4 of
  146. // its original size.
  147. static void ScalePlaneDown4(int src_width, int src_height,
  148. int dst_width, int dst_height,
  149. int src_stride, int dst_stride,
  150. const uint8* src_ptr, uint8* dst_ptr,
  151. enum FilterMode filtering) {
  152. int y;
  153. void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride,
  154. uint8* dst_ptr, int dst_width) =
  155. filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
  156. int row_stride = src_stride << 2;
  157. if (!filtering) {
  158. src_ptr += src_stride * 2; // Point to row 2.
  159. src_stride = 0;
  160. }
  161. #if defined(HAS_SCALEROWDOWN4_NEON)
  162. if (TestCpuFlag(kCpuHasNEON)) {
  163. ScaleRowDown4 = filtering ?
  164. ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;
  165. if (IS_ALIGNED(dst_width, 8)) {
  166. ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
  167. }
  168. }
  169. #endif
  170. #if defined(HAS_SCALEROWDOWN4_SSSE3)
  171. if (TestCpuFlag(kCpuHasSSSE3)) {
  172. ScaleRowDown4 = filtering ?
  173. ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3;
  174. if (IS_ALIGNED(dst_width, 8)) {
  175. ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3;
  176. }
  177. }
  178. #endif
  179. #if defined(HAS_SCALEROWDOWN4_AVX2)
  180. if (TestCpuFlag(kCpuHasAVX2)) {
  181. ScaleRowDown4 = filtering ?
  182. ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
  183. if (IS_ALIGNED(dst_width, 16)) {
  184. ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
  185. }
  186. }
  187. #endif
  188. #if defined(HAS_SCALEROWDOWN4_DSPR2)
  189. if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) &&
  190. IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
  191. IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
  192. ScaleRowDown4 = filtering ?
  193. ScaleRowDown4Box_DSPR2 : ScaleRowDown4_DSPR2;
  194. }
  195. #endif
  196. if (filtering == kFilterLinear) {
  197. src_stride = 0;
  198. }
  199. for (y = 0; y < dst_height; ++y) {
  200. ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
  201. src_ptr += row_stride;
  202. dst_ptr += dst_stride;
  203. }
  204. }
  205. static void ScalePlaneDown4_16(int src_width, int src_height,
  206. int dst_width, int dst_height,
  207. int src_stride, int dst_stride,
  208. const uint16* src_ptr, uint16* dst_ptr,
  209. enum FilterMode filtering) {
  210. int y;
  211. void (*ScaleRowDown4)(const uint16* src_ptr, ptrdiff_t src_stride,
  212. uint16* dst_ptr, int dst_width) =
  213. filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
  214. int row_stride = src_stride << 2;
  215. if (!filtering) {
  216. src_ptr += src_stride * 2; // Point to row 2.
  217. src_stride = 0;
  218. }
  219. #if defined(HAS_SCALEROWDOWN4_16_NEON)
  220. if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
  221. ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_NEON :
  222. ScaleRowDown4_16_NEON;
  223. }
  224. #endif
  225. #if defined(HAS_SCALEROWDOWN4_16_SSE2)
  226. if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
  227. ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_SSE2 :
  228. ScaleRowDown4_16_SSE2;
  229. }
  230. #endif
  231. #if defined(HAS_SCALEROWDOWN4_16_DSPR2)
  232. if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(row_stride, 4) &&
  233. IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
  234. IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
  235. ScaleRowDown4 = filtering ?
  236. ScaleRowDown4Box_16_DSPR2 : ScaleRowDown4_16_DSPR2;
  237. }
  238. #endif
  239. if (filtering == kFilterLinear) {
  240. src_stride = 0;
  241. }
  242. for (y = 0; y < dst_height; ++y) {
  243. ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
  244. src_ptr += row_stride;
  245. dst_ptr += dst_stride;
  246. }
  247. }
  248. // Scale plane down, 3/4
  249. static void ScalePlaneDown34(int src_width, int src_height,
  250. int dst_width, int dst_height,
  251. int src_stride, int dst_stride,
  252. const uint8* src_ptr, uint8* dst_ptr,
  253. enum FilterMode filtering) {
  254. int y;
  255. void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride,
  256. uint8* dst_ptr, int dst_width);
  257. void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride,
  258. uint8* dst_ptr, int dst_width);
  259. const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  260. assert(dst_width % 3 == 0);
  261. if (!filtering) {
  262. ScaleRowDown34_0 = ScaleRowDown34_C;
  263. ScaleRowDown34_1 = ScaleRowDown34_C;
  264. } else {
  265. ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
  266. ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
  267. }
  268. #if defined(HAS_SCALEROWDOWN34_NEON)
  269. if (TestCpuFlag(kCpuHasNEON)) {
  270. if (!filtering) {
  271. ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
  272. ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
  273. } else {
  274. ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
  275. ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
  276. }
  277. if (dst_width % 24 == 0) {
  278. if (!filtering) {
  279. ScaleRowDown34_0 = ScaleRowDown34_NEON;
  280. ScaleRowDown34_1 = ScaleRowDown34_NEON;
  281. } else {
  282. ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
  283. ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
  284. }
  285. }
  286. }
  287. #endif
  288. #if defined(HAS_SCALEROWDOWN34_SSSE3)
  289. if (TestCpuFlag(kCpuHasSSSE3)) {
  290. if (!filtering) {
  291. ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
  292. ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
  293. } else {
  294. ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
  295. ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
  296. }
  297. if (dst_width % 24 == 0) {
  298. if (!filtering) {
  299. ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
  300. ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
  301. } else {
  302. ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
  303. ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
  304. }
  305. }
  306. }
  307. #endif
  308. #if defined(HAS_SCALEROWDOWN34_DSPR2)
  309. if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) &&
  310. IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
  311. IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
  312. if (!filtering) {
  313. ScaleRowDown34_0 = ScaleRowDown34_DSPR2;
  314. ScaleRowDown34_1 = ScaleRowDown34_DSPR2;
  315. } else {
  316. ScaleRowDown34_0 = ScaleRowDown34_0_Box_DSPR2;
  317. ScaleRowDown34_1 = ScaleRowDown34_1_Box_DSPR2;
  318. }
  319. }
  320. #endif
  321. for (y = 0; y < dst_height - 2; y += 3) {
  322. ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
  323. src_ptr += src_stride;
  324. dst_ptr += dst_stride;
  325. ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
  326. src_ptr += src_stride;
  327. dst_ptr += dst_stride;
  328. ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
  329. dst_ptr, dst_width);
  330. src_ptr += src_stride * 2;
  331. dst_ptr += dst_stride;
  332. }
  333. // Remainder 1 or 2 rows with last row vertically unfiltered
  334. if ((dst_height % 3) == 2) {
  335. ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
  336. src_ptr += src_stride;
  337. dst_ptr += dst_stride;
  338. ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
  339. } else if ((dst_height % 3) == 1) {
  340. ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
  341. }
  342. }
  343. static void ScalePlaneDown34_16(int src_width, int src_height,
  344. int dst_width, int dst_height,
  345. int src_stride, int dst_stride,
  346. const uint16* src_ptr, uint16* dst_ptr,
  347. enum FilterMode filtering) {
  348. int y;
  349. void (*ScaleRowDown34_0)(const uint16* src_ptr, ptrdiff_t src_stride,
  350. uint16* dst_ptr, int dst_width);
  351. void (*ScaleRowDown34_1)(const uint16* src_ptr, ptrdiff_t src_stride,
  352. uint16* dst_ptr, int dst_width);
  353. const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  354. assert(dst_width % 3 == 0);
  355. if (!filtering) {
  356. ScaleRowDown34_0 = ScaleRowDown34_16_C;
  357. ScaleRowDown34_1 = ScaleRowDown34_16_C;
  358. } else {
  359. ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
  360. ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
  361. }
  362. #if defined(HAS_SCALEROWDOWN34_16_NEON)
  363. if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
  364. if (!filtering) {
  365. ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
  366. ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
  367. } else {
  368. ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
  369. ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
  370. }
  371. }
  372. #endif
  373. #if defined(HAS_SCALEROWDOWN34_16_SSSE3)
  374. if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
  375. if (!filtering) {
  376. ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
  377. ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
  378. } else {
  379. ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
  380. ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
  381. }
  382. }
  383. #endif
  384. #if defined(HAS_SCALEROWDOWN34_16_DSPR2)
  385. if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 24 == 0) &&
  386. IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
  387. IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
  388. if (!filtering) {
  389. ScaleRowDown34_0 = ScaleRowDown34_16_DSPR2;
  390. ScaleRowDown34_1 = ScaleRowDown34_16_DSPR2;
  391. } else {
  392. ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_DSPR2;
  393. ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_DSPR2;
  394. }
  395. }
  396. #endif
  397. for (y = 0; y < dst_height - 2; y += 3) {
  398. ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
  399. src_ptr += src_stride;
  400. dst_ptr += dst_stride;
  401. ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
  402. src_ptr += src_stride;
  403. dst_ptr += dst_stride;
  404. ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
  405. dst_ptr, dst_width);
  406. src_ptr += src_stride * 2;
  407. dst_ptr += dst_stride;
  408. }
  409. // Remainder 1 or 2 rows with last row vertically unfiltered
  410. if ((dst_height % 3) == 2) {
  411. ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
  412. src_ptr += src_stride;
  413. dst_ptr += dst_stride;
  414. ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
  415. } else if ((dst_height % 3) == 1) {
  416. ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
  417. }
  418. }
  419. // Scale plane, 3/8
  420. // This is an optimized version for scaling down a plane to 3/8
  421. // of its original size.
  422. //
  423. // Uses box filter arranges like this
  424. // aaabbbcc -> abc
  425. // aaabbbcc def
  426. // aaabbbcc ghi
  427. // dddeeeff
  428. // dddeeeff
  429. // dddeeeff
  430. // ggghhhii
  431. // ggghhhii
  432. // Boxes are 3x3, 2x3, 3x2 and 2x2
  433. static void ScalePlaneDown38(int src_width, int src_height,
  434. int dst_width, int dst_height,
  435. int src_stride, int dst_stride,
  436. const uint8* src_ptr, uint8* dst_ptr,
  437. enum FilterMode filtering) {
  438. int y;
  439. void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride,
  440. uint8* dst_ptr, int dst_width);
  441. void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride,
  442. uint8* dst_ptr, int dst_width);
  443. const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  444. assert(dst_width % 3 == 0);
  445. if (!filtering) {
  446. ScaleRowDown38_3 = ScaleRowDown38_C;
  447. ScaleRowDown38_2 = ScaleRowDown38_C;
  448. } else {
  449. ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
  450. ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
  451. }
  452. #if defined(HAS_SCALEROWDOWN38_NEON)
  453. if (TestCpuFlag(kCpuHasNEON)) {
  454. if (!filtering) {
  455. ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
  456. ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
  457. } else {
  458. ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
  459. ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
  460. }
  461. if (dst_width % 12 == 0) {
  462. if (!filtering) {
  463. ScaleRowDown38_3 = ScaleRowDown38_NEON;
  464. ScaleRowDown38_2 = ScaleRowDown38_NEON;
  465. } else {
  466. ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
  467. ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
  468. }
  469. }
  470. }
  471. #endif
  472. #if defined(HAS_SCALEROWDOWN38_SSSE3)
  473. if (TestCpuFlag(kCpuHasSSSE3)) {
  474. if (!filtering) {
  475. ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
  476. ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
  477. } else {
  478. ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
  479. ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
  480. }
  481. if (dst_width % 12 == 0 && !filtering) {
  482. ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
  483. ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
  484. }
  485. if (dst_width % 6 == 0 && filtering) {
  486. ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
  487. ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
  488. }
  489. }
  490. #endif
  491. #if defined(HAS_SCALEROWDOWN38_DSPR2)
  492. if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) &&
  493. IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
  494. IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
  495. if (!filtering) {
  496. ScaleRowDown38_3 = ScaleRowDown38_DSPR2;
  497. ScaleRowDown38_2 = ScaleRowDown38_DSPR2;
  498. } else {
  499. ScaleRowDown38_3 = ScaleRowDown38_3_Box_DSPR2;
  500. ScaleRowDown38_2 = ScaleRowDown38_2_Box_DSPR2;
  501. }
  502. }
  503. #endif
  504. for (y = 0; y < dst_height - 2; y += 3) {
  505. ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
  506. src_ptr += src_stride * 3;
  507. dst_ptr += dst_stride;
  508. ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
  509. src_ptr += src_stride * 3;
  510. dst_ptr += dst_stride;
  511. ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
  512. src_ptr += src_stride * 2;
  513. dst_ptr += dst_stride;
  514. }
  515. // Remainder 1 or 2 rows with last row vertically unfiltered
  516. if ((dst_height % 3) == 2) {
  517. ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
  518. src_ptr += src_stride * 3;
  519. dst_ptr += dst_stride;
  520. ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  521. } else if ((dst_height % 3) == 1) {
  522. ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  523. }
  524. }
  525. static void ScalePlaneDown38_16(int src_width, int src_height,
  526. int dst_width, int dst_height,
  527. int src_stride, int dst_stride,
  528. const uint16* src_ptr, uint16* dst_ptr,
  529. enum FilterMode filtering) {
  530. int y;
  531. void (*ScaleRowDown38_3)(const uint16* src_ptr, ptrdiff_t src_stride,
  532. uint16* dst_ptr, int dst_width);
  533. void (*ScaleRowDown38_2)(const uint16* src_ptr, ptrdiff_t src_stride,
  534. uint16* dst_ptr, int dst_width);
  535. const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  536. assert(dst_width % 3 == 0);
  537. if (!filtering) {
  538. ScaleRowDown38_3 = ScaleRowDown38_16_C;
  539. ScaleRowDown38_2 = ScaleRowDown38_16_C;
  540. } else {
  541. ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
  542. ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
  543. }
  544. #if defined(HAS_SCALEROWDOWN38_16_NEON)
  545. if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
  546. if (!filtering) {
  547. ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
  548. ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
  549. } else {
  550. ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
  551. ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
  552. }
  553. }
  554. #endif
  555. #if defined(HAS_SCALEROWDOWN38_16_SSSE3)
  556. if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
  557. if (!filtering) {
  558. ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
  559. ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
  560. } else {
  561. ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
  562. ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
  563. }
  564. }
  565. #endif
  566. #if defined(HAS_SCALEROWDOWN38_16_DSPR2)
  567. if (TestCpuFlag(kCpuHasDSPR2) && (dst_width % 12 == 0) &&
  568. IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
  569. IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
  570. if (!filtering) {
  571. ScaleRowDown38_3 = ScaleRowDown38_16_DSPR2;
  572. ScaleRowDown38_2 = ScaleRowDown38_16_DSPR2;
  573. } else {
  574. ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_DSPR2;
  575. ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_DSPR2;
  576. }
  577. }
  578. #endif
  579. for (y = 0; y < dst_height - 2; y += 3) {
  580. ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
  581. src_ptr += src_stride * 3;
  582. dst_ptr += dst_stride;
  583. ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
  584. src_ptr += src_stride * 3;
  585. dst_ptr += dst_stride;
  586. ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
  587. src_ptr += src_stride * 2;
  588. dst_ptr += dst_stride;
  589. }
  590. // Remainder 1 or 2 rows with last row vertically unfiltered
  591. if ((dst_height % 3) == 2) {
  592. ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
  593. src_ptr += src_stride * 3;
  594. dst_ptr += dst_stride;
  595. ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  596. } else if ((dst_height % 3) == 1) {
  597. ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  598. }
  599. }
  600. #define MIN1(x) ((x) < 1 ? 1 : (x))
  601. static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
  602. uint32 sum = 0u;
  603. int x;
  604. assert(iboxwidth > 0);
  605. for (x = 0; x < iboxwidth; ++x) {
  606. sum += src_ptr[x];
  607. }
  608. return sum;
  609. }
  610. static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) {
  611. uint32 sum = 0u;
  612. int x;
  613. assert(iboxwidth > 0);
  614. for (x = 0; x < iboxwidth; ++x) {
  615. sum += src_ptr[x];
  616. }
  617. return sum;
  618. }
  619. static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
  620. const uint16* src_ptr, uint8* dst_ptr) {
  621. int i;
  622. int scaletbl[2];
  623. int minboxwidth = dx >> 16;
  624. int boxwidth;
  625. scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
  626. scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
  627. for (i = 0; i < dst_width; ++i) {
  628. int ix = x >> 16;
  629. x += dx;
  630. boxwidth = MIN1((x >> 16) - ix);
  631. *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) *
  632. scaletbl[boxwidth - minboxwidth] >> 16;
  633. }
  634. }
  635. static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx,
  636. const uint32* src_ptr, uint16* dst_ptr) {
  637. int i;
  638. int scaletbl[2];
  639. int minboxwidth = dx >> 16;
  640. int boxwidth;
  641. scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
  642. scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
  643. for (i = 0; i < dst_width; ++i) {
  644. int ix = x >> 16;
  645. x += dx;
  646. boxwidth = MIN1((x >> 16) - ix);
  647. *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) *
  648. scaletbl[boxwidth - minboxwidth] >> 16;
  649. }
  650. }
  651. static void ScaleAddCols0_C(int dst_width, int boxheight, int x, int,
  652. const uint16* src_ptr, uint8* dst_ptr) {
  653. int scaleval = 65536 / boxheight;
  654. int i;
  655. src_ptr += (x >> 16);
  656. for (i = 0; i < dst_width; ++i) {
  657. *dst_ptr++ = src_ptr[i] * scaleval >> 16;
  658. }
  659. }
  660. static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
  661. const uint16* src_ptr, uint8* dst_ptr) {
  662. int boxwidth = MIN1(dx >> 16);
  663. int scaleval = 65536 / (boxwidth * boxheight);
  664. int i;
  665. x >>= 16;
  666. for (i = 0; i < dst_width; ++i) {
  667. *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
  668. x += boxwidth;
  669. }
  670. }
  671. static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx,
  672. const uint32* src_ptr, uint16* dst_ptr) {
  673. int boxwidth = MIN1(dx >> 16);
  674. int scaleval = 65536 / (boxwidth * boxheight);
  675. int i;
  676. for (i = 0; i < dst_width; ++i) {
  677. *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16;
  678. x += boxwidth;
  679. }
  680. }
// Scale plane down to any dimensions, with interpolation.
// (boxfilter).
//
// Same method as SimpleScale, which is fixed point, outputting
// one pixel of destination using fixed point (16.16) to step
// through source, sampling a box of pixel with simple
// averaging.
  688. static void ScalePlaneBox(int src_width, int src_height,
  689. int dst_width, int dst_height,
  690. int src_stride, int dst_stride,
  691. const uint8* src_ptr, uint8* dst_ptr) {
  692. int j, k;
  693. // Initial source x/y coordinate and step values as 16.16 fixed point.
  694. int x = 0;
  695. int y = 0;
  696. int dx = 0;
  697. int dy = 0;
  698. const int max_y = (src_height << 16);
  699. ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
  700. &x, &y, &dx, &dy);
  701. src_width = Abs(src_width);
  702. {
  703. // Allocate a row buffer of uint16.
  704. align_buffer_64(row16, src_width * 2);
  705. void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
  706. const uint16* src_ptr, uint8* dst_ptr) =
  707. (dx & 0xffff) ? ScaleAddCols2_C:
  708. ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
  709. void (*ScaleAddRow)(const uint8* src_ptr, uint16* dst_ptr, int src_width) =
  710. ScaleAddRow_C;
  711. #if defined(HAS_SCALEADDROW_SSE2)
  712. if (TestCpuFlag(kCpuHasSSE2)) {
  713. ScaleAddRow = ScaleAddRow_Any_SSE2;
  714. if (IS_ALIGNED(src_width, 16)) {
  715. ScaleAddRow = ScaleAddRow_SSE2;
  716. }
  717. }
  718. #endif
  719. #if defined(HAS_SCALEADDROW_AVX2)
  720. if (TestCpuFlag(kCpuHasAVX2)) {
  721. ScaleAddRow = ScaleAddRow_Any_AVX2;
  722. if (IS_ALIGNED(src_width, 32)) {
  723. ScaleAddRow = ScaleAddRow_AVX2;
  724. }
  725. }
  726. #endif
  727. #if defined(HAS_SCALEADDROW_NEON)
  728. if (TestCpuFlag(kCpuHasNEON)) {
  729. ScaleAddRow = ScaleAddRow_Any_NEON;
  730. if (IS_ALIGNED(src_width, 16)) {
  731. ScaleAddRow = ScaleAddRow_NEON;
  732. }
  733. }
  734. #endif
  735. for (j = 0; j < dst_height; ++j) {
  736. int boxheight;
  737. int iy = y >> 16;
  738. const uint8* src = src_ptr + iy * src_stride;
  739. y += dy;
  740. if (y > max_y) {
  741. y = max_y;
  742. }
  743. boxheight = MIN1((y >> 16) - iy);
  744. memset(row16, 0, src_width * 2);
  745. for (k = 0; k < boxheight; ++k) {
  746. ScaleAddRow(src, (uint16 *)(row16), src_width);
  747. src += src_stride;
  748. }
  749. ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), dst_ptr);
  750. dst_ptr += dst_stride;
  751. }
  752. free_aligned_buffer_64(row16);
  753. }
  754. }
// 16 bit version of ScalePlaneBox: box-filter downscale of a uint16 plane,
// accumulating row sums in a uint32 buffer before column normalization.
static void ScalePlaneBox_16(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint16* src_ptr, uint16* dst_ptr) {
  int j, k;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  const int max_y = (src_height << 16);
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
             &x, &y, &dx, &dy);
  // Buffer sizes below use the magnitude; ScaleSlope consumed the sign.
  src_width = Abs(src_width);
  {
    // Allocate a row buffer of uint32.
    align_buffer_64(row32, src_width * 4);
    // Fractional dx needs the variable-width column summer; otherwise the
    // fixed-width one suffices.
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
        const uint32* src_ptr, uint16* dst_ptr) =
        (dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C;
    void (*ScaleAddRow)(const uint16* src_ptr, uint32* dst_ptr, int src_width) =
        ScaleAddRow_16_C;
#if defined(HAS_SCALEADDROW_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
      ScaleAddRow = ScaleAddRow_16_SSE2;
    }
#endif
    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
      const uint16* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;  // clamp so the box never reads past the last source row
      }
      boxheight = MIN1((y >> 16) - iy);
      memset(row32, 0, src_width * 4);
      // Sum boxheight source rows into the uint32 accumulator row.
      for (k = 0; k < boxheight; ++k) {
        ScaleAddRow(src, (uint32 *)(row32), src_width);
        src += src_stride;
      }
      // Sum/normalize accumulator columns into the destination row.
      ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32), dst_ptr);
      dst_ptr += dst_stride;
    }
    free_aligned_buffer_64(row32);
  }
}
// Scale plane down with bilinear interpolation.
// For each destination row: vertically interpolate the two adjacent source
// rows into a temporary row (unless kFilterLinear, which is horizontal-only),
// then filter that row horizontally into the destination.
void ScalePlaneBilinearDown(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row buffer.
  // NOTE(review): allocated with the signed src_width before Abs() below; a
  // negative (mirrored) width would under-allocate — confirm callers always
  // pass a positive width here.
  align_buffer_64(row, src_width);
  // Highest usable y in fixed point: the interpolator reads rows yi and yi+1.
  const int max_y = (src_height - 1) << 16;
  int j;
  // 64-bit column stepper is needed once x/dx products can overflow 32 bits.
  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
      int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);
  // Promote the row interpolator / column filter to SIMD when available.
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2)) {
    InterpolateRow = InterpolateRow_Any_DSPR2;
    if (IS_ALIGNED(src_width, 4)) {
      InterpolateRow = InterpolateRow_DSPR2;
    }
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_NEON)
  if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleFilterCols_NEON;
    }
  }
#endif
  if (y > max_y) {
    y = max_y;
  }
  for (j = 0; j < dst_height; ++j) {
    int yi = y >> 16;
    const uint8* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      // Horizontal-only filtering: sample the source row directly.
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
      int yf = (y >> 8) & 255;  // vertical fraction in 1/256 units
      InterpolateRow(row, src, src_stride, src_width, yf);
      ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
    }
    dst_ptr += dst_stride;
    y += dy;
    if (y > max_y) {
      y = max_y;
    }
  }
  free_aligned_buffer_64(row);
}
// 16 bit version of ScalePlaneBilinearDown: scale a uint16 plane down with
// bilinear interpolation, using a uint16 temporary row.
void ScalePlaneBilinearDown_16(int src_width, int src_height,
                               int dst_width, int dst_height,
                               int src_stride, int dst_stride,
                               const uint16* src_ptr, uint16* dst_ptr,
                               enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row buffer.
  // NOTE(review): allocated with the signed src_width before Abs() below; a
  // negative (mirrored) width would under-allocate — confirm callers always
  // pass a positive width here.
  align_buffer_64(row, src_width * 2);
  // Highest usable y in fixed point: the interpolator reads rows yi and yi+1.
  const int max_y = (src_height - 1) << 16;
  int j;
  // 64-bit column stepper is needed once x/dx products can overflow 32 bits.
  void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
      int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
  void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_16_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);
  // Promote the row interpolator / column filter to SIMD when available.
#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    InterpolateRow = InterpolateRow_Any_16_SSE2;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSE2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_16_SSSE3;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_16_AVX2;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_16_NEON;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_16_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2)) {
    InterpolateRow = InterpolateRow_Any_16_DSPR2;
    if (IS_ALIGNED(src_width, 4)) {
      InterpolateRow = InterpolateRow_16_DSPR2;
    }
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
  }
#endif
  if (y > max_y) {
    y = max_y;
  }
  for (j = 0; j < dst_height; ++j) {
    int yi = y >> 16;
    const uint16* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      // Horizontal-only filtering: sample the source row directly.
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
      int yf = (y >> 8) & 255;  // vertical fraction in 1/256 units
      InterpolateRow((uint16*)row, src, src_stride, src_width, yf);
      ScaleFilterCols(dst_ptr, (uint16*)row, dst_width, x, dx);
    }
    dst_ptr += dst_stride;
    y += dy;
    if (y > max_y) {
      y = max_y;
    }
  }
  free_aligned_buffer_64(row);
}
// Scale plane up (or down) with bilinear interpolation.
// Horizontally scales two adjacent source rows into a pair of ping-pong row
// buffers, then vertically interpolates between them for each destination
// row; each source row is column-filtered at most once.
void ScalePlaneBilinearUp(int src_width, int src_height,
                          int dst_width, int dst_height,
                          int src_stride, int dst_stride,
                          const uint8* src_ptr, uint8* dst_ptr,
                          enum FilterMode filtering) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // Highest usable y in fixed point: interpolation reads rows yi and yi + 1.
  const int max_y = (src_height - 1) << 16;
  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
  // Column scaler: filtered or point-sampled, chosen by filter mode.
  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
      int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_C : ScaleCols_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);
  // Promote the row interpolator to SIMD when available.
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2)) {
    InterpolateRow = InterpolateRow_Any_DSPR2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_DSPR2;
    }
  }
#endif
  // Wide sources need the 64-bit column stepper to avoid overflow.
  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_C;
  }
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_NEON)
  if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleFilterCols_NEON;
    }
  }
#endif
  // Unfiltered exact 2x horizontal upscale has a specialized scaler.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleColsUp2_SSE2;
    }
#endif
  }
  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint8* src = src_ptr + yi * src_stride;
    // Allocate 2 row buffers; width rounded up to a multiple of 32 bytes.
    const int kRowSize = (dst_width + 31) & ~31;
    align_buffer_64(row, kRowSize * 2);
    uint8* rowptr = row;
    int rowstride = kRowSize;
    int lasty = yi;
    // Prime both row buffers with the first two source rows (or the same row
    // twice for a 1-row source).
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;
    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        // Advanced to a new source row: clamp y, then scale the next source
        // row into the older buffer and flip the ping-pong direction.
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_ptr + yi * src_stride;
        }
        if (yi != lasty) {
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        // Horizontal-only: copy the current row buffer (fraction 0).
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
      } else {
        int yf = (y >> 8) & 255;  // vertical fraction in 1/256 units
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
// 16 bit version of ScalePlaneBilinearUp: same two-row ping-pong scheme with
// uint16 pixels (row buffers are kRowSize * 2 bytes each).
void ScalePlaneBilinearUp_16(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint16* src_ptr, uint16* dst_ptr,
                             enum FilterMode filtering) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // Highest usable y in fixed point: interpolation reads rows yi and yi + 1.
  const int max_y = (src_height - 1) << 16;
  void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_16_C;
  // Column scaler: filtered or point-sampled, chosen by filter mode.
  void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
      int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);
  // Promote the row interpolator to SIMD when available.
#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    InterpolateRow = InterpolateRow_Any_16_SSE2;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSE2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_16_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_16_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_16_NEON;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_16_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2)) {
    InterpolateRow = InterpolateRow_Any_16_DSPR2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_16_DSPR2;
    }
  }
#endif
  // Wide sources need the 64-bit column stepper to avoid overflow.
  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_16_C;
  }
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
  }
#endif
  // Unfiltered exact 2x horizontal upscale has a specialized scaler.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleColsUp2_16_SSE2;
    }
#endif
  }
  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint16* src = src_ptr + yi * src_stride;
    // Allocate 2 row buffers; width rounded up to a multiple of 32 elements.
    const int kRowSize = (dst_width + 31) & ~31;
    align_buffer_64(row, kRowSize * 4);
    uint16* rowptr = (uint16*)row;
    int rowstride = kRowSize;
    int lasty = yi;
    // Prime both row buffers with the first two source rows (or the same row
    // twice for a 1-row source).
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;
    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        // Advanced to a new source row: clamp y, then scale the next source
        // row into the older buffer and flip the ping-pong direction.
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_ptr + yi * src_stride;
        }
        if (yi != lasty) {
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        // Horizontal-only: copy the current row buffer (fraction 0).
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
      } else {
        int yf = (y >> 8) & 255;  // vertical fraction in 1/256 units
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
  1230. // Scale Plane to/from any dimensions, without interpolation.
  1231. // Fixed point math is used for performance: The upper 16 bits
  1232. // of x and dx is the integer part of the source position and
  1233. // the lower 16 bits are the fixed decimal part.
  1234. static void ScalePlaneSimple(int src_width, int src_height,
  1235. int dst_width, int dst_height,
  1236. int src_stride, int dst_stride,
  1237. const uint8* src_ptr, uint8* dst_ptr) {
  1238. int i;
  1239. void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr,
  1240. int dst_width, int x, int dx) = ScaleCols_C;
  1241. // Initial source x/y coordinate and step values as 16.16 fixed point.
  1242. int x = 0;
  1243. int y = 0;
  1244. int dx = 0;
  1245. int dy = 0;
  1246. ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
  1247. &x, &y, &dx, &dy);
  1248. src_width = Abs(src_width);
  1249. if (src_width * 2 == dst_width && x < 0x8000) {
  1250. ScaleCols = ScaleColsUp2_C;
  1251. #if defined(HAS_SCALECOLS_SSE2)
  1252. if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
  1253. ScaleCols = ScaleColsUp2_SSE2;
  1254. }
  1255. #endif
  1256. }
  1257. for (i = 0; i < dst_height; ++i) {
  1258. ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
  1259. dst_ptr += dst_stride;
  1260. y += dy;
  1261. }
  1262. }
  1263. static void ScalePlaneSimple_16(int src_width, int src_height,
  1264. int dst_width, int dst_height,
  1265. int src_stride, int dst_stride,
  1266. const uint16* src_ptr, uint16* dst_ptr) {
  1267. int i;
  1268. void (*ScaleCols)(uint16* dst_ptr, const uint16* src_ptr,
  1269. int dst_width, int x, int dx) = ScaleCols_16_C;
  1270. // Initial source x/y coordinate and step values as 16.16 fixed point.
  1271. int x = 0;
  1272. int y = 0;
  1273. int dx = 0;
  1274. int dy = 0;
  1275. ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
  1276. &x, &y, &dx, &dy);
  1277. src_width = Abs(src_width);
  1278. if (src_width * 2 == dst_width && x < 0x8000) {
  1279. ScaleCols = ScaleColsUp2_16_C;
  1280. #if defined(HAS_SCALECOLS_16_SSE2)
  1281. if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
  1282. ScaleCols = ScaleColsUp2_16_SSE2;
  1283. }
  1284. #endif
  1285. }
  1286. for (i = 0; i < dst_height; ++i) {
  1287. ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
  1288. dst_width, x, dx);
  1289. dst_ptr += dst_stride;
  1290. y += dy;
  1291. }
  1292. }
// Scale a plane.
// This function dispatches to a specialized scaler based on scale factor.
// Dispatch order: straight copy, vertical-only, exact-ratio reductions
// (3/4, 1/2, 3/8, 1/4), box filter, bilinear up/down, then point sampling.
LIBYUV_API
void ScalePlane(const uint8* src, int src_stride,
                int src_width, int src_height,
                uint8* dst, int dst_stride,
                int dst_width, int dst_height,
                enum FilterMode filtering) {
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height,
                                dst_width, dst_height, filtering);
  // Negative height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }
  // Use specialized scales to improve performance for common resolutions.
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
  if (dst_width == src_width && dst_height == src_height) {
    // Straight copy.
    CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
    return;
  }
  // kFilterBox is excluded so deep vertical reductions fall through to
  // ScalePlaneBox below instead of two-row interpolation.
  if (dst_width == src_width && filtering != kFilterBox) {
    int dy = FixedDiv(src_height, dst_height);
    // Arbitrary scale vertically, but unscaled horizontally.
    ScalePlaneVertical(src_height,
                       dst_width, dst_height,
                       src_stride, dst_stride, src, dst,
                       0, 0, dy, 1, filtering);
    return;
  }
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
    // Scale down.
    if (4 * dst_width == 3 * src_width &&
        4 * dst_height == 3 * src_height) {
      // optimized, 3/4
      ScalePlaneDown34(src_width, src_height, dst_width, dst_height,
                       src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
      // optimized, 1/2
      ScalePlaneDown2(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst, filtering);
      return;
    }
    // 3/8 rounded up for odd sized chroma height.
    if (8 * dst_width == 3 * src_width &&
        dst_height == ((src_height * 3 + 7) / 8)) {
      // optimized, 3/8
      ScalePlaneDown38(src_width, src_height, dst_width, dst_height,
                       src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
        (filtering == kFilterBox || filtering == kFilterNone)) {
      // optimized, 1/4
      ScalePlaneDown4(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst, filtering);
      return;
    }
  }
  // Arbitrary scale factors from here on.
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
    ScalePlaneBox(src_width, src_height, dst_width, dst_height,
                  src_stride, dst_stride, src, dst);
    return;
  }
  if (filtering && dst_height > src_height) {
    ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
    return;
  }
  if (filtering) {
    ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
                           src_stride, dst_stride, src, dst, filtering);
    return;
  }
  // No filtering: point sampling.
  ScalePlaneSimple(src_width, src_height, dst_width, dst_height,
                   src_stride, dst_stride, src, dst);
}
  1375. LIBYUV_API
  1376. void ScalePlane_16(const uint16* src, int src_stride,
  1377. int src_width, int src_height,
  1378. uint16* dst, int dst_stride,
  1379. int dst_width, int dst_height,
  1380. enum FilterMode filtering) {
  1381. // Simplify filtering when possible.
  1382. filtering = ScaleFilterReduce(src_width, src_height,
  1383. dst_width, dst_height, filtering);
  1384. // Negative height means invert the image.
  1385. if (src_height < 0) {
  1386. src_height = -src_height;
  1387. src = src + (src_height - 1) * src_stride;
  1388. src_stride = -src_stride;
  1389. }
  1390. // Use specialized scales to improve performance for common resolutions.
  1391. // For example, all the 1/2 scalings will use ScalePlaneDown2()
  1392. if (dst_width == src_width && dst_height == src_height) {
  1393. // Straight copy.
  1394. CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
  1395. return;
  1396. }
  1397. if (dst_width == src_width) {
  1398. int dy = FixedDiv(src_height, dst_height);
  1399. // Arbitrary scale vertically, but unscaled vertically.
  1400. ScalePlaneVertical_16(src_height,
  1401. dst_width, dst_height,
  1402. src_stride, dst_stride, src, dst,
  1403. 0, 0, dy, 1, filtering);
  1404. return;
  1405. }
  1406. if (dst_width <= Abs(src_width) && dst_height <= src_height) {
  1407. // Scale down.
  1408. if (4 * dst_width == 3 * src_width &&
  1409. 4 * dst_height == 3 * src_height) {
  1410. // optimized, 3/4
  1411. ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
  1412. src_stride, dst_stride, src, dst, filtering);
  1413. return;
  1414. }
  1415. if (2 * dst_width == src_width && 2 * dst_height == src_height) {
  1416. // optimized, 1/2
  1417. ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
  1418. src_stride, dst_stride, src, dst, filtering);
  1419. return;
  1420. }
  1421. // 3/8 rounded up for odd sized chroma height.
  1422. if (8 * dst_width == 3 * src_width &&
  1423. dst_height == ((src_height * 3 + 7) / 8)) {
  1424. // optimized, 3/8
  1425. ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
  1426. src_stride, dst_stride, src, dst, filtering);
  1427. return;
  1428. }
  1429. if (4 * dst_width == src_width && 4 * dst_height == src_height &&
  1430. filtering != kFilterBilinear) {
  1431. // optimized, 1/4
  1432. ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
  1433. src_stride, dst_stride, src, dst, filtering);
  1434. return;
  1435. }
  1436. }
  1437. if (filtering == kFilterBox && dst_height * 2 < src_height) {
  1438. ScalePlaneBox_16(src_width, src_height, dst_width, dst_height,
  1439. src_stride, dst_stride, src, dst);
  1440. return;
  1441. }
  1442. if (filtering && dst_height > src_height) {
  1443. ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
  1444. src_stride, dst_stride, src, dst, filtering);
  1445. return;
  1446. }
  1447. if (filtering) {
  1448. ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
  1449. src_stride, dst_stride, src, dst, filtering);
  1450. return;
  1451. }
  1452. ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height,
  1453. src_stride, dst_stride, src, dst);
  1454. }
  1455. // Scale an I420 image.
  1456. // This function in turn calls a scaling function for each plane.
  1457. LIBYUV_API
  1458. int I420Scale(const uint8* src_y, int src_stride_y,
  1459. const uint8* src_u, int src_stride_u,
  1460. const uint8* src_v, int src_stride_v,
  1461. int src_width, int src_height,
  1462. uint8* dst_y, int dst_stride_y,
  1463. uint8* dst_u, int dst_stride_u,
  1464. uint8* dst_v, int dst_stride_v,
  1465. int dst_width, int dst_height,
  1466. enum FilterMode filtering) {
  1467. int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
  1468. int src_halfheight = SUBSAMPLE(src_height, 1, 1);
  1469. int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
  1470. int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
  1471. if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
  1472. src_width > 32768 || src_height > 32768 ||
  1473. !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
  1474. return -1;
  1475. }
  1476. ScalePlane(src_y, src_stride_y, src_width, src_height,
  1477. dst_y, dst_stride_y, dst_width, dst_height,
  1478. filtering);
  1479. ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight,
  1480. dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
  1481. filtering);
  1482. ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight,
  1483. dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
  1484. filtering);
  1485. return 0;
  1486. }
  1487. LIBYUV_API
  1488. int I420Scale_16(const uint16* src_y, int src_stride_y,
  1489. const uint16* src_u, int src_stride_u,
  1490. const uint16* src_v, int src_stride_v,
  1491. int src_width, int src_height,
  1492. uint16* dst_y, int dst_stride_y,
  1493. uint16* dst_u, int dst_stride_u,
  1494. uint16* dst_v, int dst_stride_v,
  1495. int dst_width, int dst_height,
  1496. enum FilterMode filtering) {
  1497. int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
  1498. int src_halfheight = SUBSAMPLE(src_height, 1, 1);
  1499. int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
  1500. int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
  1501. if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
  1502. src_width > 32768 || src_height > 32768 ||
  1503. !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
  1504. return -1;
  1505. }
  1506. ScalePlane_16(src_y, src_stride_y, src_width, src_height,
  1507. dst_y, dst_stride_y, dst_width, dst_height,
  1508. filtering);
  1509. ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight,
  1510. dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
  1511. filtering);
  1512. ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight,
  1513. dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
  1514. filtering);
  1515. return 0;
  1516. }
  1517. // Deprecated api
  1518. LIBYUV_API
  1519. int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
  1520. int src_stride_y, int src_stride_u, int src_stride_v,
  1521. int src_width, int src_height,
  1522. uint8* dst_y, uint8* dst_u, uint8* dst_v,
  1523. int dst_stride_y, int dst_stride_u, int dst_stride_v,
  1524. int dst_width, int dst_height,
  1525. LIBYUV_BOOL interpolate) {
  1526. return I420Scale(src_y, src_stride_y,
  1527. src_u, src_stride_u,
  1528. src_v, src_stride_v,
  1529. src_width, src_height,
  1530. dst_y, dst_stride_y,
  1531. dst_u, dst_stride_u,
  1532. dst_v, dst_stride_v,
  1533. dst_width, dst_height,
  1534. interpolate ? kFilterBox : kFilterNone);
  1535. }
// Deprecated api
// Scales a packed (contiguous) I420 buffer into another packed I420 buffer,
// writing the output vertically offset by dst_yoffset rows.
LIBYUV_API
int ScaleOffset(const uint8* src, int src_width, int src_height,
                uint8* dst, int dst_width, int dst_height, int dst_yoffset,
                LIBYUV_BOOL interpolate) {
  // Chroma requires offset to multiple of 2.
  int dst_yoffset_even = dst_yoffset & ~1;
  int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
  int src_halfheight = SUBSAMPLE(src_height, 1, 1);
  int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
  int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
  int aheight = dst_height - dst_yoffset_even * 2;  // actual output height
  // Plane pointers assume a contiguous I420 layout: Y, then U, then V, with
  // stride equal to plane width.
  const uint8* src_y = src;
  const uint8* src_u = src + src_width * src_height;
  const uint8* src_v = src + src_width * src_height +
      src_halfwidth * src_halfheight;
  uint8* dst_y = dst + dst_yoffset_even * dst_width;
  uint8* dst_u = dst + dst_width * dst_height +
      (dst_yoffset_even >> 1) * dst_halfwidth;
  uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight +
      (dst_yoffset_even >> 1) * dst_halfwidth;
  // NOTE(review): plane pointers are computed before validation; if src/dst
  // is NULL the pointer arithmetic above is technically undefined even though
  // nothing is dereferenced — consider validating first.
  if (!src || src_width <= 0 || src_height <= 0 ||
      !dst || dst_width <= 0 || dst_height <= 0 || dst_yoffset_even < 0 ||
      dst_yoffset_even >= dst_height) {
    return -1;
  }
  return I420Scale(src_y, src_width,
                   src_u, src_halfwidth,
                   src_v, src_halfwidth,
                   src_width, src_height,
                   dst_y, dst_width,
                   dst_u, dst_halfwidth,
                   dst_v, dst_halfwidth,
                   dst_width, aheight,
                   interpolate ? kFilterBox : kFilterNone);
}
  1572. #ifdef __cplusplus
  1573. } // extern "C"
  1574. } // namespace libyuv
  1575. #endif