yuv_convert_mmx.cpp 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  1. // Copyright (c) 2010 The Chromium Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file.
  4. #include <mmintrin.h>
  5. #include "yuv_row.h"
  6. namespace mozilla {
  7. namespace gfx {
  // FilterRows_MMX blends two source rows into one destination row by linear
  // interpolation, eight pixels (one 64-bit MMX register) per iteration:
  //
  //   ybuf[i] = (y0_ptr[i] * (256 - source_y_fraction) +
  //              y1_ptr[i] * source_y_fraction) >> 8
  //
  // ybuf              - destination row.
  // y0_ptr, y1_ptr    - the two source rows being blended.
  // source_width      - number of pixels to produce. A non-positive width is a
  //                     no-op. Otherwise the width is effectively rounded up
  //                     to a multiple of 8, so up to 7 bytes past
  //                     |source_width| are read from both sources and written
  //                     to |ybuf|; all three buffers must tolerate that.
  // source_y_fraction - weight of the second row; presumably in [0, 256]
  //                     (the two weights sum to 256). Products are kept to
  //                     16 bits, so values outside that range would wrap.
  //
  // NOTE: this function does not execute EMMS (_mm_empty); the MMX state is
  // left as-is on return, so the caller presumably clears it before any x87
  // floating-point work.
  void FilterRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
                      int source_width, int source_y_fraction) {
    // The loop below is bottom-tested and would otherwise touch 8 bytes of
    // every buffer even for an empty row.
    if (source_width <= 0) {
      return;
    }

    const __m64 zero = _mm_setzero_si64();
    // Per-lane 16-bit weights for the second and first row respectively.
    const __m64 y1_fraction = _mm_set1_pi16(source_y_fraction);
    const __m64 y0_fraction = _mm_set1_pi16(256 - source_y_fraction);

    const __m64* y0_ptr64 = reinterpret_cast<const __m64*>(y0_ptr);
    const __m64* y1_ptr64 = reinterpret_cast<const __m64*>(y1_ptr);
    __m64* dest64 = reinterpret_cast<__m64*>(ybuf);
    const __m64* const end64 = reinterpret_cast<const __m64*>(ybuf + source_width);

    do {
      __m64 y0 = *y0_ptr64++;
      __m64 y1 = *y1_ptr64++;

      // Widen the 8 bytes of each row to two groups of four 16-bit lanes:
      // y2/y3 hold the upper four pixels, y0/y1 the lower four.
      __m64 y2 = _mm_unpackhi_pi8(y0, zero);
      __m64 y3 = _mm_unpackhi_pi8(y1, zero);
      y0 = _mm_unpacklo_pi8(y0, zero);
      y1 = _mm_unpacklo_pi8(y1, zero);

      // Weight each row (low 16 bits of each product are kept).
      y0 = _mm_mullo_pi16(y0, y0_fraction);
      y1 = _mm_mullo_pi16(y1, y1_fraction);
      y2 = _mm_mullo_pi16(y2, y0_fraction);
      y3 = _mm_mullo_pi16(y3, y1_fraction);

      // Sum the weighted rows and drop the 8 fractional bits.
      y0 = _mm_add_pi16(y0, y1);
      y2 = _mm_add_pi16(y2, y3);
      y0 = _mm_srli_pi16(y0, 8);
      y2 = _mm_srli_pi16(y2, 8);

      // Narrow back to 8 bytes (low half first) and store.
      y0 = _mm_packs_pu16(y0, y2);
      *dest64++ = y0;
    } while (dest64 < end64);
  }
  38. } // namespace gfx
  39. } // namespace mozilla