ConvectionKernels_IndexSelector.h 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. #pragma once
  2. #ifndef __CVTT_INDEXSELECTOR_H__
  3. #define __CVTT_INDEXSELECTOR_H__
  4. #include "ConvectionKernels_ParallelMath.h"
  5. namespace cvtt
  6. {
  7. namespace Internal
  8. {
  // Lookup table with 17 entries, declared here and defined in another translation unit.
  // In this header it is indexed by IndexSelector::m_range (the index range passed to Init) and
  // its entry is multiplied by a palette index to derive an interpolation weight.
  // NOTE(review): the index range is presumably expected to stay within [0, 16]; nothing in this
  // header enforces that - confirm against callers.
  9. extern const ParallelMath::UInt16 g_weightReciprocals[17];
  10. template<int TVectorSize>
  11. class IndexSelector
  12. {
  13. public:
  14. typedef ParallelMath::Float MFloat;
  15. typedef ParallelMath::UInt16 MUInt16;
  16. typedef ParallelMath::UInt15 MUInt15;
  17. typedef ParallelMath::SInt16 MSInt16;
  18. typedef ParallelMath::AInt16 MAInt16;
  19. typedef ParallelMath::SInt32 MSInt32;
  20. typedef ParallelMath::UInt31 MUInt31;
  21. template<class TInterpolationEPType, class TColorEPType>
  22. void Init(const float *channelWeights, const TInterpolationEPType interpolationEndPoints[2][TVectorSize], const TColorEPType colorSpaceEndpoints[2][TVectorSize], int range)
  23. {
  24. // In BC6H, the interpolation endpoints are higher-precision than the endpoints in color space.
  25. // We need to select indexes using the color-space endpoints.
  26. m_isUniform = true;
  27. for (int ch = 1; ch < TVectorSize; ch++)
  28. {
  29. if (channelWeights[ch] != channelWeights[0])
  30. m_isUniform = false;
  31. }
  32. // To work with channel weights, we need something where:
  33. // pxDiff = px - ep[0]
  34. // epDiff = ep[1] - ep[0]
  35. //
  36. // weightedEPDiff = epDiff * channelWeights
  37. // normalizedWeightedAxis = weightedEPDiff / len(weightedEPDiff)
  38. // normalizedIndex = dot(pxDiff * channelWeights, normalizedWeightedAxis) / len(weightedEPDiff)
  39. // index = normalizedIndex * maxValue
  40. //
  41. // Equivalent to:
  42. // axis = channelWeights * maxValue * epDiff * channelWeights / lenSquared(epDiff * channelWeights)
  43. // index = dot(axis, pxDiff)
  44. for (int ep = 0; ep < 2; ep++)
  45. for (int ch = 0; ch < TVectorSize; ch++)
  46. m_endPoint[ep][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(interpolationEndPoints[ep][ch]);
  47. m_range = range;
  48. m_maxValue = static_cast<float>(range - 1);
  49. MFloat epDiffWeighted[TVectorSize];
  50. for (int ch = 0; ch < TVectorSize; ch++)
  51. {
  52. m_origin[ch] = ParallelMath::ToFloat(colorSpaceEndpoints[0][ch]);
  53. MFloat opposingOriginCh = ParallelMath::ToFloat(colorSpaceEndpoints[1][ch]);
  54. epDiffWeighted[ch] = (opposingOriginCh - m_origin[ch]) * channelWeights[ch];
  55. }
  56. MFloat lenSquared = epDiffWeighted[0] * epDiffWeighted[0];
  57. for (int ch = 1; ch < TVectorSize; ch++)
  58. lenSquared = lenSquared + epDiffWeighted[ch] * epDiffWeighted[ch];
  59. ParallelMath::MakeSafeDenominator(lenSquared);
  60. MFloat maxValueDividedByLengthSquared = ParallelMath::MakeFloat(m_maxValue) / lenSquared;
  61. for (int ch = 0; ch < TVectorSize; ch++)
  62. m_axis[ch] = epDiffWeighted[ch] * channelWeights[ch] * maxValueDividedByLengthSquared;
  63. }
  64. template<bool TSigned>
  65. void Init(const float channelWeights[TVectorSize], const MUInt15 endPoints[2][TVectorSize], int range)
  66. {
  67. MAInt16 converted[2][TVectorSize];
  68. for (int epi = 0; epi < 2; epi++)
  69. for (int ch = 0; ch < TVectorSize; ch++)
  70. converted[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(endPoints[epi][ch]);
  71. Init<MUInt15, MUInt15>(channelWeights, endPoints, endPoints, range);
  72. }
  73. void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel, int numRealChannels)
  74. {
  75. MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9));
  76. for (int ch = 0; ch < numRealChannels; ch++)
  77. {
  78. MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(64) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch])));
  79. MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch])));
  80. pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(32), 6));
  81. }
  82. }
  83. void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel, int numRealChannels)
  84. {
  85. MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 64, 7));
  86. for (int ch = 0; ch < numRealChannels; ch++)
  87. {
  88. MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(256) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch])));
  89. MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch])));
  90. pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(128), 8));
  91. }
  92. }
  93. void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel)
  94. {
  95. ReconstructLDR_BC7(index, pixel, TVectorSize);
  96. }
  97. void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel)
  98. {
  99. ReconstructLDRPrecise(index, pixel, TVectorSize);
  100. }
  101. MUInt15 SelectIndexLDR(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const
  102. {
  103. MFloat dist = (pixel[0] - m_origin[0]) * m_axis[0];
  104. for (int ch = 1; ch < TVectorSize; ch++)
  105. dist = dist + (pixel[ch] - m_origin[ch]) * m_axis[ch];
  106. return ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(dist, 0.0f, m_maxValue), rtn);
  107. }
  108. protected:
  109. MAInt16 m_endPoint[2][TVectorSize];
  110. private:
  111. MFloat m_origin[TVectorSize];
  112. MFloat m_axis[TVectorSize];
  113. int m_range;
  114. float m_maxValue;
  115. bool m_isUniform;
  116. };
  117. }
  118. }
  119. #endif