ConvectionKernels_EndpointRefiner.h 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. #pragma once
  2. #ifndef __CVTT_ENDPOINTREFINER_H__
  3. #define __CVTT_ENDPOINTREFINER_H__
  4. #include "ConvectionKernels_ParallelMath.h"
  5. namespace cvtt
  6. {
  7. namespace Internal
  8. {
  9. // Solve for a, b where v = a*t + b
  10. // This allows endpoints to be mapped to where T=0 and T=1
  11. // Least squares from totals:
  12. // a = (tv - t*v/w)/(tt - t*t/w)
  13. // b = (v - a*t)/w
  14. template<int TVectorSize>
  15. class EndpointRefiner
  16. {
  17. public:
  18. typedef ParallelMath::Float MFloat;
  19. typedef ParallelMath::UInt16 MUInt16;
  20. typedef ParallelMath::UInt15 MUInt15;
  21. typedef ParallelMath::AInt16 MAInt16;
  22. typedef ParallelMath::SInt16 MSInt16;
  23. typedef ParallelMath::SInt32 MSInt32;
  24. MFloat m_tv[TVectorSize];
  25. MFloat m_v[TVectorSize];
  26. MFloat m_tt;
  27. MFloat m_t;
  28. MFloat m_w;
  29. int m_wu;
  30. float m_rcpMaxIndex;
  31. float m_channelWeights[TVectorSize];
  32. float m_rcpChannelWeights[TVectorSize];
  33. void Init(int indexRange, const float channelWeights[TVectorSize])
  34. {
  35. for (int ch = 0; ch < TVectorSize; ch++)
  36. {
  37. m_tv[ch] = ParallelMath::MakeFloatZero();
  38. m_v[ch] = ParallelMath::MakeFloatZero();
  39. }
  40. m_tt = ParallelMath::MakeFloatZero();
  41. m_t = ParallelMath::MakeFloatZero();
  42. m_w = ParallelMath::MakeFloatZero();
  43. m_rcpMaxIndex = 1.0f / static_cast<float>(indexRange - 1);
  44. for (int ch = 0; ch < TVectorSize; ch++)
  45. {
  46. m_channelWeights[ch] = channelWeights[ch];
  47. m_rcpChannelWeights[ch] = 1.0f;
  48. if (m_channelWeights[ch] != 0.0f)
  49. m_rcpChannelWeights[ch] = 1.0f / channelWeights[ch];
  50. }
  51. m_wu = 0;
  52. }
  53. void ContributePW(const MFloat *pwFloatPixel, const MUInt15 &index, const MFloat &weight)
  54. {
  55. MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex;
  56. for (int ch = 0; ch < TVectorSize; ch++)
  57. {
  58. MFloat v = pwFloatPixel[ch] * weight;
  59. m_tv[ch] = m_tv[ch] + t * v;
  60. m_v[ch] = m_v[ch] + v;
  61. }
  62. m_tt = m_tt + weight * t * t;
  63. m_t = m_t + weight * t;
  64. m_w = m_w + weight;
  65. }
  66. void ContributeUnweightedPW(const MFloat *pwFloatPixel, const MUInt15 &index, int numRealChannels)
  67. {
  68. MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex;
  69. for (int ch = 0; ch < numRealChannels; ch++)
  70. {
  71. MFloat v = pwFloatPixel[ch];
  72. m_tv[ch] = m_tv[ch] + t * v;
  73. m_v[ch] = m_v[ch] + v;
  74. }
  75. m_tt = m_tt + t * t;
  76. m_t = m_t + t;
  77. m_wu++;
  78. }
  79. void ContributeUnweightedPW(const MFloat *floatPixel, const MUInt15 &index)
  80. {
  81. ContributeUnweightedPW(floatPixel, index, TVectorSize);
  82. }
  83. void GetRefinedEndpoints(MFloat endPoint[2][TVectorSize])
  84. {
  85. // a = (tv - t*v/w)/(tt - t*t/w)
  86. // b = (v - a*t)/w
  87. MFloat w = m_w + ParallelMath::MakeFloat(static_cast<float>(m_wu));
  88. ParallelMath::MakeSafeDenominator(w);
  89. MFloat wRcp = ParallelMath::Reciprocal(w);
  90. MFloat adenom = (m_tt * w - m_t * m_t) * wRcp;
  91. ParallelMath::FloatCompFlag adenomZero = ParallelMath::Equal(adenom, ParallelMath::MakeFloatZero());
  92. ParallelMath::ConditionalSet(adenom, adenomZero, ParallelMath::MakeFloat(1.0f));
  93. for (int ch = 0; ch < TVectorSize; ch++)
  94. {
  95. /*
  96. if (adenom == 0.0)
  97. p1 = p2 = er.v / er.w;
  98. else
  99. {
  100. float4 a = (er.tv - er.t*er.v / er.w) / adenom;
  101. float4 b = (er.v - a * er.t) / er.w;
  102. p1 = b;
  103. p2 = a + b;
  104. }
  105. */
  106. MFloat a = (m_tv[ch] - m_t * m_v[ch] * wRcp) / adenom;
  107. MFloat b = (m_v[ch] - a * m_t) * wRcp;
  108. MFloat p1 = b;
  109. MFloat p2 = a + b;
  110. ParallelMath::ConditionalSet(p1, adenomZero, (m_v[ch] * wRcp));
  111. ParallelMath::ConditionalSet(p2, adenomZero, p1);
  112. // Unweight
  113. float inverseWeight = m_rcpChannelWeights[ch];
  114. endPoint[0][ch] = p1 * inverseWeight;
  115. endPoint[1][ch] = p2 * inverseWeight;
  116. }
  117. }
  118. void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], int numRealChannels, const ParallelMath::RoundTowardNearestForScope *roundingMode)
  119. {
  120. MFloat floatEndPoint[2][TVectorSize];
  121. GetRefinedEndpoints(floatEndPoint);
  122. for (int epi = 0; epi < 2; epi++)
  123. for (int ch = 0; ch < TVectorSize; ch++)
  124. endPoint[epi][ch] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(floatEndPoint[epi][ch], 0.0f, 255.0f), roundingMode);
  125. }
  126. void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], const ParallelMath::RoundTowardNearestForScope *roundingMode)
  127. {
  128. GetRefinedEndpointsLDR(endPoint, TVectorSize, roundingMode);
  129. }
  130. void GetRefinedEndpointsHDR(MSInt16 endPoint[2][TVectorSize], bool isSigned, const ParallelMath::RoundTowardNearestForScope *roundingMode)
  131. {
  132. MFloat floatEndPoint[2][TVectorSize];
  133. GetRefinedEndpoints(floatEndPoint);
  134. for (int epi = 0; epi < 2; epi++)
  135. {
  136. for (int ch = 0; ch < TVectorSize; ch++)
  137. {
  138. MFloat f = floatEndPoint[epi][ch];
  139. if (isSigned)
  140. endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToS16(ParallelMath::Clamp(f, -31743.0f, 31743.0f), roundingMode));
  141. else
  142. endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(f, 0.0f, 31743.0f), roundingMode));
  143. }
  144. }
  145. }
  146. };
  147. }
  148. }
  149. #endif