bench-reduce-sqrm.cc 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. // (c) Daniel Llorens - 2011, 2017
  2. // This library is free software; you can redistribute it and/or modify it under
  3. // the terms of the GNU Lesser General Public License as published by the Free
  4. // Software Foundation; either version 3 of the License, or (at your option) any
  5. // later version.
/// @file bench-reduce-sqrm.cc
  7. /// @brief Benchmark for reduce_sqrm with various array types.
  8. #include <iostream>
  9. #include <iomanip>
  10. #include "ra/big.hh"
  11. #include "ra/small.hh"
  12. #include "ra/operators.hh"
  13. #include "ra/real.hh"
  14. #include "ra/test.hh"
  15. #include "ra/bench.hh"
  16. using std::cout, std::endl, std::setw, std::setprecision, ra::TestRecorder;
  17. using real = double;
  18. using real4 = ra::Small<real, 4>;
  19. int const N = 500000;
  20. ra::Small<ra::dim_t, 1> S1 { 24*24 };
  21. ra::Small<ra::dim_t, 2> S2 { 24, 24 };
  22. ra::Small<ra::dim_t, 3> S3 { 8, 8, 9 };
  23. TestRecorder tr(std::cout);
  24. real y;
  25. template <class BV>
  26. void report(int size, BV const & bv)
  27. {
  28. tr.info(std::setw(5), std::fixed, Benchmark::avg(bv)/size/1e-9, " ns [", Benchmark::stddev(bv)/size/1e-9 ,"] ", bv.name)
  29. .test_eq(prod(S1)*N*4*4, y);
  30. }
  31. int main()
  32. {
  33. Benchmark bm = Benchmark().runs(3);
  34. report(4,
  35. bm.name("real4 raw").repeats(N*prod(S1)/4)
  36. .run_f([&](auto && repeat)
  37. {
  38. real4 A(7.), B(3.);
  39. y = 0.;
  40. repeat([&]()
  41. {
  42. for (int j=0; j!=4; ++j) {
  43. y += sqrm(A(j)-B(j));
  44. }
  45. });
  46. }));
  47. report(4,
  48. bm.name("real4 expr").repeats(N*prod(S1)/4)
  49. .run_f([&](auto && repeat)
  50. {
  51. real4 A(7.), B(3.);
  52. y = 0.;
  53. repeat([&]()
  54. {
  55. y += reduce_sqrm(A-B);
  56. });
  57. }));
  58. report(prod(S1),
  59. bm.name("C array raw").repeats(N)
  60. .run_f([&](auto && repeat)
  61. {
  62. ra::Unique<real, 1> A(S1, 7.);
  63. ra::Unique<real, 1> B(S1, 3.);
  64. y = 0.;
  65. repeat([&]()
  66. {
  67. real const * a = A.data();
  68. real const * b = B.data();
  69. for (int j=0; j<S1[0]; ++j) {
  70. y += sqrm(a[j]-b[j]);
  71. }
  72. });
  73. }));
  74. // sqrm+reduction in one op.
  75. auto traversal = [&](auto && repeat, auto const & a, auto const & b)
  76. {
  77. y = 0.;
  78. repeat([&]()
  79. {
  80. for_each([&](real const a, real const b) { y += sqrm(a, b); }, a, b);
  81. });
  82. };
  83. // separate reduction: compare abstraction penalty with by_traversal.
  84. auto traversal2 = [&](auto && repeat, auto const & a, auto const & b)
  85. {
  86. y = 0.;
  87. repeat([&]()
  88. {
  89. for_each([&](real const a) { y += a; },
  90. map([](real const a, real const b) { return sqrm(a, b); },
  91. a, b));
  92. });
  93. };
  94. {
  95. ra::Unique<real, 1> A(S1, 7.);
  96. ra::Unique<real, 1> B(S1, 3.);
  97. report(prod(S1), bm.name("ra::Unique<1> ply nested 1").repeats(N).once_f(traversal, A, B));
  98. report(prod(S1), bm.name("ra::Unique<1> ply nested 2").repeats(N).once_f(traversal2, A, B));
  99. report(prod(S1), bm.name("ra::Unique<1> raw").repeats(N)
  100. .once_f([&](auto && repeat)
  101. {
  102. y = 0.;
  103. repeat([&]()
  104. {
  105. for (int j=0; j<S1[0]; ++j) {
  106. y += sqrm(A(j)-B(j));
  107. }
  108. });
  109. }));
  110. }
  111. {
  112. ra::Unique<real, 2> A(S2, 7.);
  113. ra::Unique<real, 2> B(S2, 3.);
  114. report(prod(S2), bm.name("ra::Unique<2> ply nested 1").repeats(N).once_f(traversal, A, B));
  115. report(prod(S2), bm.name("ra::Unique<2> ply nested 2").repeats(N).once_f(traversal2, A, B));
  116. report(prod(S2), bm.name("ra::Unique<2> raw").repeats(N)
  117. .once_f([&](auto && repeat)
  118. {
  119. y = 0.;
  120. repeat([&]()
  121. {
  122. for (int j=0; j<S2[0]; ++j) {
  123. for (int k=0; k<S2[1]; ++k) {
  124. y += sqrm(A(j, k)-B(j, k));
  125. }
  126. }
  127. });
  128. }));
  129. }
  130. {
  131. ra::Unique<real, 3> A(S3, 7.);
  132. ra::Unique<real, 3> B(S3, 3.);
  133. report(prod(S3), bm.name("ra::Unique<3> ply nested 1").repeats(N).once_f(traversal, A, B));
  134. report(prod(S3), bm.name("ra::Unique<3> ply nested 2").repeats(N).once_f(traversal2, A, B));
  135. report(prod(S3), bm.name("ra::Unique<3> raw").repeats(N)
  136. .once_f([&](auto && repeat)
  137. {
  138. y = 0.;
  139. repeat([&]()
  140. {
  141. for (int j=0; j<S3[0]; ++j) {
  142. for (int k=0; k<S3[1]; ++k) {
  143. for (int l=0; l<S3[2]; ++l) {
  144. y += sqrm(A(j, k, l)-B(j, k, l));
  145. }
  146. }
  147. }
  148. });
  149. }));
  150. }
  151. return tr.summary();
  152. }