bench-dot.cc 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. // -*- mode: c++; coding: utf-8 -*-
  2. /// @file bench-dot.hh
  3. /// @brief Benchmark for dot with various array types.
  4. // (c) Daniel Llorens - 2011, 2014-2015, 2017
  5. // This library is free software; you can redistribute it and/or modify it under
  6. // the terms of the GNU Lesser General Public License as published by the Free
  7. // Software Foundation; either version 3 of the License, or (at your option) any
  8. // later version.
  9. #include <iostream>
  10. #include <iomanip>
  11. #include <random>
  12. #include "ra/test.hh"
  13. #include "ra/complex.hh"
  14. #include "ra/ra.hh"
  15. #include "ra/bench.hh"
  16. using std::cout, std::endl, std::setw, std::setprecision, ra::TestRecorder;
  17. using ra::Small, ra::View, ra::Unique, ra::dim_t;
  18. using real = double;
  19. int const N = 200000;
  20. Small<dim_t, 1> S1 { 24*24 };
  21. Small<dim_t, 2> S2 { 24, 24 };
  22. Small<dim_t, 3> S3 { 8, 8, 9 };
  23. struct by_raw
  24. {
  25. constexpr static char const * name = "raw";
  26. template <class A, class B>
  27. real operator()(A const & a, B const & b)
  28. {
  29. constexpr int rank = ra::rank_s<A>();
  30. if constexpr ((std::is_pointer_v<A> && std::is_pointer_v<B>) || 1==rank) {
  31. real y(0.);
  32. for (int j=0; j<S1[0]; ++j) {
  33. y += a[j]*b[j];
  34. }
  35. return y;
  36. } else if constexpr (2==rank) {
  37. real y(0.);
  38. for (int j=0; j<S2[0]; ++j) {
  39. for (int k=0; k<S2[1]; ++k) {
  40. y += a(j, k)*b(j, k);
  41. }
  42. }
  43. return y;
  44. } else if constexpr (3==rank) {
  45. real y(0.);
  46. for (int j=0; j<S3[0]; ++j) {
  47. for (int k=0; k<S3[1]; ++k) {
  48. for (int l=0; l<S3[2]; ++l) {
  49. y += a(j, k, l)*b(j, k, l);
  50. }
  51. }
  52. }
  53. return y;
  54. } else {
  55. abort();
  56. }
  57. }
  58. };
  59. #define BY_PLY(NAME, INSIDE) \
  60. struct NAME { \
  61. constexpr static char const * name = STRINGIZE(NAME); \
  62. template <class A, class B> real operator()(A && a, B && b) \
  63. { \
  64. real y(0.); \
  65. INSIDE; \
  66. return y; \
  67. } \
  68. }; \
  69. #define BY_PLY_TAGGED(PLYER) \
  70. /* plain */ \
  71. BY_PLY(JOIN(by_1l_, PLYER), \
  72. ra::PLYER(ra::map([&y](real const a, real const b) { y += a*b; }, \
  73. a, b))) \
  74. \
  75. /* separate reduction */ \
  76. BY_PLY(JOIN(by_2l_, PLYER), \
  77. ra::PLYER(ra::map([&y](real const a) { y += a; }, \
  78. ra::map([](real const a, real const b) { return a*b; }, \
  79. a, b)))) \
  80. \
  81. /* using trivial rank conjunction */ \
  82. BY_PLY(JOIN(by_1w_, PLYER), \
  83. ra::PLYER(ra::map(ra::wrank<0, 0>([&y](real const a, real const b) { y += a*b; }), \
  84. a, b))) \
  85. \
  86. /* separate reduction: using trivial rank conjunction */ \
  87. BY_PLY(JOIN(by_2w_, PLYER), \
  88. ra::PLYER(ra::map(ra::wrank<0>([&y](real const a) { y += a; }), \
  89. ra::map(ra::wrank<0, 0>([](real const a, real const b) { return a*b; }), \
  90. a, b))));
  91. BY_PLY_TAGGED(ply_ravel);
  92. BY_PLY_TAGGED(plyf);
  93. real a, b, ref, rspec;
  94. int main()
  95. {
  96. std::random_device rand;
  97. a = rand();
  98. b = rand();
  99. ref = a*b*S1[0]*N;
  100. TestRecorder tr;
  101. {
  102. auto bench = [&tr](char const * tag, auto s, auto && ref, int reps, auto && f)
  103. {
  104. rspec = 1e-2;
  105. constexpr int M = ra::size(s);
  106. decltype(s) A(a);
  107. decltype(s) B(b);
  108. real y(0.);
  109. auto bv = Benchmark().repeats(reps).runs(3).run([&]() { y += f(A, B); });
  110. tr.info(std::setw(6), std::fixed, Benchmark::avg(bv)/M/1e-9, " ns [", Benchmark::stddev(bv)/M/1e-9, "] ", tag)
  111. .test_rel_error(a*b*M*reps*3, y, rspec);
  112. };
  113. auto f_small_indexed_1 = [](auto && A, auto && B)
  114. {
  115. real y = 0;
  116. for (int j=0; j!=A.size(); ++j) {
  117. y += A(j)*B(j);
  118. }
  119. return y;
  120. };
  121. auto f_small_indexed_2 = [](auto && A, auto && B)
  122. {
  123. real y = 0;
  124. for (int i=0; i!=A.size(0); ++i) {
  125. for (int j=0; j!=A.size(1); ++j) {
  126. y += A(i, j)*B(i, j);
  127. }
  128. }
  129. return y;
  130. };
  131. auto f_small_indexed_3 = [](auto && A, auto && B)
  132. {
  133. real y = 0;
  134. for (int i=0; i!=A.size(0); ++i) {
  135. for (int j=0; j!=A.size(1); ++j) {
  136. for (int k=0; k!=A.size(2); ++k) {
  137. y += A(i, j, k)*B(i, j, k);
  138. }
  139. }
  140. }
  141. return y;
  142. };
  143. auto f_small_indexed_raw = [](auto && A, auto && B)
  144. {
  145. real * a = A.data();
  146. real * b = B.data();
  147. real y = 0;
  148. for (int j=0; j!=A.size(); ++j) {
  149. y += a[j]*b[j];
  150. }
  151. return y;
  152. };
  153. // optimize() plugs into the definition of operator*, etc.
  154. // for rank>2 or even rank>1 depending on the sizes, the performance of this depends on [ra43].
  155. auto f_small_op = [](auto && A, auto && B)
  156. {
  157. return sum(A*B);
  158. };
  159. #define DEFINE_SMALL_PLY(name, plier) \
  160. auto JOIN(f_small_, plier) = [](auto && A, auto && B) \
  161. { \
  162. real y = 0; \
  163. plier(ra::map([&y](real a, real b) { y += a*b; }, A, B)); \
  164. return y; \
  165. }
  166. DEFINE_SMALL_PLY(ply_ravel, ply_ravel);
  167. DEFINE_SMALL_PLY(plyf, plyf);
  168. DEFINE_SMALL_PLY(ply, ply);
  169. auto bench_all = [&](auto s, auto && f_small_indexed)
  170. {
  171. constexpr int M = ra::size(s);
  172. tr.section("small <", ra::shape(s), ">");
  173. auto extra = [&]() { return int(double(std::rand())*100/RAND_MAX); };
  174. int reps = (1000*1000*100)/M;
  175. bench("indexed", s, ref, reps+extra(), f_small_indexed);
  176. bench("indexed_raw", s, ref, reps+extra(), f_small_indexed_raw);
  177. bench("op", s, ref, reps+extra(), f_small_op);
  178. bench("ply_ravel", s, ref, reps+extra(), f_small_ply_ravel);
  179. bench("plyf", s, ref, reps+extra(), f_small_plyf);
  180. bench("ply", s, ref, reps+extra(), f_small_ply);
  181. };
  182. bench_all(ra::Small<real, 2>(), f_small_indexed_1);
  183. bench_all(ra::Small<real, 3>(), f_small_indexed_1);
  184. bench_all(ra::Small<real, 4>(), f_small_indexed_1);
  185. bench_all(ra::Small<real, 8>(), f_small_indexed_1);
  186. bench_all(ra::Small<real, 2, 2>(), f_small_indexed_2);
  187. bench_all(ra::Small<real, 3, 3>(), f_small_indexed_2);
  188. bench_all(ra::Small<real, 4, 4>(), f_small_indexed_2);
  189. bench_all(ra::Small<real, 3, 3, 3>(), f_small_indexed_3);
  190. }
  191. rspec = 2e-11;
  192. auto bench = [&tr](auto && a, auto && b, auto && ref, real rspec, int reps, auto && f)
  193. {
  194. real x = 0.;
  195. auto bv = Benchmark().repeats(reps).runs(3).run([&]() { x += f(a, b); });
  196. tr.info(std::setw(6), std::fixed, Benchmark::avg(bv)/1e-9, " ns [", Benchmark::stddev(bv)/1e-9, "] ", f.name)
  197. .test_rel_error(ref*3, x, rspec);
  198. };
  199. #define BENCH(f) bench(A, B, ref, rspec, N, f {});
  200. tr.section("std::vector<>");
  201. {
  202. std::vector<real> A(S1[0], a);
  203. std::vector<real> B(S1[0], b);
  204. BENCH(by_raw);
  205. }
  206. tr.section("unchecked pointer");
  207. {
  208. std::unique_ptr<real []> Au { new real[S1[0]] };
  209. std::unique_ptr<real []> Bu { new real[S1[0]] };
  210. auto A = Au.get();
  211. auto B = Bu.get();
  212. std::fill(A, A+S1[0], a);
  213. std::fill(B, B+S1[0], b);
  214. BENCH(by_raw);
  215. }
  216. #define BENCH_ALL \
  217. FOR_EACH(BENCH, by_raw, by_1l_ply_ravel, by_2l_ply_ravel, by_1w_ply_ravel, by_2w_ply_ravel); \
  218. FOR_EACH(BENCH, by_1l_plyf, by_2l_plyf, by_1w_plyf, by_2w_plyf); \
  219. tr.section("ra:: wrapped std::vector<>");
  220. {
  221. auto A = std::vector<real>(S1[0], a);
  222. auto B = std::vector<real>(S1[0], b);
  223. BENCH_ALL;
  224. }
  225. tr.section("raw<1>");
  226. {
  227. ra::Unique<real, 1> A(S1, a);
  228. ra::Unique<real, 1> B(S1, b);
  229. BENCH_ALL;
  230. }
  231. tr.section("raw<2>");
  232. {
  233. ra::Unique<real, 2> A(S2, a);
  234. ra::Unique<real, 2> B(S2, b);
  235. BENCH_ALL;
  236. }
  237. tr.section("raw<3>");
  238. {
  239. ra::Unique<real, 3> A(S3, a);
  240. ra::Unique<real, 3> B(S3, b);
  241. std::vector<real> x(17, ra::ALINF<real>);
  242. BENCH_ALL;
  243. }
  244. return tr.summary();
  245. }