bench-sum-rows.cc 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. // -*- mode: c++; coding: utf-8 -*-
  2. /// @file bench-sum-rows.cc
  3. /// @brief Benchmark various ways to sum rows.
  4. // (c) Daniel Llorens - 2016-2017
  5. // This library is free software; you can redistribute it and/or modify it under
  6. // the terms of the GNU Lesser General Public License as published by the Free
  7. // Software Foundation; either version 3 of the License, or (at your option) any
  8. // later version.
  9. #include <iostream>
  10. #include <iomanip>
  11. #include "ra/ra.hh"
  12. #include "ra/test.hh"
  13. #include "ra/bench.hh"
  14. using std::cout, std::endl, std::flush, ra::TestRecorder;
  15. using real = double;
  16. int main()
  17. {
  18. TestRecorder tr(cout);
  19. cout.precision(4);
  20. auto bench =
  21. [&tr](char const * tag, int m, int n, int reps, auto && f)
  22. {
  23. ra::Big<real, 2> a({m, n}, ra::_0 - ra::_1);
  24. ra::Big<real, 1> ref({n}, 0);
  25. iter<1>(ref) += iter<1>(a)*reps;
  26. ra::Big<real, 1> c({n}, ra::none);
  27. auto bv = Benchmark().repeats(reps).runs(3)
  28. .once_f([&](auto && repeat) { c=0.; repeat([&]() { f(c, a); }); });
  29. tr.info(std::setw(5), std::fixed, Benchmark::avg(bv)/(m*n)/1e-9, " ns [",
  30. Benchmark::stddev(bv)/(m*n)/1e-9 ,"] ", tag).test_eq(ref, c);
  31. };
  32. auto bench_all =
  33. [&](int m, int n, int reps)
  34. {
  35. tr.section(m, " x ", n, " times ", reps);
  36. bench("raw", m, n, reps,
  37. [](auto & c, auto const & a)
  38. {
  39. real * __restrict__ ap = a.data();
  40. real * __restrict__ cp = c.data();
  41. ra::dim_t const m = a.size(0);
  42. ra::dim_t const n = a.size(1);
  43. for (ra::dim_t i=0; i!=m; ++i) {
  44. for (ra::dim_t j=0; j!=n; ++j) {
  45. cp[j] += ap[i*n+j];
  46. }
  47. }
  48. });
  49. bench("sideways", m, n, reps,
  50. [](auto & c, auto const & a)
  51. {
  52. for (int j=0, jend=a.size(1); j<jend; ++j) {
  53. c(j) += sum(a(ra::all, j));
  54. }
  55. });
  56. bench("accumrows", m, n, reps,
  57. [](auto & c, auto const & a)
  58. {
  59. for_each([&c](auto && a) { c += a; }, iter<1>(a));
  60. });
  61. bench("wrank1", m, n, reps,
  62. [](auto & c, auto const & a)
  63. {
  64. for_each(ra::wrank<1, 1>([](auto & c, auto && a) { c += a; }), c, a);
  65. });
  66. bench("wrank2", m, n, reps,
  67. [](auto & c, auto const & a)
  68. {
  69. for_each(ra::wrank<1, 1>(ra::wrank<0, 0>([](auto & c, auto a) { c += a; })), c, a);
  70. });
  71. bench("accumscalar", m, n, reps,
  72. [](auto & c, auto const & a)
  73. {
  74. ra::scalar(c) += iter<1>(a);
  75. });
  76. bench("accumiter", m, n, reps,
  77. [](auto & c, auto const & a)
  78. {
  79. iter<1>(c) += iter<1>(a);
  80. });
  81. bench("frametransp", m, n, reps,
  82. [](auto & c, auto const & a)
  83. {
  84. c += transpose<1, 0>(a);
  85. });
  86. };
  87. bench_all(1, 1000000, 20);
  88. bench_all(10, 100000, 20);
  89. bench_all(100, 10000, 20);
  90. bench_all(1000, 1000, 20);
  91. bench_all(10000, 100, 20);
  92. bench_all(100000, 10, 20);
  93. bench_all(1000000, 1, 20);
  94. bench_all(1, 10000, 2000);
  95. bench_all(10, 1000, 2000);
  96. bench_all(100, 100, 2000);
  97. bench_all(1000, 10, 2000);
  98. bench_all(10000, 1, 2000);
  99. return tr.summary();
  100. }