bench-sum-cols.C 3.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. // -*- mode: c++; coding: utf-8 -*-
  2. /// @file bench-sum-cols.C
  3. /// @brief Benchmark various ways to sum columns.
  4. // (c) Daniel Llorens - 2016-2017
  5. // This library is free software; you can redistribute it and/or modify it under
  6. // the terms of the GNU Lesser General Public License as published by the Free
  7. // Software Foundation; either version 3 of the License, or (at your option) any
  8. // later version.
  9. #include <iostream>
  10. #include <iomanip>
  11. #include "ra/operators.H"
  12. #include "ra/io.H"
  13. #include "ra/test.H"
  14. #include "ra/bench.H"
  15. using std::cout, std::endl, std::flush;
  16. using real = double;
  17. int main()
  18. {
  19. TestRecorder tr(cout);
  20. cout.precision(4);
  21. auto bench =
  22. [&tr](char const * tag, int m, int n, int reps, auto && f)
  23. {
  24. ra::Big<real, 2> a({m, n}, ra::_0 - ra::_1);
  25. ra::Big<real, 1> ref({m}, 0);
  26. ref += a*reps;
  27. ra::Big<real, 1> c({m}, ra::none);
  28. auto bv = Benchmark().repeats(reps).runs(3)
  29. .once_f([&](auto && repeat) { c = 0.; repeat([&]() { f(c, a); }); });
  30. tr.info(std::setw(5), std::fixed, Benchmark::avg(bv)/(m*n)/1e-9, " ns [",
  31. Benchmark::stddev(bv)/(m*n)/1e-9 ,"] ", tag).test_eq(ref, c);
  32. };
  33. auto bench_all =
  34. [&](int m, int n, int reps)
  35. {
  36. tr.section(m, " x ", n, " times ", reps);
  37. bench("raw", m, n, reps,
  38. [](auto & c, auto const & a)
  39. {
  40. real * __restrict__ ap = a.data();
  41. real * __restrict__ cp = c.data();
  42. ra::dim_t const m = a.size(0);
  43. ra::dim_t const n = a.size(1);
  44. for (ra::dim_t i=0; i!=m; ++i) {
  45. for (ra::dim_t j=0; j!=n; ++j) {
  46. cp[i] += ap[i*n+j];
  47. }
  48. }
  49. });
  50. bench("sideways", m, n, reps,
  51. [](auto & c, auto const & a)
  52. {
  53. for (int j=0, jend=a.size(1); j<jend; ++j) {
  54. c += a(ra::all, j);
  55. }
  56. });
  57. bench("accumcols", m, n, reps,
  58. [](auto & c, auto const & a)
  59. {
  60. for_each([](auto & c, auto && a) { c += sum(a); }, c, iter<1>(a));
  61. });
  62. bench("wrank1", m, n, reps,
  63. [](auto & c, auto const & a)
  64. {
  65. for_each(ra::wrank<0, 0>([](auto & c, auto && a) { c += a; }), c, a);
  66. });
  67. bench("framematch", m, n, reps,
  68. [](auto & c, auto const & a)
  69. {
  70. c += a; // bump c after each row, so it cannot be raveled
  71. });
  72. };
  73. bench_all(1, 1000000, 20);
  74. bench_all(10, 100000, 20);
  75. bench_all(100, 10000, 20);
  76. bench_all(1000, 1000, 20);
  77. bench_all(10000, 100, 20);
  78. bench_all(100000, 10, 20);
  79. bench_all(1000000, 1, 20);
  80. bench_all(1, 10000, 2000);
  81. bench_all(10, 1000, 2000);
  82. bench_all(100, 100, 2000);
  83. bench_all(1000, 10, 2000);
  84. bench_all(10000, 1, 2000);
  85. return tr.summary();
  86. }