// -*- mode: c++; coding: utf-8 -*-
// ra-ra - Naive optimization pass over expression templates.
// (c) Daniel Llorens - 2015-2023
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License as published by the Free
// Software Foundation; either version 3 of the License, or (at your option) any
// later version.
  8. #pragma once
  9. #include "small.hh"
  10. namespace ra {
  11. template <class E> constexpr decltype(auto) optimize(E && e) { return RA_FWD(e); }
  12. // FIXME only reduces iota exprs as operated on in ra.hh (operators), not a tree like WithLen does.
  13. #if RA_DO_OPT_IOTA==1
  14. // TODO maybe don't opt iota(int)*real -> iota(real) since a+a+... != n*a
  15. template <class X> concept iota_op = ra::is_zero_or_scalar<X> && std::is_arithmetic_v<value_t<X>>;
  16. // TODO something to handle the & variants...
  17. #define ITEM(i) std::get<(i)>(e.t)
  18. // FIXME gets() vs p2781r2
  19. // qualified ra::iota is necessary not to pick std::iota through ADL (test/headers.cc).
  20. template <is_iota I, iota_op J>
  21. constexpr auto
  22. optimize(Expr<std::plus<>, std::tuple<I, J>> && e)
  23. {
  24. return ra::iota(ITEM(0).n, ITEM(0).i+ITEM(1), ITEM(0).s);
  25. }
  26. template <iota_op I, is_iota J>
  27. constexpr auto
  28. optimize(Expr<std::plus<>, std::tuple<I, J>> && e)
  29. {
  30. return ra::iota(ITEM(1).n, ITEM(0)+ITEM(1).i, ITEM(1).s);
  31. }
  32. template <is_iota I, is_iota J>
  33. constexpr auto
  34. optimize(Expr<std::plus<>, std::tuple<I, J>> && e)
  35. {
  36. return ra::iota(maybe_len(e), ITEM(0).i+ITEM(1).i, ITEM(0).gets()+ITEM(1).gets());
  37. }
  38. template <is_iota I, iota_op J>
  39. constexpr auto
  40. optimize(Expr<std::minus<>, std::tuple<I, J>> && e)
  41. {
  42. return ra::iota(ITEM(0).n, ITEM(0).i-ITEM(1), ITEM(0).s);
  43. }
  44. template <iota_op I, is_iota J>
  45. constexpr auto
  46. optimize(Expr<std::minus<>, std::tuple<I, J>> && e)
  47. {
  48. return ra::iota(ITEM(1).n, ITEM(0)-ITEM(1).i, -ITEM(1).s);
  49. }
  50. template <is_iota I, is_iota J>
  51. constexpr auto
  52. optimize(Expr<std::minus<>, std::tuple<I, J>> && e)
  53. {
  54. return ra::iota(maybe_len(e), ITEM(0).i-ITEM(1).i, ITEM(0).gets()-ITEM(1).gets());
  55. }
  56. template <is_iota I, iota_op J>
  57. constexpr auto
  58. optimize(Expr<std::multiplies<>, std::tuple<I, J>> && e)
  59. {
  60. return ra::iota(ITEM(0).n, ITEM(0).i*ITEM(1), ITEM(0).gets()*ITEM(1));
  61. }
  62. template <iota_op I, is_iota J>
  63. constexpr auto
  64. optimize(Expr<std::multiplies<>, std::tuple<I, J>> && e)
  65. {
  66. return ra::iota(ITEM(1).n, ITEM(0)*ITEM(1).i, ITEM(0)*ITEM(1).gets());
  67. }
  68. template <is_iota I>
  69. constexpr auto
  70. optimize(Expr<std::negate<>, std::tuple<I>> && e)
  71. {
  72. return ra::iota(ITEM(0).n, -ITEM(0).i, -ITEM(0).gets());
  73. }
  74. #endif // RA_DO_OPT_IOTA
  75. #if RA_DO_OPT_SMALLVECTOR==1
  76. // FIXME I'm not able to match CellSmall directly, maybe bc N is in std::array { Dim { N, 1 } }.
  77. template <class A, class T, dim_t N> constexpr bool match_small =
  78. std::is_same_v<std::decay_t<A>, typename ra::Small<T, N>::template iterator<0>>
  79. || std::is_same_v<std::decay_t<A>, typename ra::Small<T, N>::template const_iterator<0>>;
  80. static_assert(match_small<ra::CellSmall<double, ic_t<std::array { Dim { 4, 1 } }>, 0>, double, 4>);
  81. #define RA_OPT_SMALLVECTOR_OP(OP, NAME, T, N) \
  82. template <class A, class B> requires (match_small<A, T, N> && match_small<B, T, N>) \
  83. constexpr auto \
  84. optimize(ra::Expr<NAME, std::tuple<A, B>> && e) \
  85. { \
  86. alignas (alignof(extvector<T, N>)) ra::Small<T, N> val; \
  87. *(extvector<T, N> *)(&val) = *(extvector<T, N> *)((ITEM(0).c.cp)) OP *(extvector<T, N> *)((ITEM(1).c.cp)); \
  88. return val; \
  89. }
  90. #define RA_OPT_SMALLVECTOR_OP_FUNS(T, N) \
  91. static_assert(0==alignof(ra::Small<T, N>) % alignof(extvector<T, N>)); \
  92. RA_OPT_SMALLVECTOR_OP(+, std::plus<>, T, N) \
  93. RA_OPT_SMALLVECTOR_OP(-, std::minus<>, T, N) \
  94. RA_OPT_SMALLVECTOR_OP(/, std::divides<>, T, N) \
  95. RA_OPT_SMALLVECTOR_OP(*, std::multiplies<>, T, N)
  96. #define RA_OPT_SMALLVECTOR_OP_SIZES(T) \
  97. RA_OPT_SMALLVECTOR_OP_FUNS(T, 2) \
  98. RA_OPT_SMALLVECTOR_OP_FUNS(T, 4) \
  99. RA_OPT_SMALLVECTOR_OP_FUNS(T, 8)
  100. FOR_EACH(RA_OPT_SMALLVECTOR_OP_SIZES, float, double)
  101. #undef RA_OPT_SMALLVECTOR_OP_SIZES
  102. #undef RA_OPT_SMALLVECTOR_OP_FUNS
  103. #undef RA_OPT_SMALLVECTOR_OP_OP
  104. #endif // RA_DO_OPT_SMALLVECTOR
  105. #undef ITEM
  106. } // namespace ra