// small.hh
  1. // -*- mode: c++; coding: utf-8 -*-
  2. // ra-ra - Arrays with static lengths/strides, cf big.hh.
  3. // (c) Daniel Llorens - 2013-2023
  4. // This library is free software; you can redistribute it and/or modify it under
  5. // the terms of the GNU Lesser General Public License as published by the Free
  6. // Software Foundation; either version 3 of the License, or (at your option) any
  7. // later version.
  8. #pragma once
  9. #include "ply.hh"
  10. namespace ra {
  11. constexpr rank_t
  12. rank_sum(rank_t a, rank_t b) { return (ANY==a || ANY==b) ? ANY : a+b; }
  13. constexpr rank_t
  14. rank_diff(rank_t a, rank_t b) { return (ANY==a || ANY==b) ? ANY : a-b; }
  15. // cr>=0 is cell rank. -cr>0 is frame rank. TODO How to say frame rank 0.
  16. constexpr rank_t
  17. rank_cell(rank_t r, rank_t cr) { return cr>=0 ? cr /* indep */ : r==ANY ? ANY /* defer */ : (r+cr); }
  18. constexpr rank_t
  19. rank_frame(rank_t r, rank_t cr) { return r==ANY ? ANY /* defer */ : cr>=0 ? (r-cr) /* indep */ : -cr; }
  20. struct Dim { dim_t len, step; };
  21. inline std::ostream &
  22. operator<<(std::ostream & o, Dim const & dim) { return (o << "[Dim " << dim.len << " " << dim.step << "]"); }
  23. template <class Dimv>
  24. constexpr bool
  25. is_c_order_dimv(Dimv const & dimv, bool unitstep=true)
  26. {
  27. bool steps = true;
  28. dim_t s = 1;
  29. int k = dimv.size();
  30. if (!unitstep) {
  31. while (--k>=0 && 1==dimv[k].len) {}
  32. if (k<=0) { return true; }
  33. s = dimv[k].step*dimv[k].len;
  34. }
  35. while (--k>=0) {
  36. steps = steps && (1==dimv[k].len || dimv[k].step==s);
  37. s *= dimv[k].len;
  38. }
  39. return s==0 || steps;
  40. }
  // is_c_order_dimv() applied to the dimv member of v.
  template <class V>
  constexpr bool
  is_c_order(V const & v, bool unitstep=true)
  {
      return is_c_order_dimv(v.dimv, unitstep);
  }
  43. template <class Dimv, class S>
  44. constexpr dim_t
  45. filldim(Dimv & dimv, S && shape)
  46. {
  47. map(&Dim::len, dimv) = shape;
  48. dim_t s = 1;
  49. for (int k=dimv.size(); --k>=0;) {
  50. dimv[k].step = s;
  51. RA_CHECK(dimv[k].len>=0, "Bad len[", k, "] ", dimv[k].len, ".");
  52. s *= dimv[k].len;
  53. }
  54. return s;
  55. }
  56. // FIXME parameterize Small on dimv, then simplify.
  57. template <class lens>
  58. struct default_steps_
  59. {
  60. constexpr static int rank = mp::len<lens>;
  61. constexpr static auto dimv = [] { std::array<Dim, rank> dimv; filldim(dimv, mp::tuple_values<dim_t, lens>()); return dimv; } ();
  62. using type = decltype([] { return std::apply([](auto ... k) { return mp::int_list<dimv[k].step ...> {}; }, mp::iota<rank> {}); } ());
  63. };
  64. template <class lens> using default_steps = typename default_steps_<lens>::type;
  65. template <class V>
  66. constexpr dim_t
  67. shape(V const & v, int k)
  68. {
  69. RA_CHECK(inside(k, rank(v)), "Bad axis ", k, " for rank ", rank(v), ".");
  70. return v.len(k);
  71. }
  72. template <class A>
  73. constexpr void
  74. resize(A & a, dim_t s)
  75. {
  76. if constexpr (ANY==size_s<A>()) {
  77. RA_CHECK(s>=0, "Bad resize ", s, ".");
  78. a.resize(s);
  79. } else {
  80. RA_CHECK(s==start(a).len(0) || BAD==s, "Bad resize ", s, " vs ", start(a).len(0), ".");
  81. }
  82. }
  83. // --------------------
  84. // Slicing helpers
  85. // --------------------
  86. template <int n=BAD> struct dots_t { static_assert(n>=0 || BAD==n); };
  87. template <int n=BAD> constexpr dots_t<n> dots = dots_t<n>();
  88. constexpr auto all = dots<1>;
  89. template <int n> struct insert_t { static_assert(n>=0); };
  90. template <int n=1> constexpr insert_t<n> insert = insert_t<n>();
  91. template <class I> constexpr bool is_scalar_index = ra::is_zero_or_scalar<I>;
  // Description of how a subscript type acts on the axes of a view.
  struct beatable_t
  {
      bool rt;  // beatable at all
      bool ct;  // beatable statically
      int src;  // axes on src
      int dst;  // axes on dst
      int add;  // dst-src
  };
  97. template <class I> constexpr beatable_t beatable_def
  98. = { .rt=is_scalar_index<I>, .ct=is_scalar_index<I>, .src=1, .dst=0, .add=-1 };
  99. template <int n> constexpr beatable_t beatable_def<dots_t<n>>
  100. = { .rt=true, .ct = true, .src=n, .dst=n, .add=0 };
  101. template <int n> constexpr beatable_t beatable_def<insert_t<n>>
  102. = { .rt=true, .ct = true, .src=0, .dst=n, .add=n };
  103. template <class I>
  104. struct is_constant_iota
  105. {
  106. using Ilen = std::decay_t<decltype(with_len(ic<1>, std::declval<I>()))>; // arbitrary constant len
  107. constexpr static bool value = is_constant<typename Ilen::N> && is_constant<typename Ilen::S>;
  108. };
  109. template <class I> requires (is_iota<I>) constexpr beatable_t beatable_def<I>
  110. = { .rt=(BAD!=I::nn), .ct=is_constant_iota<I>::value, .src=1, .dst=1, .add=0 };
  111. template <class I> constexpr beatable_t beatable = beatable_def<std::decay_t<I>>;
  112. template <int k=0, class V>
  113. constexpr decltype(auto)
  114. maybe_len(V && v)
  115. {
  116. if constexpr (ANY!=std::decay_t<V>::len_s(k)) {
  117. return ic<std::decay_t<V>::len_s(k)>;
  118. } else {
  119. return v.len(k);
  120. }
  121. }
  122. template <class A, class ... I> constexpr decltype(auto) from(A && a, I && ... i);
  123. template <int N, class KK=mp::iota<N>> struct unbeat;
  124. template <int N, int ... k>
  125. struct unbeat<N, mp::int_list<k ...>>
  126. {
  127. template <class V, class ... I>
  128. constexpr static decltype(auto)
  129. op(V & v, I && ... i)
  130. {
  131. return from(v, with_len(maybe_len<k>(v), RA_FWD(i)) ...);
  132. }
  133. };
  134. // --------------------
  135. // Develop indices
  136. // --------------------
  137. template <rank_t k, rank_t end, class Q, class P, class S>
  138. constexpr dim_t
  139. indexer(Q const & q, P && pp, S const & ss0, dim_t c)
  140. {
  141. if constexpr (k==end) {
  142. return c;
  143. } else {
  144. auto pk = *pp;
  145. RA_CHECK(inside(pk, q.len(k)) || (BAD==q.len(k) && 0==q.step(k)));
  146. return pp.mov(ss0), indexer<k+1, end>(q, pp, ss0, c + (q.step(k) * pk));
  147. }
  148. }
  149. template <class Q, class P, class S>
  150. constexpr dim_t
  151. indexer(rank_t end, Q const & q, P && pp, S const & ss0)
  152. {
  153. dim_t c = 0;
  154. for (rank_t k=0; k<end; ++k, pp.mov(ss0)) {
  155. auto pk = *pp;
  156. RA_CHECK(inside(pk, q.len(k)) || (BAD==q.len(k) && 0==q.step(k)));
  157. c += q.step(k) * pk;
  158. }
  159. return c;
  160. }
  161. template <class Q, class P>
  162. constexpr dim_t
  163. longer(Q const & q, P const & pp)
  164. {
  165. decltype(auto) p = start(pp);
  166. if constexpr (ANY==rank_s<P>()) {
  167. RA_CHECK(1==rank(p), "Bad rank ", rank(p), " for subscript.");
  168. } else {
  169. static_assert(1==rank_s<P>(), "Bad rank for subscript.");
  170. }
  171. if constexpr (ANY==size_s<P>() || ANY==rank_s<Q>()) {
  172. RA_CHECK(p.len(0) >= q.rank(), "Too few indices.");
  173. } else {
  174. static_assert(size_s<P>() >= rank_s<Q>(), "Too few indices.");
  175. }
  176. if constexpr (ANY==rank_s<Q>()) {
  177. return indexer(q.rank(), q, p, p.step(0));
  178. } else {
  179. return indexer<0, rank_s<Q>()>(q, p, p.step(0), 0);
  180. }
  181. }
  182. // --------------------
  183. // Small iterator
  184. // --------------------
  185. template <class T, class lens, class steps> struct SmallView;
  186. // TODO Refactor with CellBig / STLIterator
  187. template <class T, class Dimv, rank_t spec=0>
  188. struct CellSmall
  189. {
  190. constexpr static auto dimv = Dimv::value;
  191. static_assert(spec!=ANY && spec!=BAD, "Bad cell rank.");
  192. constexpr static rank_t fullr = ssize(dimv);
  193. constexpr static rank_t cellr = rank_cell(fullr, spec);
  194. constexpr static rank_t framer = rank_frame(fullr, spec);
  195. static_assert(cellr>=0 || cellr==ANY, "Bad cell rank.");
  196. static_assert(framer>=0 || framer==ANY, "Bad frame rank.");
  197. static_assert(choose_rank(fullr, cellr)==fullr, "Bad cell rank.");
  198. // FIXME Small take dimv instead of lens/steps
  199. using clens = decltype(std::apply([](auto ... i) { return mp::int_list<dimv[i].len ...> {}; }, mp::iota<cellr, framer> {}));
  200. using csteps = decltype(std::apply([](auto ... i) { return mp::int_list<dimv[i].step ...> {}; }, mp::iota<cellr, framer> {}));
  201. using ctype = SmallView<T, clens, csteps>;
  202. using value_type = std::conditional_t<0==cellr, T, ctype>;
  203. ctype c;
  204. consteval static rank_t rank() { return framer; }
  205. #pragma GCC diagnostic push // gcc 13.2
  206. #pragma GCC diagnostic warning "-Warray-bounds"
  207. constexpr static dim_t len(int k) { return dimv[k].len; } // len(0<=k<rank) or step(0<=k)
  208. #pragma GCC diagnostic pop
  209. constexpr static dim_t len_s(int k) { return len(k); }
  210. constexpr static dim_t step(int k) { return k<rank() ? dimv[k].step : 0; }
  211. constexpr void adv(rank_t k, dim_t d) { c.cp += step(k)*d; }
  212. constexpr static bool keep_step(dim_t st, int z, int j) { return st*step(z)==step(j); }
  213. // see STLIterator for the case of s_[0]=0, etc. [ra12].
  214. constexpr CellSmall(T * p): c { p } {}
  215. constexpr CellSmall(CellSmall const & ci) = default;
  216. RA_DEF_ASSIGNOPS_DEFAULT_SET
  217. constexpr decltype(auto)
  218. at(auto const & i) const
  219. {
  220. auto d = longer(*this, i);
  221. if constexpr (0==cellr) {
  222. return c.cp[d];
  223. } else {
  224. ctype cc(c); cc.cp += d;
  225. return cc;
  226. }
  227. }
  228. constexpr decltype(auto) operator*() const requires (0==cellr) { return *(c.cp); }
  229. constexpr ctype operator*() const requires (0!=cellr) { return c; } // FIXME cf CellBig
  230. constexpr auto save() const { return c.cp; }
  231. constexpr void load(decltype(c.cp) cp) { c.cp = cp; }
  232. constexpr void mov(dim_t d) { c.cp += d; }
  233. };
  234. // ---------------------
  235. // nested braces for Small initializers + forward decl Small types
  236. // ---------------------
  237. // Other than the expr constructor, SmallArray has 4 others: empty, scalar, ravel, and nested. The scalar constructor is needed when T isn't registered as ra::scalar.
  238. // The ravel/nested/scalar constructors can be ambiguous. This is solved by defining arguments to noarg variants.
  239. template <class T, class lens>
  240. struct nested_tuple
  241. {
  242. using sub = noarg;
  243. using list = std::tuple<noarg>; // match the template for SmallArray.
  244. };
  245. template <class T, class lens>
  246. struct small_args
  247. {
  248. constexpr static int rs = mp::len<lens>;
  249. // if len(0)==0, prefer empty constructor. If shape==[1] scalar constructor.
  250. using nested = std::conditional_t<
  251. [] { if constexpr (0<rs) { int s = mp::ref<lens, 0>::value; return 0==s || (1==rs && 1==s); } else { return true; } } (),
  252. std::tuple<noarg>, // match SmallArray template
  253. typename nested_tuple<T, lens>::list>;
  254. // if rank=1 prefer nested tuple constructor. If rank=0 prefer scalar constructor.
  255. using ravel = std::conditional_t<
  256. (rs <=1) || (mp::apply<mp::prod, lens>::value <= 1),
  257. std::tuple<noarg, noarg>, // match SmallArray template
  258. mp::makelist<mp::apply<mp::prod, lens>::value, T>>;
  259. };
  260. template <class T, class lens, class steps,
  261. class nested_args = small_args<T, lens>::nested,
  262. class ravel_args = small_args<T, lens>::ravel>
  263. struct SmallArray;
  264. template <class T, dim_t ... lens>
  265. using Small = SmallArray<T, mp::int_list<lens ...>, default_steps<mp::int_list<lens ...>>>;
  266. template <class T, int S0>
  267. struct nested_tuple<T, mp::int_list<S0>>
  268. {
  269. using sub = T;
  270. using list = mp::makelist<S0, T>;
  271. };
  272. template <class T, int S0, int S1, int ... S>
  273. struct nested_tuple<T, mp::int_list<S0, S1, S ...>>
  274. {
  275. using sub = Small<T, S1, S ...>;
  276. using list = mp::makelist<S0, sub>;
  277. };
  278. // --------------------
  279. // Base for both small view & container
  280. // --------------------
  // Lengths and steps of the view that results from applying subscripts I ... to lens_ / steps_.
  // This primary template handles the case without subscripts left: everything is kept.
  template <class lens_, class steps_, class ... I>
  struct FilterDims
  {
      using lens = lens_;
      using steps = steps_;
  };
  287. template <class lens_, class steps_, class I0, class ... I> requires (!is_iota<I0>)
  288. struct FilterDims<lens_, steps_, I0, I ...>
  289. {
  290. constexpr static bool stretch = (beatable<I0>.dst==BAD);
  291. static_assert(!stretch || ((beatable<I>.dst!=BAD) && ...), "Cannot repeat stretch index.");
  292. constexpr static int dst = stretch ? (mp::len<lens_> - (0 + ... + beatable<I>.src)) : beatable<I0>.dst;
  293. constexpr static int src = stretch ? (mp::len<lens_> - (0 + ... + beatable<I>.src)) : beatable<I0>.src;
  294. using next = FilterDims<mp::drop<lens_, src>, mp::drop<steps_, src>, I ...>;
  295. using lens = mp::append<mp::take<lens_, dst>, typename next::lens>;
  296. using steps = mp::append<mp::take<steps_, dst>, typename next::steps>;
  297. };
  298. template <class lens_, class steps_, class I0, class ... I> requires (is_iota<I0>)
  299. struct FilterDims<lens_, steps_, I0, I ...>
  300. {
  301. constexpr static int dst = beatable<I0>.dst;
  302. constexpr static int src = beatable<I0>.src;
  303. using next = FilterDims<mp::drop<lens_, src>, mp::drop<steps_, src>, I ...>;
  304. using lens = mp::append<mp::int_list<I0::nn>, typename next::lens>;
  305. using steps = mp::append<mp::int_list<(mp::ref<steps_, 0>::value * I0::gets())>, typename next::steps>;
  306. };
  307. template <template <class ...> class Child_, class T_, class lens_, class steps_>
  308. struct SmallBase
  309. {
  310. using lens = lens_;
  311. using steps = steps_;
  312. using T = T_;
  313. using Child = Child_<T, lens, steps>;
  314. static_assert(mp::len<lens> == mp::len<steps>, "Mismatched lengths & steps.");
  315. consteval static rank_t rank() { return mp::len<lens>; }
  316. constexpr static auto dimv = std::apply([](auto ... i) { return std::array<Dim, rank()> { Dim { mp::ref<lens, i>::value, mp::ref<steps, i>::value } ... }; }, mp::iota<rank()> {});
  317. constexpr static auto theshape = mp::tuple_values<dim_t, lens>();
  318. consteval static dim_t size() { return std::apply([](auto ... s) { return (s * ... * 1); }, theshape); }
  319. constexpr static dim_t len(int k) { return dimv[k].len; }
  320. consteval static dim_t size_s() { return size(); }
  321. constexpr static dim_t len_s(int k) { return len(k); }
  322. constexpr static dim_t step(int k) { return dimv[k].step; }
  323. consteval static decltype(auto) shape() { return theshape; }
  324. // TODO check steps
  325. static_assert(std::apply([](auto ... s) { return ((0<=s) && ...); }, theshape), "Bad shape.");
  326. constexpr static bool convertible_to_scalar = (1==size()); // allowed for 1 for coord types
  327. template <int k>
  328. constexpr static dim_t
  329. select(dim_t i)
  330. {
  331. RA_CHECK(inside(i, len(k)),
  332. "Out of range for len[", k, "]=", len(k), ": ", i, ".");
  333. return step(k)*i;
  334. }
  335. template <int k, class I> requires (is_iota<I>)
  336. constexpr static dim_t
  337. select(I i)
  338. {
  339. if constexpr (0==i.n) {
  340. return 0;
  341. } else if constexpr ((1==i.n ? 1 : (i.s<0 ? -i.s : i.s)*(i.n-1)+1) > len(k)) { // FIXME c++23 std::abs
  342. static_assert(always_false<I>, "Out of range.");
  343. } else {
  344. RA_CHECK(inside(i, len(k)),
  345. "Out of range for len[", k, "]=", len(k), ": iota [", i.n, " ", i.i, " ", i.s, "]");
  346. }
  347. return step(k)*i.i;
  348. }
  349. template <int k, int n>
  350. constexpr static dim_t
  351. select(dots_t<n> i)
  352. {
  353. return 0;
  354. }
  355. template <int k, class I0, class ... I>
  356. constexpr static dim_t
  357. select_loop(I0 && i0, I && ... i)
  358. {
  359. constexpr int nn = (BAD==beatable<I0>.src) ? (rank() - k - (0 + ... + beatable<I>.src)) : beatable<I0>.src;
  360. return select<k>(with_len(ic<len(k)>, RA_FWD(i0)))
  361. + select_loop<k + nn>(RA_FWD(i) ...);
  362. }
  363. template <int k>
  364. consteval static dim_t
  365. select_loop()
  366. {
  367. return 0;
  368. }
  369. #define RA_CONST_OR_NOT(CONST) \
  370. constexpr T CONST * data() CONST { return static_cast<Child CONST &>(*this).cp; } \
  371. template <class ... I> \
  372. constexpr decltype(auto) \
  373. operator()(I && ... i) CONST \
  374. { \
  375. constexpr int stretch = (0 + ... + (beatable<I>.dst==BAD)); \
  376. static_assert(stretch<=1, "Cannot repeat stretch index."); \
  377. if constexpr ((0 + ... + is_scalar_index<I>)==rank()) { \
  378. return data()[select_loop<0>(i ...)]; \
  379. /* FIXME with_len before this, cf is_constant_iota */ \
  380. } else if constexpr ((beatable<I>.ct && ...)) { \
  381. using FD = FilterDims<lens, steps, std::decay_t<I> ...>; \
  382. return SmallView<T CONST, typename FD::lens, typename FD::steps> (data() + select_loop<0>(i ...)); \
  383. } else { /* TODO partial beating */ \
  384. return unbeat<sizeof...(I)>::op(*this, RA_FWD(i) ...); \
  385. } \
  386. } \
  387. template <class ... I> \
  388. constexpr decltype(auto) \
  389. operator[](I && ... i) CONST { return (*this)(RA_FWD(i) ...); } \
  390. \
  391. template <class I> \
  392. constexpr decltype(auto) \
  393. at(I && i) CONST \
  394. { /* FIXME no way to say 'frame rank 0' so -size wouldn't work. */ \
  395. constexpr rank_t crank = rank_diff(rank(), ra::size_s<I>()); \
  396. static_assert(crank>=0); /* to make out the output type */ \
  397. return iter<crank>().at(RA_FWD(i)); \
  398. } \
  399. /* maybe remove if ic becomes easier to use */ \
  400. template <int ss, int oo=0> \
  401. constexpr auto \
  402. as() CONST \
  403. { \
  404. return operator()(ra::iota(ra::ic<ss>, oo)); \
  405. } \
  406. decltype(auto) \
  407. back() CONST \
  408. { \
  409. static_assert(rank()>=1 && size()>0, "back() is not available"); \
  410. return (*this)[size()-1]; \
  411. } \
  412. constexpr operator T CONST & () CONST requires (convertible_to_scalar) { return data()[0]; }
  413. FOR_EACH(RA_CONST_OR_NOT, /*not const*/, const)
  414. #undef RA_CONST_OR_NOT
  415. #define DEF_ASSIGNOPS(OP) \
  416. template <class X> requires (!mp::is_tuple<std::decay_t<X>>) \
  417. constexpr Child & \
  418. operator OP(X && x) \
  419. { \
  420. ra::start(*this) OP x; \
  421. return static_cast<Child &>(*this); \
  422. }
  423. FOR_EACH(DEF_ASSIGNOPS, =, *=, +=, -=, /=)
  424. #undef DEF_ASSIGNOPS
  425. // braces don't match X &&
  426. constexpr Child &
  427. operator=(small_args<T, lens>::nested const & x)
  428. {
  429. ra::iter<-1>(*this) = mp::from_tuple<std::array<typename nested_tuple<T, lens>::sub, len(0)>>(x);
  430. return static_cast<Child &>(*this);
  431. }
  432. // braces row-major ravel for rank!=1
  433. constexpr Child &
  434. operator=(small_args<T, lens>::ravel const & x)
  435. {
  436. auto a = mp::from_tuple<std::array<T, size()>>(x);
  437. std::copy(a.begin(), a.end(), begin());
  438. return static_cast<Child &>(*this);
  439. }
  440. template <rank_t c=0> using iterator = CellSmall<T, ic_t<dimv>, c>;
  441. template <rank_t c=0> using const_iterator = CellSmall<T const, ic_t<dimv>, c>;
  442. template <rank_t c=0> constexpr iterator<c> iter() { return data(); }
  443. template <rank_t c=0> constexpr const_iterator<c> iter() const { return data(); }
  444. constexpr static bool def = is_c_order_dimv(dimv);
  445. constexpr auto begin() const { if constexpr (def) return data(); else return STLIterator(iter()); }
  446. constexpr auto begin() { if constexpr (def) return data(); else return STLIterator(iter()); }
  447. constexpr auto end() const { if constexpr (def) return data()+size(); else return std::default_sentinel; }
  448. constexpr auto end() { if constexpr (def) return data()+size(); else return std::default_sentinel; }
  449. };
  450. // ---------------------
  451. // Small view & container
  452. // ---------------------
  453. // Strides are compile time, so we can put most members in the view type.
  454. template <class T, class lens, class steps>
  455. struct SmallView: public SmallBase<SmallView, T, lens, steps>
  456. {
  457. using Base = SmallBase<SmallView, T, lens, steps>;
  458. using Base::operator=;
  459. T * cp;
  460. constexpr SmallView(T * cp_): cp(cp_) {}
  461. constexpr SmallView(SmallView const & s): cp(s.cp) {}
  462. constexpr operator T const & () const { static_assert(Base::convertible_to_scalar); return cp[0]; }
  463. constexpr operator T & () { static_assert(Base::convertible_to_scalar); return cp[0]; }
  464. using ViewConst = SmallView<T const, lens, steps>;
  465. constexpr operator ViewConst () const requires (!std::is_const_v<T>) { return ViewConst(cp); }
  466. constexpr SmallView const & view() const { return *this; }
  467. constexpr SmallView & view() { return *this; }
  468. };
  // Compiler vector extension type with N elements of type T.
  #ifdef __clang__
  template <class T, int N>
  using extvector __attribute__((ext_vector_type(N))) = T;
  #else
  template <class T, int N>
  using extvector __attribute__((vector_size(N*sizeof(T)))) = T;
  #endif
  // True if Z is the same type as at least one of T ...; false for an empty list.
  template <class Z, class ... T>
  constexpr static bool equal_to_any = std::disjunction_v<std::is_same<Z, T> ...>;
  476. template <class T, size_t N>
  477. consteval size_t
  478. align_req()
  479. {
  480. if constexpr (equal_to_any<T, char, unsigned char, short, unsigned short,
  481. int, unsigned int, long, unsigned long, long long, unsigned long long,
  482. float, double>
  483. && 0<N && 0==(N & (N-1))) {
  484. return alignof(extvector<T, N>);
  485. } else {
  486. return alignof(T[N]);
  487. }
  488. }
  489. template <class T, class lens, class steps, class ... nested_args, class ... ravel_args>
  490. struct
  491. #if RA_DO_OPT_SMALLVECTOR==1
  492. alignas(align_req<T, mp::apply<mp::prod, lens>::value>())
  493. #else
  494. #endif
  495. SmallArray<T, lens, steps, std::tuple<nested_args ...>, std::tuple<ravel_args ...>>
  496. : public SmallBase<SmallArray, T, lens, steps>
  497. {
  498. using Base = SmallBase<SmallArray, T, lens, steps>;
  499. using Base::rank, Base::size;
  500. T cp[Base::size()]; // cf what std::array does for zero size; wish zero size just worked :-/
  501. constexpr SmallArray() {}
  502. constexpr SmallArray(nested_args const & ... x)
  503. {
  504. static_cast<Base &>(*this) = typename small_args<T, lens>::nested { x ... };
  505. }
  506. // braces row-major ravel for rank!=1
  507. constexpr SmallArray(ravel_args const & ... x)
  508. {
  509. static_cast<Base &>(*this) = typename small_args<T, lens>::ravel { x ... };
  510. }
  511. // needed if T isn't registered as scalar [ra44]
  512. constexpr SmallArray(T const & t)
  513. {
  514. for (auto & x: cp) { x = t; }
  515. }
  516. // X && x makes this a better match than nested_args ... for 1 argument.
  517. template <class X>
  518. requires (!std::is_same_v<std::decay_t<X>, T> && !mp::is_tuple<std::decay_t<X>>)
  519. constexpr SmallArray(X && x)
  520. {
  521. static_cast<Base &>(*this) = x;
  522. }
  523. using View = SmallView<T, lens, steps>;
  524. using ViewConst = SmallView<T const, lens, steps>;
  525. constexpr View view() { return View(cp); }
  526. constexpr ViewConst view() const { return ViewConst(cp); }
  527. // conversion to const
  528. constexpr operator View () { return View(cp); }
  529. constexpr operator ViewConst () const { return ViewConst(cp); }
  530. };
  531. template <class A0, class ... A> SmallArray(A0, A ...) -> Small<A0, 1+sizeof...(A)>;
  // Make an A and fill it, through A::begin(), with a copy of the range [begin, end).
  // FIXME remove the need, also of (S, begin, end) in Container, once nested_tuple constructors work.
  template <class A, class I, class J>
  constexpr auto
  ravel_from_iterators(I && begin, J && end)
  {
      A result;
      std::copy(RA_FWD(begin), RA_FWD(end), result.begin());
      return result;
  }
  541. // ---------------------
  542. // Builtin arrays.
  543. // ---------------------
  544. template <class T>
  545. constexpr auto
  546. peel(T && t)
  547. {
  548. static_assert(0 < std::extent_v<std::remove_cvref_t<T>, 0>);
  549. if constexpr (1 < std::rank_v<std::remove_cvref_t<T>>) {
  550. return peel(*std::data(RA_FWD(t)));
  551. } else {
  552. return std::data(t);
  553. }
  554. }
  555. template <class T> requires (is_builtin_array<T>)
  556. constexpr auto
  557. start(T && t)
  558. {
  559. using A = std::remove_volatile_t<std::remove_reference_t<T>>; // preserve const
  560. using lens = decltype(std::apply([](auto ... i) { return mp::int_list<std::extent_v<A, i> ...> {}; },
  561. mp::iota<std::rank_v<A>> {}));
  562. return SmallView<std::remove_all_extents_t<A>, lens, default_steps<lens>>(peel(t)).iter();
  563. }
  564. // --------------------
  565. // Small view ops, see View ops in big.hh.
  566. // FIXME Merge with Reframe (eg beat(reframe(a)) -> transpose(a) ?)
  567. // --------------------
  568. template <class A, class i>
  569. struct axis_indices
  570. {
  571. template <class T> using match_index = ic_t<(T::value==i::value)>;
  572. using I = mp::iota<mp::len<A>>;
  573. using type = mp::Filter_<mp::map<match_index, A>, I>;
  574. };
  575. template <class axes_list, class src_lens, class src_steps>
  576. struct axes_list_indices
  577. {
  578. static_assert(mp::len<axes_list> == mp::len<src_lens>, "Bad size for transposed axes list.");
  579. constexpr static int talmax = mp::fold<mp::max, void, axes_list>::value;
  580. constexpr static int talmin = mp::fold<mp::min, void, axes_list>::value;
  581. static_assert(talmin >= 0, "Bad index in transposed axes list.");
  582. template <class dst_i> struct dst_indices_
  583. {
  584. using type = typename axis_indices<axes_list, dst_i>::type;
  585. template <class i> using lensi = mp::ref<src_lens, i::value>;
  586. template <class i> using stepsi = mp::ref<src_steps, i::value>;
  587. using step = mp::fold<mp::sum, void, mp::map<stepsi, type>>;
  588. using len = mp::fold<mp::min, void, mp::map<lensi, type>>;
  589. };
  590. template <class dst_i> using dst_indices = typename dst_indices_<dst_i>::type;
  591. template <class dst_i> using dst_len = typename dst_indices_<dst_i>::len;
  592. template <class dst_i> using dst_step = typename dst_indices_<dst_i>::step;
  593. using dst = mp::iota<(talmax>=0 ? (1+talmax) : 0)>;
  594. using type = mp::map<dst_indices, dst>;
  595. using lens = mp::map<dst_len, dst>;
  596. using steps = mp::map<dst_step, dst>;
  597. };
  598. RA_IS_DEF(cv_smallview, (std::is_convertible_v<A, SmallView<typename A::T, typename A::lens, typename A::steps>>));
  599. template <int ... Iarg, class A> requires (cv_smallview<A>)
  600. constexpr auto
  601. transpose(A && a_)
  602. {
  603. decltype(auto) a = a_.view();
  604. using AA = typename std::decay_t<decltype(a)>;
  605. using ti = axes_list_indices<mp::int_list<Iarg ...>, typename AA::lens, typename AA::steps>;
  606. return SmallView<typename AA::T, typename ti::lens, typename ti::steps>(a.data());
  607. };
  608. template <class A> requires (cv_smallview<A>)
  609. constexpr auto
  610. diag(A && a)
  611. {
  612. return transpose<0, 0>(a);
  613. }
  614. // TODO generalize
  615. template <class A1, class A2> requires (cv_smallview<A1> || cv_smallview<A2>)
  616. constexpr auto
  617. cat(A1 && a1_, A2 && a2_)
  618. {
  619. if constexpr (cv_smallview<A1> && cv_smallview<A2>) {
  620. decltype(auto) a1 = a1_.view();
  621. decltype(auto) a2 = a2_.view();
  622. static_assert(1==a1.rank() && 1==a2.rank(), "Bad ranks for cat."); // gcc accepts a1.rank(), etc.
  623. using T = std::common_type_t<std::decay_t<decltype(a1[0])>, std::decay_t<decltype(a2[0])>>;
  624. Small<T, a1.size()+a2.size()> val;
  625. std::copy(a1.begin(), a1.end(), val.begin());
  626. std::copy(a2.begin(), a2.end(), val.begin()+a1.size());
  627. return val;
  628. } else if constexpr (cv_smallview<A1> && is_scalar<A2>) {
  629. decltype(auto) a1 = a1_.view();
  630. static_assert(1==a1.rank(), "bad ranks for cat");
  631. using T = std::common_type_t<std::decay_t<decltype(a1[0])>, A2>;
  632. Small<T, a1.size()+1> val;
  633. std::copy(a1.begin(), a1.end(), val.begin());
  634. val[a1.size()] = a2_;
  635. return val;
  636. } else if constexpr (is_scalar<A1> && cv_smallview<A2>) {
  637. decltype(auto) a2 = a2_.view();
  638. static_assert(1==a2.rank(), "bad ranks for cat");
  639. using T = std::common_type_t<A1, std::decay_t<decltype(a2[0])>>;
  640. Small<T, 1+a2.size()> val;
  641. val[0] = a1_;
  642. std::copy(a2.begin(), a2.end(), val.begin()+1);
  643. return val;
  644. } else {
  645. static_assert(always_false<A1, A2>);
  646. }
  647. }
  648. template <class super_t, class A> requires (cv_smallview<A>)
  649. constexpr auto
  650. explode(A && a_)
  651. {
  652. // result has steps in super_t, but to support general steps we'd need steps in T. FIXME?
  653. decltype(auto) a = a_.view();
  654. using AA = std::decay_t<decltype(a)>;
  655. static_assert(super_t::def);
  656. constexpr rank_t ra = ra::rank_s<AA>();
  657. constexpr rank_t rb = rank_s<super_t>();
  658. static_assert(std::is_same_v<mp::drop<typename AA::lens, ra-rb>, typename super_t::lens>);
  659. static_assert(std::is_same_v<mp::drop<typename AA::steps, ra-rb>, typename super_t::steps>);
  660. constexpr dim_t supers = ra::size_s<super_t>();
  661. using csteps = decltype(std::apply([](auto ... i)
  662. {
  663. static_assert(((i==(i/supers)*supers) && ...));
  664. return mp::int_list<(i/supers) ...> {};
  665. }, mp::take<typename AA::steps, ra-rb> {}));
  666. return SmallView<super_t, mp::take<typename AA::lens, ra-rb>, csteps>((super_t *) a.data());
  667. }
  668. } // namespace ra