OrderedTuples.java 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289
  1. package gnu.xquery.util;
  2. import gnu.lists.*;
  3. import gnu.mapping.*;
  4. import gnu.kawa.functions.NumberCompare;
  5. import gnu.kawa.xml.KNode;
  6. import gnu.kawa.xml.UntypedAtomic;
  7. /** Helper class used in conjunction with {@link OrderedMap}.
  8. * It has the tuples from the {@code for} and {@code let}-clauses,
  9. * as filtered by the {@code where}-clause.
  10. *
  11. * The tuples are sorted using a linked-list version of merge sort.
  12. *
  13. * The sequence of n tuples for m variables is represented using
  14. * an array of length n where each element is an array of length m.
  15. * A possible future optimization would be to instead use m
  16. * different arrays of of length n. The advantage is that each
  17. * of the M arrays could have the "correct" type for each variable,
  18. * and so we avoid casts or boxing/unboxing.
  19. */
  20. public class OrderedTuples extends FilterConsumer
  21. {
  22. /** The number of tuples. */
  23. int n;
  24. /** The sequence of tuples, in input (unsorted) order. */
  25. Object[] tuples; // Actually: Object[][] tuples.
  26. /** The compator functions.
  27. * If there are k comparator, the array's length is 3*k.
  28. * comps[3*i] is the i'th comparison function
  29. * (represented as a procedure on a tuple);
  30. * comps[3*i+1] is the i'th set of flags encoded as a string;
  31. * and comps[3*i+2] is the i'th collator
  32. * (either null or a NamedCollator).
  33. */
  34. Object[] comps;
  35. /* The index of the first tuple, after sorting. */
  36. int first;
  37. /** Used to chain the tuples after sorting.
  38. * I.e. if the i'th tuple (is sort order) is tuples[k],
  39. * then the (i+1)'the sorted tuple is tuples[next[k]].
  40. * The end of the list is indicated by -1.
  41. */
  42. int[] next;
  43. /** The return clause, encoded as a procedure on a tuple. */
  44. Procedure body;
  45. public boolean ignoring() { return false; }
  46. public void writeObject(Object v)
  47. {
  48. if (n >= tuples.length)
  49. {
  50. Object[] tmp = new Object[2 * n];
  51. System.arraycopy(tuples, 0, tmp, 0, n);
  52. tuples = tmp;
  53. }
  54. tuples[n++] = v;
  55. }
  56. OrderedTuples ()
  57. {
  58. super(null);
  59. tuples = new Object[10];
  60. }
  61. public static OrderedTuples make$V (Procedure body, Object[] comps)
  62. {
  63. OrderedTuples tuples = new OrderedTuples();
  64. tuples.comps = comps;
  65. tuples.body = body;
  66. return tuples;
  67. }
  68. public void run$X (CallContext ctx) throws Throwable
  69. {
  70. first = listsort(0);
  71. emit(ctx);
  72. }
  73. void emit (CallContext ctx) throws Throwable
  74. {
  75. for (int p = first; p >= 0; p = next[p])
  76. emit(p, ctx);
  77. }
  78. void emit (int index, CallContext ctx) throws Throwable
  79. {
  80. Object[] args = (Object[]) tuples[index];
  81. ctx.setupApplyAll(body, args);
  82. ctx.runUntilDone();
  83. }
  84. // The following sort routine is derived from Simon Tatham's listsort.c.
  85. // However we use array indexes instead of pointers, and the next
  86. // element instead of a next field.
  87. // I.e. p->next is mapped to next[p].
  88. // Instead of NULL we use -1.
  89. /*
  90. * Demonstration code for sorting a linked list.
  91. *
  92. * The algorithm used is Mergesort, because that works really well
  93. * on linked lists, without requiring the O(N) extra space it needs
  94. * when you do it on arrays.
  95. */
  96. /*
  97. * This file is copyright 2001 Simon Tatham.
  98. *
  99. * Permission is hereby granted, free of charge, to any person
  100. * obtaining a copy of this software and associated documentation
  101. * files (the "Software"), to deal in the Software without
  102. * restriction, including without limitation the rights to use,
  103. * copy, modify, merge, publish, distribute, sublicense, and/or
  104. * sell copies of the Software, and to permit persons to whom the
  105. * Software is furnished to do so, subject to the following
  106. * conditions:
  107. *
  108. * The above copyright notice and this permission notice shall be
  109. * included in all copies or substantial portions of the Software.
  110. *
  111. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  112. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  113. * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  114. * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
  115. * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
  116. * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  117. * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  118. * SOFTWARE.
  119. */
  120. int cmp(int a, int b) throws Throwable
  121. {
  122. for (int i = 0; i < comps.length; i += 3)
  123. {
  124. Procedure comparator = (Procedure) comps[i];
  125. String flags = (String) comps[i+1];
  126. NamedCollator collator = (NamedCollator) comps[i+2];
  127. if (collator == null)
  128. collator = NamedCollator.codepointCollation;
  129. Object val1 = comparator.applyN((Object[]) tuples[a]);
  130. Object val2 = comparator.applyN((Object[]) tuples[b]);
  131. val1 = KNode.atomicValue(val1);
  132. val2 = KNode.atomicValue(val2);
  133. if (val1 instanceof UntypedAtomic)
  134. val1 = val1.toString();
  135. if (val2 instanceof UntypedAtomic)
  136. val2 = val2.toString();
  137. boolean empty1 = SequenceUtils.isEmptySequence(val1);
  138. boolean empty2 = SequenceUtils.isEmptySequence(val2);
  139. if (empty1 && empty2)
  140. continue;
  141. int c;
  142. if (empty1 || empty2)
  143. {
  144. char emptyOrder = flags.charAt(1);
  145. c = empty1 == (emptyOrder == 'L') ? -1 : 1;
  146. }
  147. else
  148. {
  149. boolean isNaN1 = val1 instanceof Number
  150. && Double.isNaN(((Number) val1).doubleValue());
  151. boolean isNaN2 = val2 instanceof Number
  152. && Double.isNaN(((Number) val2).doubleValue());
  153. if (isNaN1 && isNaN2)
  154. continue;
  155. if (isNaN1 || isNaN2)
  156. {
  157. char emptyOrder = flags.charAt(1);
  158. c = isNaN1 == (emptyOrder == 'L') ? -1 : 1;
  159. }
  160. else if (val1 instanceof Number && val2 instanceof Number)
  161. c = NumberCompare.compare(val1, val2, false);
  162. else
  163. c = collator.compare(val1.toString(), val2.toString());
  164. }
  165. if (c == 0)
  166. continue;
  167. return flags.charAt(0) == 'A' ? c : -c;
  168. }
  169. return 0;
  170. }
  171. /*
  172. * This is the actual sort function. Notice that it returns the new
  173. * head of the list. (It has to, because the head will not
  174. * generally be the same element after the sort.) So unlike sorting
  175. * an array, where you can do
  176. *
  177. * sort(myarray);
  178. *
  179. * you now have to do
  180. *
  181. * list = listsort(mylist);
  182. */
  183. int listsort(int list) throws Throwable
  184. {// indexes
  185. int p, q, e, tail;
  186. int insize, nmerges, psize, qsize, i;
  187. /*
  188. * Silly special case: if `list' was passed in as NULL, return
  189. * NULL immediately.
  190. */
  191. if (n == 0)
  192. return -1;
  193. next = new int[n];
  194. for (i = 1; ; i++)
  195. {
  196. if (i == n)
  197. {
  198. next[i-1] = -1;
  199. break;
  200. }
  201. else
  202. next[i-1] = i;
  203. }
  204. insize = 1;
  205. for (;;) {
  206. p = list;
  207. list = -1;
  208. tail = -1;
  209. nmerges = 0; /* count number of merges we do in this pass */
  210. while (p >= 0) {
  211. nmerges++; /* there exists a merge to be done */
  212. /* step `insize' places along from p */
  213. q = p;
  214. psize = 0;
  215. for (i = 0; i < insize; i++) {
  216. psize++;
  217. q = next[q];
  218. if (q < 0) break;
  219. }
  220. /* if q hasn't fallen off end, we have two lists to merge */
  221. qsize = insize;
  222. /* now we have two lists; merge them */
  223. while (psize > 0 || (qsize > 0 && q >= 0)) {
  224. /* decide whether next element of merge comes from p or q */
  225. if (psize == 0) {
  226. /* p is empty; e must come from q. */
  227. e = q; q = next[q]; qsize--;
  228. } else if (qsize == 0 || q < 0) {
  229. /* q is empty; e must come from p. */
  230. e = p; p = next[p]; psize--;
  231. } else if (cmp(p,q) <= 0) {
  232. /* First element of p is lower (or same);
  233. * e must come from p. */
  234. e = p; p = next[p]; psize--;
  235. } else {
  236. /* First element of q is lower; e must come from q. */
  237. e = q; q = next[q]; qsize--;
  238. }
  239. /* add the next element to the merged list */
  240. if (tail >= 0)
  241. next[tail] = e;
  242. else
  243. list = e;
  244. tail = e;
  245. }
  246. /* now p has stepped `insize' places along, and q has too */
  247. p = q;
  248. }
  249. next[tail] = -1;
  250. /* If we have done only one merge, we're finished. */
  251. if (nmerges <= 1) /* allow for nmerges==0, the empty list case */
  252. return list;
  253. /* Otherwise repeat, merging lists twice the size */
  254. insize *= 2;
  255. }
  256. }
  257. }