ackvec.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. /*
  2. * net/dccp/ackvec.c
  3. *
  4. * An implementation of Ack Vectors for the DCCP protocol
  5. * Copyright (c) 2007 University of Aberdeen, Scotland, UK
  6. * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
  7. *
  8. * This program is free software; you can redistribute it and/or modify it
  9. * under the terms of the GNU General Public License as published by the
  10. * Free Software Foundation; version 2 of the License;
  11. */
  12. #include "dccp.h"
  13. #include <linux/kernel.h>
  14. #include <linux/slab.h>
/* Slab cache for struct dccp_ackvec objects; set up by dccp_ackvec_init(). */
static struct kmem_cache *dccp_ackvec_slab;
/* Slab cache for struct dccp_ackvec_record objects; set up by dccp_ackvec_init(). */
static struct kmem_cache *dccp_ackvec_record_slab;
  17. struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
  18. {
  19. struct dccp_ackvec *av = kmem_cache_zalloc(dccp_ackvec_slab, priority);
  20. if (av != NULL) {
  21. av->av_buf_head = av->av_buf_tail = DCCPAV_MAX_ACKVEC_LEN - 1;
  22. INIT_LIST_HEAD(&av->av_records);
  23. }
  24. return av;
  25. }
  26. static void dccp_ackvec_purge_records(struct dccp_ackvec *av)
  27. {
  28. struct dccp_ackvec_record *cur, *next;
  29. list_for_each_entry_safe(cur, next, &av->av_records, avr_node)
  30. kmem_cache_free(dccp_ackvec_record_slab, cur);
  31. INIT_LIST_HEAD(&av->av_records);
  32. }
  33. void dccp_ackvec_free(struct dccp_ackvec *av)
  34. {
  35. if (likely(av != NULL)) {
  36. dccp_ackvec_purge_records(av);
  37. kmem_cache_free(dccp_ackvec_slab, av);
  38. }
  39. }
  40. /**
  41. * dccp_ackvec_update_records - Record information about sent Ack Vectors
  42. * @av: Ack Vector records to update
  43. * @seqno: Sequence number of the packet carrying the Ack Vector just sent
  44. * @nonce_sum: The sum of all buffer nonces contained in the Ack Vector
  45. */
  46. int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum)
  47. {
  48. struct dccp_ackvec_record *avr;
  49. avr = kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
  50. if (avr == NULL)
  51. return -ENOBUFS;
  52. avr->avr_ack_seqno = seqno;
  53. avr->avr_ack_ptr = av->av_buf_head;
  54. avr->avr_ack_ackno = av->av_buf_ackno;
  55. avr->avr_ack_nonce = nonce_sum;
  56. avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head);
  57. /*
  58. * When the buffer overflows, we keep no more than one record. This is
  59. * the simplest way of disambiguating sender-Acks dating from before the
  60. * overflow from sender-Acks which refer to after the overflow; a simple
  61. * solution is preferable here since we are handling an exception.
  62. */
  63. if (av->av_overflow)
  64. dccp_ackvec_purge_records(av);
  65. /*
  66. * Since GSS is incremented for each packet, the list is automatically
  67. * arranged in descending order of @ack_seqno.
  68. */
  69. list_add(&avr->avr_node, &av->av_records);
  70. dccp_pr_debug("Added Vector, ack_seqno=%llu, ack_ackno=%llu (rl=%u)\n",
  71. (unsigned long long)avr->avr_ack_seqno,
  72. (unsigned long long)avr->avr_ack_ackno,
  73. avr->avr_ack_runlen);
  74. return 0;
  75. }
  76. static struct dccp_ackvec_record *dccp_ackvec_lookup(struct list_head *av_list,
  77. const u64 ackno)
  78. {
  79. struct dccp_ackvec_record *avr;
  80. /*
  81. * Exploit that records are inserted in descending order of sequence
  82. * number, start with the oldest record first. If @ackno is `before'
  83. * the earliest ack_ackno, the packet is too old to be considered.
  84. */
  85. list_for_each_entry_reverse(avr, av_list, avr_node) {
  86. if (avr->avr_ack_seqno == ackno)
  87. return avr;
  88. if (before48(ackno, avr->avr_ack_seqno))
  89. break;
  90. }
  91. return NULL;
  92. }
  93. /*
  94. * Buffer index and length computation using modulo-buffersize arithmetic.
  95. * Note that, as pointers move from right to left, head is `before' tail.
  96. */
  97. static inline u16 __ackvec_idx_add(const u16 a, const u16 b)
  98. {
  99. return (a + b) % DCCPAV_MAX_ACKVEC_LEN;
  100. }
  101. static inline u16 __ackvec_idx_sub(const u16 a, const u16 b)
  102. {
  103. return __ackvec_idx_add(a, DCCPAV_MAX_ACKVEC_LEN - b);
  104. }
  105. u16 dccp_ackvec_buflen(const struct dccp_ackvec *av)
  106. {
  107. if (unlikely(av->av_overflow))
  108. return DCCPAV_MAX_ACKVEC_LEN;
  109. return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head);
  110. }
  111. /**
  112. * dccp_ackvec_update_old - Update previous state as per RFC 4340, 11.4.1
  113. * @av: non-empty buffer to update
  114. * @distance: negative or zero distance of @seqno from buf_ackno downward
  115. * @seqno: the (old) sequence number whose record is to be updated
  116. * @state: state in which packet carrying @seqno was received
  117. */
  118. static void dccp_ackvec_update_old(struct dccp_ackvec *av, s64 distance,
  119. u64 seqno, enum dccp_ackvec_states state)
  120. {
  121. u16 ptr = av->av_buf_head;
  122. BUG_ON(distance > 0);
  123. if (unlikely(dccp_ackvec_is_empty(av)))
  124. return;
  125. do {
  126. u8 runlen = dccp_ackvec_runlen(av->av_buf + ptr);
  127. if (distance + runlen >= 0) {
  128. /*
  129. * Only update the state if packet has not been received
  130. * yet. This is OK as per the second table in RFC 4340,
  131. * 11.4.1; i.e. here we are using the following table:
  132. * RECEIVED
  133. * 0 1 3
  134. * S +---+---+---+
  135. * T 0 | 0 | 0 | 0 |
  136. * O +---+---+---+
  137. * R 1 | 1 | 1 | 1 |
  138. * E +---+---+---+
  139. * D 3 | 0 | 1 | 3 |
  140. * +---+---+---+
  141. * The "Not Received" state was set by reserve_seats().
  142. */
  143. if (av->av_buf[ptr] == DCCPAV_NOT_RECEIVED)
  144. av->av_buf[ptr] = state;
  145. else
  146. dccp_pr_debug("Not changing %llu state to %u\n",
  147. (unsigned long long)seqno, state);
  148. break;
  149. }
  150. distance += runlen + 1;
  151. ptr = __ackvec_idx_add(ptr, 1);
  152. } while (ptr != av->av_buf_tail);
  153. }
  154. /* Mark @num entries after buf_head as "Not yet received". */
  155. static void dccp_ackvec_reserve_seats(struct dccp_ackvec *av, u16 num)
  156. {
  157. u16 start = __ackvec_idx_add(av->av_buf_head, 1),
  158. len = DCCPAV_MAX_ACKVEC_LEN - start;
  159. /* check for buffer wrap-around */
  160. if (num > len) {
  161. memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, len);
  162. start = 0;
  163. num -= len;
  164. }
  165. if (num)
  166. memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, num);
  167. }
  168. /**
  169. * dccp_ackvec_add_new - Record one or more new entries in Ack Vector buffer
  170. * @av: container of buffer to update (can be empty or non-empty)
  171. * @num_packets: number of packets to register (must be >= 1)
  172. * @seqno: sequence number of the first packet in @num_packets
  173. * @state: state in which packet carrying @seqno was received
  174. */
  175. static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets,
  176. u64 seqno, enum dccp_ackvec_states state)
  177. {
  178. u32 num_cells = num_packets;
  179. if (num_packets > DCCPAV_BURST_THRESH) {
  180. u32 lost_packets = num_packets - 1;
  181. DCCP_WARN("Warning: large burst loss (%u)\n", lost_packets);
  182. /*
  183. * We received 1 packet and have a loss of size "num_packets-1"
  184. * which we squeeze into num_cells-1 rather than reserving an
  185. * entire byte for each lost packet.
  186. * The reason is that the vector grows in O(burst_length); when
  187. * it grows too large there will no room left for the payload.
  188. * This is a trade-off: if a few packets out of the burst show
  189. * up later, their state will not be changed; it is simply too
  190. * costly to reshuffle/reallocate/copy the buffer each time.
  191. * Should such problems persist, we will need to switch to a
  192. * different underlying data structure.
  193. */
  194. for (num_packets = num_cells = 1; lost_packets; ++num_cells) {
  195. u8 len = min(lost_packets, (u32)DCCPAV_MAX_RUNLEN);
  196. av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, 1);
  197. av->av_buf[av->av_buf_head] = DCCPAV_NOT_RECEIVED | len;
  198. lost_packets -= len;
  199. }
  200. }
  201. if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) {
  202. DCCP_CRIT("Ack Vector buffer overflow: dropping old entries\n");
  203. av->av_overflow = true;
  204. }
  205. av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, num_packets);
  206. if (av->av_overflow)
  207. av->av_buf_tail = av->av_buf_head;
  208. av->av_buf[av->av_buf_head] = state;
  209. av->av_buf_ackno = seqno;
  210. if (num_packets > 1)
  211. dccp_ackvec_reserve_seats(av, num_packets - 1);
  212. }
  213. /**
  214. * dccp_ackvec_input - Register incoming packet in the buffer
  215. */
  216. void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb)
  217. {
  218. u64 seqno = DCCP_SKB_CB(skb)->dccpd_seq;
  219. enum dccp_ackvec_states state = DCCPAV_RECEIVED;
  220. if (dccp_ackvec_is_empty(av)) {
  221. dccp_ackvec_add_new(av, 1, seqno, state);
  222. av->av_tail_ackno = seqno;
  223. } else {
  224. s64 num_packets = dccp_delta_seqno(av->av_buf_ackno, seqno);
  225. u8 *current_head = av->av_buf + av->av_buf_head;
  226. if (num_packets == 1 &&
  227. dccp_ackvec_state(current_head) == state &&
  228. dccp_ackvec_runlen(current_head) < DCCPAV_MAX_RUNLEN) {
  229. *current_head += 1;
  230. av->av_buf_ackno = seqno;
  231. } else if (num_packets > 0) {
  232. dccp_ackvec_add_new(av, num_packets, seqno, state);
  233. } else {
  234. dccp_ackvec_update_old(av, num_packets, seqno, state);
  235. }
  236. }
  237. }
  238. /**
  239. * dccp_ackvec_clear_state - Perform house-keeping / garbage-collection
  240. * This routine is called when the peer acknowledges the receipt of Ack Vectors
  241. * up to and including @ackno. While based on on section A.3 of RFC 4340, here
  242. * are additional precautions to prevent corrupted buffer state. In particular,
  243. * we use tail_ackno to identify outdated records; it always marks the earliest
  244. * packet of group (2) in 11.4.2.
  245. */
  246. void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno)
  247. {
  248. struct dccp_ackvec_record *avr, *next;
  249. u8 runlen_now, eff_runlen;
  250. s64 delta;
  251. avr = dccp_ackvec_lookup(&av->av_records, ackno);
  252. if (avr == NULL)
  253. return;
  254. /*
  255. * Deal with outdated acknowledgments: this arises when e.g. there are
  256. * several old records and the acks from the peer come in slowly. In
  257. * that case we may still have records that pre-date tail_ackno.
  258. */
  259. delta = dccp_delta_seqno(av->av_tail_ackno, avr->avr_ack_ackno);
  260. if (delta < 0)
  261. goto free_records;
  262. /*
  263. * Deal with overlapping Ack Vectors: don't subtract more than the
  264. * number of packets between tail_ackno and ack_ackno.
  265. */
  266. eff_runlen = delta < avr->avr_ack_runlen ? delta : avr->avr_ack_runlen;
  267. runlen_now = dccp_ackvec_runlen(av->av_buf + avr->avr_ack_ptr);
  268. /*
  269. * The run length of Ack Vector cells does not decrease over time. If
  270. * the run length is the same as at the time the Ack Vector was sent, we
  271. * free the ack_ptr cell. That cell can however not be freed if the run
  272. * length has increased: in this case we need to move the tail pointer
  273. * backwards (towards higher indices), to its next-oldest neighbour.
  274. */
  275. if (runlen_now > eff_runlen) {
  276. av->av_buf[avr->avr_ack_ptr] -= eff_runlen + 1;
  277. av->av_buf_tail = __ackvec_idx_add(avr->avr_ack_ptr, 1);
  278. /* This move may not have cleared the overflow flag. */
  279. if (av->av_overflow)
  280. av->av_overflow = (av->av_buf_head == av->av_buf_tail);
  281. } else {
  282. av->av_buf_tail = avr->avr_ack_ptr;
  283. /*
  284. * We have made sure that avr points to a valid cell within the
  285. * buffer. This cell is either older than head, or equals head
  286. * (empty buffer): in both cases we no longer have any overflow.
  287. */
  288. av->av_overflow = 0;
  289. }
  290. /*
  291. * The peer has acknowledged up to and including ack_ackno. Hence the
  292. * first packet in group (2) of 11.4.2 is the successor of ack_ackno.
  293. */
  294. av->av_tail_ackno = ADD48(avr->avr_ack_ackno, 1);
  295. free_records:
  296. list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
  297. list_del(&avr->avr_node);
  298. kmem_cache_free(dccp_ackvec_record_slab, avr);
  299. }
  300. }
  301. /*
  302. * Routines to keep track of Ack Vectors received in an skb
  303. */
  304. int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce)
  305. {
  306. struct dccp_ackvec_parsed *new = kmalloc(sizeof(*new), GFP_ATOMIC);
  307. if (new == NULL)
  308. return -ENOBUFS;
  309. new->vec = vec;
  310. new->len = len;
  311. new->nonce = nonce;
  312. list_add_tail(&new->node, head);
  313. return 0;
  314. }
  315. EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_add);
  316. void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks)
  317. {
  318. struct dccp_ackvec_parsed *cur, *next;
  319. list_for_each_entry_safe(cur, next, parsed_chunks, node)
  320. kfree(cur);
  321. INIT_LIST_HEAD(parsed_chunks);
  322. }
  323. EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_cleanup);
  324. int __init dccp_ackvec_init(void)
  325. {
  326. dccp_ackvec_slab = kmem_cache_create("dccp_ackvec",
  327. sizeof(struct dccp_ackvec), 0,
  328. SLAB_HWCACHE_ALIGN, NULL);
  329. if (dccp_ackvec_slab == NULL)
  330. goto out_err;
  331. dccp_ackvec_record_slab = kmem_cache_create("dccp_ackvec_record",
  332. sizeof(struct dccp_ackvec_record),
  333. 0, SLAB_HWCACHE_ALIGN, NULL);
  334. if (dccp_ackvec_record_slab == NULL)
  335. goto out_destroy_slab;
  336. return 0;
  337. out_destroy_slab:
  338. kmem_cache_destroy(dccp_ackvec_slab);
  339. dccp_ackvec_slab = NULL;
  340. out_err:
  341. DCCP_CRIT("Unable to create Ack Vector slab cache");
  342. return -ENOBUFS;
  343. }
  344. void dccp_ackvec_exit(void)
  345. {
  346. if (dccp_ackvec_slab != NULL) {
  347. kmem_cache_destroy(dccp_ackvec_slab);
  348. dccp_ackvec_slab = NULL;
  349. }
  350. if (dccp_ackvec_record_slab != NULL) {
  351. kmem_cache_destroy(dccp_ackvec_record_slab);
  352. dccp_ackvec_record_slab = NULL;
  353. }
  354. }