  1. /*
  2. * VMware vSockets Driver
  3. *
  4. * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
  5. *
  6. * This program is free software; you can redistribute it and/or modify it
  7. * under the terms of the GNU General Public License as published by the Free
  8. * Software Foundation version 2 and no later version.
  9. *
  10. * This program is distributed in the hope that it will be useful, but WITHOUT
  11. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12. * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  13. * more details.
  14. */
  15. #include <linux/types.h>
  16. #include <linux/socket.h>
  17. #include <linux/stddef.h>
  18. #include <net/sock.h>
  19. #include "vmci_transport_notify.h"
/* Shorthand accessor for a field of this socket's queue-state ("q_state")
 * notification bookkeeping struct.
 */
#define PKT_FIELD(vsk, field_name) \
	(vmci_trans(vsk)->notify.pkt_q_state.field_name)
/* Decide whether a peer that signalled "waiting to write" should now be
 * sent a READ notification.
 *
 * Returns false when no peer is waiting, or when the free space in our
 * consume queue has not yet risen above the flow-control limit derived
 * from write_notify_window; returns true when it is time to notify.
 */
static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk)
{
	bool retval;
	u64 notify_limit;

	if (!PKT_FIELD(vsk, peer_waiting_write))
		return false;

	/* When the sender blocks, we take that as a sign that the sender is
	 * faster than the receiver. To reduce the transmit rate of the sender,
	 * we delay the sending of the read notification by decreasing the
	 * write_notify_window. The notification is delayed until the number of
	 * bytes used in the queue drops below the write_notify_window.
	 */

	if (!PKT_FIELD(vsk, peer_waiting_write_detected)) {
		PKT_FIELD(vsk, peer_waiting_write_detected) = true;
		if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) {
			/* Window is already below one page: clamp straight
			 * to the configured floor.
			 */
			PKT_FIELD(vsk, write_notify_window) =
			    PKT_FIELD(vsk, write_notify_min_window);
		} else {
			/* Shrink by one page, but never below the floor. */
			PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE;
			if (PKT_FIELD(vsk, write_notify_window) <
			    PKT_FIELD(vsk, write_notify_min_window))
				PKT_FIELD(vsk, write_notify_window) =
				    PKT_FIELD(vsk, write_notify_min_window);
		}
	}
	notify_limit = vmci_trans(vsk)->consume_size -
		PKT_FIELD(vsk, write_notify_window);
	/* The notify_limit is used to delay notifications in the case where
	 * flow control is enabled. Below the test is expressed in terms of
	 * free space in the queue: if free_space > ConsumeSize -
	 * write_notify_window then notify. An alternate way of expressing this
	 * is to rewrite the expression to use the data ready in the receive
	 * queue: if write_notify_window > bufferReady then notify as
	 * free_space == ConsumeSize - bufferReady.
	 */
	retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) >
		notify_limit;
	if (retval) {
		/* Once we notify the peer, we reset the detected flag so the
		 * next wait will again cause a decrease in the window size.
		 */
		PKT_FIELD(vsk, peer_waiting_write_detected) = false;
	}
	return retval;
}
/* Handle a READ control packet from the peer: it has consumed data from
 * our produce queue, so wake any writers blocked on queue space.
 */
static void
vmci_transport_handle_read(struct sock *sk,
			   struct vmci_transport_packet *pkt,
			   bool bottom_half,
			   struct sockaddr_vm *dst, struct sockaddr_vm *src)
{
	sk->sk_write_space(sk);
}
/* Handle a WROTE control packet from the peer: it has produced data into
 * our consume queue, so wake any readers waiting for data.
 */
static void
vmci_transport_handle_wrote(struct sock *sk,
			    struct vmci_transport_packet *pkt,
			    bool bottom_half,
			    struct sockaddr_vm *dst, struct sockaddr_vm *src)
{
	sk->sk_data_ready(sk);
}
  83. static void vsock_block_update_write_window(struct sock *sk)
  84. {
  85. struct vsock_sock *vsk = vsock_sk(sk);
  86. if (PKT_FIELD(vsk, write_notify_window) < vmci_trans(vsk)->consume_size)
  87. PKT_FIELD(vsk, write_notify_window) =
  88. min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE,
  89. vmci_trans(vsk)->consume_size);
  90. }
  91. static int vmci_transport_send_read_notification(struct sock *sk)
  92. {
  93. struct vsock_sock *vsk;
  94. bool sent_read;
  95. unsigned int retries;
  96. int err;
  97. vsk = vsock_sk(sk);
  98. sent_read = false;
  99. retries = 0;
  100. err = 0;
  101. if (vmci_transport_notify_waiting_write(vsk)) {
  102. /* Notify the peer that we have read, retrying the send on
  103. * failure up to our maximum value. XXX For now we just log
  104. * the failure, but later we should schedule a work item to
  105. * handle the resend until it succeeds. That would require
  106. * keeping track of work items in the vsk and cleaning them up
  107. * upon socket close.
  108. */
  109. while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
  110. !sent_read &&
  111. retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
  112. err = vmci_transport_send_read(sk);
  113. if (err >= 0)
  114. sent_read = true;
  115. retries++;
  116. }
  117. if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_read)
  118. pr_err("%p unable to send read notification to peer\n",
  119. sk);
  120. else
  121. PKT_FIELD(vsk, peer_waiting_write) = false;
  122. }
  123. return err;
  124. }
/* Initialize the queue-state notify bookkeeping for a new socket: start
 * with a one-page notify window and no peer waiting to write.
 */
static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
	PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
	PKT_FIELD(vsk, peer_waiting_write) = false;
	PKT_FIELD(vsk, peer_waiting_write_detected) = false;
}
/* Reset the notify bookkeeping on socket teardown; mirrors the values set
 * by vmci_transport_notify_pkt_socket_init().
 */
static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk)
{
	PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
	PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
	PKT_FIELD(vsk, peer_waiting_write) = false;
	PKT_FIELD(vsk, peer_waiting_write_detected) = false;
}
  140. static int
  141. vmci_transport_notify_pkt_poll_in(struct sock *sk,
  142. size_t target, bool *data_ready_now)
  143. {
  144. struct vsock_sock *vsk = vsock_sk(sk);
  145. if (vsock_stream_has_data(vsk)) {
  146. *data_ready_now = true;
  147. } else {
  148. /* We can't read right now because there is nothing in the
  149. * queue. Ask for notifications when there is something to
  150. * read.
  151. */
  152. if (sk->sk_state == TCP_ESTABLISHED)
  153. vsock_block_update_write_window(sk);
  154. *data_ready_now = false;
  155. }
  156. return 0;
  157. }
  158. static int
  159. vmci_transport_notify_pkt_poll_out(struct sock *sk,
  160. size_t target, bool *space_avail_now)
  161. {
  162. s64 produce_q_free_space;
  163. struct vsock_sock *vsk = vsock_sk(sk);
  164. produce_q_free_space = vsock_stream_has_space(vsk);
  165. if (produce_q_free_space > 0) {
  166. *space_avail_now = true;
  167. return 0;
  168. } else if (produce_q_free_space == 0) {
  169. /* This is a connected socket but we can't currently send data.
  170. * Nothing else to do.
  171. */
  172. *space_avail_now = false;
  173. }
  174. return 0;
  175. }
  176. static int
  177. vmci_transport_notify_pkt_recv_init(
  178. struct sock *sk,
  179. size_t target,
  180. struct vmci_transport_recv_notify_data *data)
  181. {
  182. struct vsock_sock *vsk = vsock_sk(sk);
  183. data->consume_head = 0;
  184. data->produce_tail = 0;
  185. data->notify_on_block = false;
  186. if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) {
  187. PKT_FIELD(vsk, write_notify_min_window) = target + 1;
  188. if (PKT_FIELD(vsk, write_notify_window) <
  189. PKT_FIELD(vsk, write_notify_min_window)) {
  190. /* If the current window is smaller than the new
  191. * minimal window size, we need to reevaluate whether
  192. * we need to notify the sender. If the number of ready
  193. * bytes are smaller than the new window, we need to
  194. * send a notification to the sender before we block.
  195. */
  196. PKT_FIELD(vsk, write_notify_window) =
  197. PKT_FIELD(vsk, write_notify_min_window);
  198. data->notify_on_block = true;
  199. }
  200. }
  201. return 0;
  202. }
  203. static int
  204. vmci_transport_notify_pkt_recv_pre_block(
  205. struct sock *sk,
  206. size_t target,
  207. struct vmci_transport_recv_notify_data *data)
  208. {
  209. int err = 0;
  210. vsock_block_update_write_window(sk);
  211. if (data->notify_on_block) {
  212. err = vmci_transport_send_read_notification(sk);
  213. if (err < 0)
  214. return err;
  215. data->notify_on_block = false;
  216. }
  217. return err;
  218. }
/* Called after data has been dequeued from the consume queue.  If the
 * dequeue emptied a previously full queue, mark the peer as waiting to
 * write and send it a READ notification; then re-check for data that may
 * have raced in.
 */
static int
vmci_transport_notify_pkt_recv_post_dequeue(
				struct sock *sk,
				size_t target,
				ssize_t copied,
				bool data_read,
				struct vmci_transport_recv_notify_data *data)
{
	struct vsock_sock *vsk;
	int err;
	bool was_full = false;
	u64 free_space;

	vsk = vsock_sk(sk);
	err = 0;

	if (data_read) {
		/* Order the dequeue against the free-space query below so
		 * the "was the queue full" test is not based on stale state.
		 */
		smp_mb();

		/* If the free space now equals exactly what we copied, the
		 * queue was completely full before this dequeue, i.e. the
		 * peer was presumably blocked waiting to write.
		 */
		free_space =
			vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair);
		was_full = free_space == copied;

		if (was_full)
			PKT_FIELD(vsk, peer_waiting_write) = true;

		err = vmci_transport_send_read_notification(sk);
		if (err < 0)
			return err;

		/* See the comment in
		 * vmci_transport_notify_pkt_send_post_enqueue().
		 */
		sk->sk_data_ready(sk);
	}

	return err;
}
  250. static int
  251. vmci_transport_notify_pkt_send_init(
  252. struct sock *sk,
  253. struct vmci_transport_send_notify_data *data)
  254. {
  255. data->consume_head = 0;
  256. data->produce_tail = 0;
  257. return 0;
  258. }
/* Called after data has been enqueued on the produce queue.  If this write
 * made the queue go from empty to non-empty, send a WROTE notification so
 * the peer learns there is data to read, retrying the datagram send a
 * bounded number of times on failure.
 */
static int
vmci_transport_notify_pkt_send_post_enqueue(
				struct sock *sk,
				ssize_t written,
				struct vmci_transport_send_notify_data *data)
{
	int err = 0;
	struct vsock_sock *vsk;
	bool sent_wrote = false;
	bool was_empty;
	int retries = 0;

	vsk = vsock_sk(sk);

	/* Order the enqueue against the buf-ready query below so the
	 * emptiness test is not based on stale queue state.
	 */
	smp_mb();

	/* If the bytes now ready equal exactly what we just wrote, the
	 * queue was empty before this enqueue, i.e. the peer may be
	 * blocked waiting for data.
	 */
	was_empty =
		vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) == written;
	if (was_empty) {
		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
		       !sent_wrote &&
		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
			err = vmci_transport_send_wrote(sk);
			if (err >= 0)
				sent_wrote = true;

			retries++;
		}
	}

	/* When !was_empty, retries is still 0, so this branch can only
	 * fire after the loop above exhausted all resend attempts.
	 */
	if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_wrote) {
		pr_err("%p unable to send wrote notification to peer\n",
		       sk);
		return err;
	}

	return err;
}
  291. static void
  292. vmci_transport_notify_pkt_handle_pkt(
  293. struct sock *sk,
  294. struct vmci_transport_packet *pkt,
  295. bool bottom_half,
  296. struct sockaddr_vm *dst,
  297. struct sockaddr_vm *src, bool *pkt_processed)
  298. {
  299. bool processed = false;
  300. switch (pkt->type) {
  301. case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
  302. vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src);
  303. processed = true;
  304. break;
  305. case VMCI_TRANSPORT_PACKET_TYPE_READ:
  306. vmci_transport_handle_read(sk, pkt, bottom_half, dst, src);
  307. processed = true;
  308. break;
  309. }
  310. if (pkt_processed)
  311. *pkt_processed = processed;
  312. }
  313. static void vmci_transport_notify_pkt_process_request(struct sock *sk)
  314. {
  315. struct vsock_sock *vsk = vsock_sk(sk);
  316. PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
  317. if (vmci_trans(vsk)->consume_size <
  318. PKT_FIELD(vsk, write_notify_min_window))
  319. PKT_FIELD(vsk, write_notify_min_window) =
  320. vmci_trans(vsk)->consume_size;
  321. }
  322. static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
  323. {
  324. struct vsock_sock *vsk = vsock_sk(sk);
  325. PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
  326. if (vmci_trans(vsk)->consume_size <
  327. PKT_FIELD(vsk, write_notify_min_window))
  328. PKT_FIELD(vsk, write_notify_min_window) =
  329. vmci_trans(vsk)->consume_size;
  330. }
/* No pre-dequeue work is needed by the queue-state notify protocol. */
static int
vmci_transport_notify_pkt_recv_pre_dequeue(
				struct sock *sk,
				size_t target,
				struct vmci_transport_recv_notify_data *data)
{
	return 0; /* NOP for QState. */
}
/* No pre-block work is needed on the send side for this protocol. */
static int
vmci_transport_notify_pkt_send_pre_block(
				struct sock *sk,
				struct vmci_transport_send_notify_data *data)
{
	return 0; /* NOP for QState. */
}
/* No pre-enqueue work is needed on the send side for this protocol. */
static int
vmci_transport_notify_pkt_send_pre_enqueue(
				struct sock *sk,
				struct vmci_transport_send_notify_data *data)
{
	return 0; /* NOP for QState. */
}
/* Notification ops table for the queue-state ("q_state") protocol variant,
 * which drives all wakeups from READ/WROTE control packets plus the queue
 * pair's fill state.
 */
const struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = {
	.socket_init = vmci_transport_notify_pkt_socket_init,
	.socket_destruct = vmci_transport_notify_pkt_socket_destruct,
	.poll_in = vmci_transport_notify_pkt_poll_in,
	.poll_out = vmci_transport_notify_pkt_poll_out,
	.handle_notify_pkt = vmci_transport_notify_pkt_handle_pkt,
	.recv_init = vmci_transport_notify_pkt_recv_init,
	.recv_pre_block = vmci_transport_notify_pkt_recv_pre_block,
	.recv_pre_dequeue = vmci_transport_notify_pkt_recv_pre_dequeue,
	.recv_post_dequeue = vmci_transport_notify_pkt_recv_post_dequeue,
	.send_init = vmci_transport_notify_pkt_send_init,
	.send_pre_block = vmci_transport_notify_pkt_send_pre_block,
	.send_pre_enqueue = vmci_transport_notify_pkt_send_pre_enqueue,
	.send_post_enqueue = vmci_transport_notify_pkt_send_post_enqueue,
	.process_request = vmci_transport_notify_pkt_process_request,
	.process_negotiate = vmci_transport_notify_pkt_process_negotiate,
};