/*
 * virtio transport for vsock
 *
 * Copyright (C) 2013-2015 Red Hat, Inc.
 * Author: Asias He <asias@redhat.com>
 *         Stefan Hajnoczi <stefanha@redhat.com>
 *
 * Some of the code is taken from Gerd Hoffmann <kraxel@redhat.com>'s
 * early virtio-vsock proof-of-concept bits.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 */
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/atomic.h>
#include <linux/virtio.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>
#include <linux/virtio_vsock.h>
#include <net/sock.h>
#include <linux/mutex.h>
#include <net/af_vsock.h>

static struct workqueue_struct *virtio_vsock_workqueue;
static struct virtio_vsock *the_virtio_vsock;
static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */

struct virtio_vsock {
        struct virtio_device *vdev;
        struct virtqueue *vqs[VSOCK_VQ_MAX];

        /* Virtqueue processing is deferred to a workqueue */
        struct work_struct tx_work;
        struct work_struct rx_work;
        struct work_struct event_work;

        /* The following fields are protected by tx_lock.  vqs[VSOCK_VQ_TX]
         * must be accessed with tx_lock held.
         */
        struct mutex tx_lock;

        struct work_struct send_pkt_work;
        spinlock_t send_pkt_list_lock;
        struct list_head send_pkt_list;

        atomic_t queued_replies;

        /* The following fields are protected by rx_lock.  vqs[VSOCK_VQ_RX]
         * must be accessed with rx_lock held.
         */
        struct mutex rx_lock;
        int rx_buf_nr;
        int rx_buf_max_nr;

        /* The following fields are protected by event_lock.
         * vqs[VSOCK_VQ_EVENT] must be accessed with event_lock held.
         */
        struct mutex event_lock;
        struct virtio_vsock_event event_list[8];

        u32 guest_cid;
};

static struct virtio_vsock *virtio_vsock_get(void)
{
        return the_virtio_vsock;
}

static u32 virtio_transport_get_local_cid(void)
{
        struct virtio_vsock *vsock = virtio_vsock_get();

        return vsock->guest_cid;
}

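/* Drain send_pkt_list into the TX virtqueue.  A packet that does not fit is
 * requeued at the head of the list and transmission resumes the next time
 * this worker runs.  Completing a reply packet frees a reply slot, so rx
 * processing is restarted once the reply budget is no longer exhausted.
 */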
static void
virtio_transport_send_pkt_work(struct work_struct *work)
{
        struct virtio_vsock *vsock =
                container_of(work, struct virtio_vsock, send_pkt_work);
        struct virtqueue *vq;
        bool added = false;
        bool restart_rx = false;

        mutex_lock(&vsock->tx_lock);

        vq = vsock->vqs[VSOCK_VQ_TX];

        for (;;) {
                struct virtio_vsock_pkt *pkt;
                struct scatterlist hdr, buf, *sgs[2];
                int ret, in_sg = 0, out_sg = 0;
                bool reply;

                spin_lock_bh(&vsock->send_pkt_list_lock);
                if (list_empty(&vsock->send_pkt_list)) {
                        spin_unlock_bh(&vsock->send_pkt_list_lock);
                        break;
                }

                pkt = list_first_entry(&vsock->send_pkt_list,
                                       struct virtio_vsock_pkt, list);
                list_del_init(&pkt->list);
                spin_unlock_bh(&vsock->send_pkt_list_lock);

                reply = pkt->reply;

                sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
                sgs[out_sg++] = &hdr;
                if (pkt->buf) {
                        sg_init_one(&buf, pkt->buf, pkt->len);
                        sgs[out_sg++] = &buf;
                }

                ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL);
                /* Usually this means that there is no more space available in
                 * the vq
                 */
                if (ret < 0) {
                        spin_lock_bh(&vsock->send_pkt_list_lock);
                        list_add(&pkt->list, &vsock->send_pkt_list);
                        spin_unlock_bh(&vsock->send_pkt_list_lock);
                        break;
                }

                if (reply) {
                        struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX];
                        int val;

                        val = atomic_dec_return(&vsock->queued_replies);

                        /* Do we now have resources to resume rx processing? */
                        if (val + 1 == virtqueue_get_vring_size(rx_vq))
                                restart_rx = true;
                }

                added = true;
        }

        if (added)
                virtqueue_kick(vq);

        mutex_unlock(&vsock->tx_lock);

        if (restart_rx)
                queue_work(virtio_vsock_workqueue, &vsock->rx_work);
}

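/* Queue a packet for transmission.  The actual I/O happens asynchronously in
 * send_pkt_work; the return value is the number of bytes queued, or -ENODEV
 * if no virtio-vsock device is present.
 */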
static int
virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt)
{
        struct virtio_vsock *vsock;
        int len = pkt->len;

        vsock = virtio_vsock_get();
        if (!vsock) {
                virtio_transport_free_pkt(pkt);
                return -ENODEV;
        }

        if (pkt->reply)
                atomic_inc(&vsock->queued_replies);

        spin_lock_bh(&vsock->send_pkt_list_lock);
        list_add_tail(&pkt->list, &vsock->send_pkt_list);
        spin_unlock_bh(&vsock->send_pkt_list_lock);

        queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
        return len;
}

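/* Fill the RX virtqueue with receive buffers (one header plus one
 * VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE data buffer per packet) until the ring is
 * full or an allocation fails, then notify the device.
 */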
static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
{
        int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
        struct virtio_vsock_pkt *pkt;
        struct scatterlist hdr, buf, *sgs[2];
        struct virtqueue *vq;
        int ret;

        vq = vsock->vqs[VSOCK_VQ_RX];

        do {
                pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
                if (!pkt)
                        break;

                pkt->buf = kmalloc(buf_len, GFP_KERNEL);
                if (!pkt->buf) {
                        virtio_transport_free_pkt(pkt);
                        break;
                }

                pkt->len = buf_len;

                sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
                sgs[0] = &hdr;

                sg_init_one(&buf, pkt->buf, buf_len);
                sgs[1] = &buf;
                ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL);
                if (ret) {
                        virtio_transport_free_pkt(pkt);
                        break;
                }
                vsock->rx_buf_nr++;
        } while (vq->num_free);
        if (vsock->rx_buf_nr > vsock->rx_buf_max_nr)
                vsock->rx_buf_max_nr = vsock->rx_buf_nr;
        virtqueue_kick(vq);
}

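/* Reclaim packets that the device has finished transmitting.  Freeing them
 * makes ring space available again, so kick send_pkt_work in case packets
 * are still waiting on send_pkt_list.
 */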
static void virtio_transport_tx_work(struct work_struct *work)
{
        struct virtio_vsock *vsock =
                container_of(work, struct virtio_vsock, tx_work);
        struct virtqueue *vq;
        bool added = false;

        vq = vsock->vqs[VSOCK_VQ_TX];
        mutex_lock(&vsock->tx_lock);
        do {
                struct virtio_vsock_pkt *pkt;
                unsigned int len;

                virtqueue_disable_cb(vq);
                while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) {
                        virtio_transport_free_pkt(pkt);
                        added = true;
                }
        } while (!virtqueue_enable_cb(vq));
        mutex_unlock(&vsock->tx_lock);

        if (added)
                queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
}

/* Is there space left for replies to rx packets? */
static bool virtio_transport_more_replies(struct virtio_vsock *vsock)
{
        struct virtqueue *vq = vsock->vqs[VSOCK_VQ_RX];
        int val;

        smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
        val = atomic_read(&vsock->queued_replies);

        return val < virtqueue_get_vring_size(vq);
}

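/* Pass received packets up to the core vsock code.  Processing stops early
 * when the reply budget is exhausted (send_pkt_work restarts it later), and
 * the RX ring is refilled whenever it drops below half of its high-water
 * mark.
 */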
static void virtio_transport_rx_work(struct work_struct *work)
{
        struct virtio_vsock *vsock =
                container_of(work, struct virtio_vsock, rx_work);
        struct virtqueue *vq;

        vq = vsock->vqs[VSOCK_VQ_RX];

        mutex_lock(&vsock->rx_lock);

        do {
                virtqueue_disable_cb(vq);
                for (;;) {
                        struct virtio_vsock_pkt *pkt;
                        unsigned int len;

                        if (!virtio_transport_more_replies(vsock)) {
                                /* Stop rx until the device processes already
                                 * pending replies.  Leave rx virtqueue
                                 * callbacks disabled.
                                 */
                                goto out;
                        }

                        pkt = virtqueue_get_buf(vq, &len);
                        if (!pkt)
                                break;

                        vsock->rx_buf_nr--;

                        /* Drop short/long packets */
                        if (unlikely(len < sizeof(pkt->hdr) ||
                                     len > sizeof(pkt->hdr) + pkt->len)) {
                                virtio_transport_free_pkt(pkt);
                                continue;
                        }

                        pkt->len = len - sizeof(pkt->hdr);
                        virtio_transport_recv_pkt(pkt);
                }
        } while (!virtqueue_enable_cb(vq));

out:
        if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2)
                virtio_vsock_rx_fill(vsock);
        mutex_unlock(&vsock->rx_lock);
}

/* event_lock must be held */
static int virtio_vsock_event_fill_one(struct virtio_vsock *vsock,
                                       struct virtio_vsock_event *event)
{
        struct scatterlist sg;
        struct virtqueue *vq;

        vq = vsock->vqs[VSOCK_VQ_EVENT];

        sg_init_one(&sg, event, sizeof(*event));

        return virtqueue_add_inbuf(vq, &sg, 1, event, GFP_KERNEL);
}

/* event_lock must be held */
static void virtio_vsock_event_fill(struct virtio_vsock *vsock)
{
        size_t i;

        for (i = 0; i < ARRAY_SIZE(vsock->event_list); i++) {
                struct virtio_vsock_event *event = &vsock->event_list[i];

                virtio_vsock_event_fill_one(vsock, event);
        }

        virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]);
}

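/* Mark a socket as disconnected with ECONNRESET.  Called for each connected
 * socket when the transport is reset and existing connection state has been
 * lost (for instance after a VM migration).
 */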
static void virtio_vsock_reset_sock(struct sock *sk)
{
        lock_sock(sk);
        sk->sk_state = SS_UNCONNECTED;
        sk->sk_err = ECONNRESET;
        sk->sk_error_report(sk);
        release_sock(sk);
}

static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock)
{
        struct virtio_device *vdev = vsock->vdev;
        u64 guest_cid;

        vdev->config->get(vdev, offsetof(struct virtio_vsock_config, guest_cid),
                          &guest_cid, sizeof(guest_cid));
        vsock->guest_cid = le64_to_cpu(guest_cid);
}

/* event_lock must be held */
static void virtio_vsock_event_handle(struct virtio_vsock *vsock,
                                      struct virtio_vsock_event *event)
{
        switch (le32_to_cpu(event->id)) {
        case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET:
                virtio_vsock_update_guest_cid(vsock);
                vsock_for_each_connected_socket(virtio_vsock_reset_sock);
                break;
        }
}

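/* Process device events.  Each event buffer is handled and then immediately
 * requeued on the event virtqueue so the device can reuse it.
 */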
static void virtio_transport_event_work(struct work_struct *work)
{
        struct virtio_vsock *vsock =
                container_of(work, struct virtio_vsock, event_work);
        struct virtqueue *vq;

        vq = vsock->vqs[VSOCK_VQ_EVENT];

        mutex_lock(&vsock->event_lock);

        do {
                struct virtio_vsock_event *event;
                unsigned int len;

                virtqueue_disable_cb(vq);
                while ((event = virtqueue_get_buf(vq, &len)) != NULL) {
                        if (len == sizeof(*event))
                                virtio_vsock_event_handle(vsock, event);

                        virtio_vsock_event_fill_one(vsock, event);
                }
        } while (!virtqueue_enable_cb(vq));

        virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]);

        mutex_unlock(&vsock->event_lock);
}

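/* Virtqueue callbacks run in atomic context, so defer the real work to the
 * workqueue where the mutexes above can be taken.
 */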
static void virtio_vsock_event_done(struct virtqueue *vq)
{
        struct virtio_vsock *vsock = vq->vdev->priv;

        if (!vsock)
                return;
        queue_work(virtio_vsock_workqueue, &vsock->event_work);
}

static void virtio_vsock_tx_done(struct virtqueue *vq)
{
        struct virtio_vsock *vsock = vq->vdev->priv;

        if (!vsock)
                return;
        queue_work(virtio_vsock_workqueue, &vsock->tx_work);
}

static void virtio_vsock_rx_done(struct virtqueue *vq)
{
        struct virtio_vsock *vsock = vq->vdev->priv;

        if (!vsock)
                return;
        queue_work(virtio_vsock_workqueue, &vsock->rx_work);
}

static struct virtio_transport virtio_transport = {
        .transport = {
                .get_local_cid            = virtio_transport_get_local_cid,

                .init                     = virtio_transport_do_socket_init,
                .destruct                 = virtio_transport_destruct,
                .release                  = virtio_transport_release,
                .connect                  = virtio_transport_connect,
                .shutdown                 = virtio_transport_shutdown,

                .dgram_bind               = virtio_transport_dgram_bind,
                .dgram_dequeue            = virtio_transport_dgram_dequeue,
                .dgram_enqueue            = virtio_transport_dgram_enqueue,
                .dgram_allow              = virtio_transport_dgram_allow,

                .stream_dequeue           = virtio_transport_stream_dequeue,
                .stream_enqueue           = virtio_transport_stream_enqueue,
                .stream_has_data          = virtio_transport_stream_has_data,
                .stream_has_space         = virtio_transport_stream_has_space,
                .stream_rcvhiwat          = virtio_transport_stream_rcvhiwat,
                .stream_is_active         = virtio_transport_stream_is_active,
                .stream_allow             = virtio_transport_stream_allow,

                .notify_poll_in           = virtio_transport_notify_poll_in,
                .notify_poll_out          = virtio_transport_notify_poll_out,
                .notify_recv_init         = virtio_transport_notify_recv_init,
                .notify_recv_pre_block    = virtio_transport_notify_recv_pre_block,
                .notify_recv_pre_dequeue  = virtio_transport_notify_recv_pre_dequeue,
                .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
                .notify_send_init         = virtio_transport_notify_send_init,
                .notify_send_pre_block    = virtio_transport_notify_send_pre_block,
                .notify_send_pre_enqueue  = virtio_transport_notify_send_pre_enqueue,
                .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,

                .set_buffer_size          = virtio_transport_set_buffer_size,
                .set_min_buffer_size      = virtio_transport_set_min_buffer_size,
                .set_max_buffer_size      = virtio_transport_set_max_buffer_size,
                .get_buffer_size          = virtio_transport_get_buffer_size,
                .get_min_buffer_size      = virtio_transport_get_min_buffer_size,
                .get_max_buffer_size      = virtio_transport_get_max_buffer_size,
        },

        .send_pkt = virtio_transport_send_pkt,
};

static int virtio_vsock_probe(struct virtio_device *vdev)
{
        vq_callback_t *callbacks[] = {
                virtio_vsock_rx_done,
                virtio_vsock_tx_done,
                virtio_vsock_event_done,
        };
        static const char * const names[] = {
                "rx",
                "tx",
                "event",
        };
        struct virtio_vsock *vsock = NULL;
        int ret;

        ret = mutex_lock_interruptible(&the_virtio_vsock_mutex);
        if (ret)
                return ret;

        /* Only one virtio-vsock device per guest is supported */
        if (the_virtio_vsock) {
                ret = -EBUSY;
                goto out;
        }

        vsock = kzalloc(sizeof(*vsock), GFP_KERNEL);
        if (!vsock) {
                ret = -ENOMEM;
                goto out;
        }

        vsock->vdev = vdev;

        ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX,
                                            vsock->vqs, callbacks, names);
        if (ret < 0)
                goto out;

        virtio_vsock_update_guest_cid(vsock);

        ret = vsock_core_init(&virtio_transport.transport);
        if (ret < 0)
                goto out_vqs;

        vsock->rx_buf_nr = 0;
        vsock->rx_buf_max_nr = 0;
        atomic_set(&vsock->queued_replies, 0);

        vdev->priv = vsock;
        the_virtio_vsock = vsock;
        mutex_init(&vsock->tx_lock);
        mutex_init(&vsock->rx_lock);
        mutex_init(&vsock->event_lock);
        spin_lock_init(&vsock->send_pkt_list_lock);
        INIT_LIST_HEAD(&vsock->send_pkt_list);
        INIT_WORK(&vsock->rx_work, virtio_transport_rx_work);
        INIT_WORK(&vsock->tx_work, virtio_transport_tx_work);
        INIT_WORK(&vsock->event_work, virtio_transport_event_work);
        INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work);

        mutex_lock(&vsock->rx_lock);
        virtio_vsock_rx_fill(vsock);
        mutex_unlock(&vsock->rx_lock);

        mutex_lock(&vsock->event_lock);
        virtio_vsock_event_fill(vsock);
        mutex_unlock(&vsock->event_lock);

        mutex_unlock(&the_virtio_vsock_mutex);
        return 0;

out_vqs:
        vsock->vdev->config->del_vqs(vsock->vdev);
out:
        kfree(vsock);
        mutex_unlock(&the_virtio_vsock_mutex);
        return ret;
}

static void virtio_vsock_remove(struct virtio_device *vdev)
{
        struct virtio_vsock *vsock = vdev->priv;
        struct virtio_vsock_pkt *pkt;

        flush_work(&vsock->rx_work);
        flush_work(&vsock->tx_work);
        flush_work(&vsock->event_work);
        flush_work(&vsock->send_pkt_work);

        vdev->config->reset(vdev);

        mutex_lock(&vsock->rx_lock);
        while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX])))
                virtio_transport_free_pkt(pkt);
        mutex_unlock(&vsock->rx_lock);

        mutex_lock(&vsock->tx_lock);
        while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_TX])))
                virtio_transport_free_pkt(pkt);
        mutex_unlock(&vsock->tx_lock);

        spin_lock_bh(&vsock->send_pkt_list_lock);
        while (!list_empty(&vsock->send_pkt_list)) {
                pkt = list_first_entry(&vsock->send_pkt_list,
                                       struct virtio_vsock_pkt, list);
                list_del(&pkt->list);
                virtio_transport_free_pkt(pkt);
        }
        spin_unlock_bh(&vsock->send_pkt_list_lock);

        mutex_lock(&the_virtio_vsock_mutex);
        the_virtio_vsock = NULL;
        vsock_core_exit();
        mutex_unlock(&the_virtio_vsock_mutex);

        vdev->config->del_vqs(vdev);

        kfree(vsock);
}

static struct virtio_device_id id_table[] = {
        { VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID },
        { 0 },
};

static unsigned int features[] = {
};

static struct virtio_driver virtio_vsock_driver = {
        .feature_table = features,
        .feature_table_size = ARRAY_SIZE(features),
        .driver.name = KBUILD_MODNAME,
        .driver.owner = THIS_MODULE,
        .id_table = id_table,
        .probe = virtio_vsock_probe,
        .remove = virtio_vsock_remove,
};

static int __init virtio_vsock_init(void)
{
        int ret;

        virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0);
        if (!virtio_vsock_workqueue)
                return -ENOMEM;

        ret = register_virtio_driver(&virtio_vsock_driver);
        if (ret)
                destroy_workqueue(virtio_vsock_workqueue);

        return ret;
}

static void __exit virtio_vsock_exit(void)
{
        unregister_virtio_driver(&virtio_vsock_driver);
        destroy_workqueue(virtio_vsock_workqueue);
}

module_init(virtio_vsock_init);
module_exit(virtio_vsock_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("virtio transport for vsock");
MODULE_DEVICE_TABLE(virtio, id_table);