trans_virtio.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630
  1. /*
  2. * The Virtio 9p transport driver
  3. *
  4. * This is a block based transport driver based on the lguest block driver
  5. * code.
  6. *
  7. * Copyright (C) 2007, 2008 Eric Van Hensbergen, IBM Corporation
  8. *
  9. * Based on virtio console driver
  10. * Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation
  11. *
  12. * This program is free software; you can redistribute it and/or modify
  13. * it under the terms of the GNU General Public License version 2
  14. * as published by the Free Software Foundation.
  15. *
  16. * This program is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. * GNU General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU General Public License
  22. * along with this program; if not, write to:
  23. * Free Software Foundation
  24. * 51 Franklin Street, Fifth Floor
  25. * Boston, MA 02111-1301 USA
  26. *
  27. */
  28. #include <linux/in.h>
  29. #include <linux/module.h>
  30. #include <linux/net.h>
  31. #include <linux/ipv6.h>
  32. #include <linux/errno.h>
  33. #include <linux/kernel.h>
  34. #include <linux/un.h>
  35. #include <linux/uaccess.h>
  36. #include <linux/inet.h>
  37. #include <linux/idr.h>
  38. #include <linux/file.h>
  39. #include <linux/slab.h>
  40. #include <net/9p/9p.h>
  41. #include <linux/parser.h>
  42. #include <net/9p/client.h>
  43. #include <net/9p/transport.h>
  44. #include <linux/scatterlist.h>
  45. #include <linux/swap.h>
  46. #include <linux/virtio.h>
  47. #include <linux/virtio_9p.h>
  48. #include "trans_common.h"
  49. #define VIRTQUEUE_NUM 128
  50. /* a single mutex to manage channel initialization and attachment */
  51. static DEFINE_MUTEX(virtio_9p_lock);
  52. static DECLARE_WAIT_QUEUE_HEAD(vp_wq);
  53. static atomic_t vp_pinned = ATOMIC_INIT(0);
  54. /**
  55. * struct virtio_chan - per-instance transport information
  56. * @initialized: whether the channel is initialized
  57. * @inuse: whether the channel is in use
  58. * @lock: protects multiple elements within this structure
  59. * @client: client instance
  60. * @vdev: virtio dev associated with this channel
  61. * @vq: virtio queue associated with this channel
  62. * @sg: scatter gather list which is used to pack a request (protected?)
  63. *
  64. * We keep all per-channel information in a structure.
  65. * This structure is allocated within the devices dev->mem space.
  66. * A pointer to the structure will get put in the transport private.
  67. *
  68. */
  69. struct virtio_chan {
  70. bool inuse;
  71. spinlock_t lock;
  72. struct p9_client *client;
  73. struct virtio_device *vdev;
  74. struct virtqueue *vq;
  75. int ring_bufs_avail;
  76. wait_queue_head_t *vc_wq;
  77. /* This is global limit. Since we don't have a global structure,
  78. * will be placing it in each channel.
  79. */
  80. int p9_max_pages;
  81. /* Scatterlist: can be too big for stack. */
  82. struct scatterlist sg[VIRTQUEUE_NUM];
  83. int tag_len;
  84. /*
  85. * tag name to identify a mount Non-null terminated
  86. */
  87. char *tag;
  88. struct list_head chan_list;
  89. };
  90. static struct list_head virtio_chan_list;
  91. /* How many bytes left in this page. */
  92. static unsigned int rest_of_page(void *data)
  93. {
  94. return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE);
  95. }
  96. /**
  97. * p9_virtio_close - reclaim resources of a channel
  98. * @client: client instance
  99. *
  100. * This reclaims a channel by freeing its resources and
  101. * reseting its inuse flag.
  102. *
  103. */
  104. static void p9_virtio_close(struct p9_client *client)
  105. {
  106. struct virtio_chan *chan = client->trans;
  107. mutex_lock(&virtio_9p_lock);
  108. if (chan)
  109. chan->inuse = false;
  110. mutex_unlock(&virtio_9p_lock);
  111. }
  112. /**
  113. * req_done - callback which signals activity from the server
  114. * @vq: virtio queue activity was received on
  115. *
  116. * This notifies us that the server has triggered some activity
  117. * on the virtio channel - most likely a response to request we
  118. * sent. Figure out which requests now have responses and wake up
  119. * those threads.
  120. *
  121. * Bugs: could do with some additional sanity checking, but appears to work.
  122. *
  123. */
  124. static void req_done(struct virtqueue *vq)
  125. {
  126. struct virtio_chan *chan = vq->vdev->priv;
  127. struct p9_fcall *rc;
  128. unsigned int len;
  129. struct p9_req_t *req;
  130. unsigned long flags;
  131. P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n");
  132. while (1) {
  133. spin_lock_irqsave(&chan->lock, flags);
  134. rc = virtqueue_get_buf(chan->vq, &len);
  135. if (rc == NULL) {
  136. spin_unlock_irqrestore(&chan->lock, flags);
  137. break;
  138. }
  139. chan->ring_bufs_avail = 1;
  140. spin_unlock_irqrestore(&chan->lock, flags);
  141. /* Wakeup if anyone waiting for VirtIO ring space. */
  142. wake_up(chan->vc_wq);
  143. P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
  144. P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
  145. req = p9_tag_lookup(chan->client, rc->tag);
  146. if (req->tc->private) {
  147. struct trans_rpage_info *rp = req->tc->private;
  148. int p = rp->rp_nr_pages;
  149. /*Release pages */
  150. p9_release_req_pages(rp);
  151. atomic_sub(p, &vp_pinned);
  152. wake_up(&vp_wq);
  153. if (rp->rp_alloc)
  154. kfree(rp);
  155. req->tc->private = NULL;
  156. }
  157. req->status = REQ_STATUS_RCVD;
  158. p9_client_cb(chan->client, req);
  159. }
  160. }
  161. /**
  162. * pack_sg_list - pack a scatter gather list from a linear buffer
  163. * @sg: scatter/gather list to pack into
  164. * @start: which segment of the sg_list to start at
  165. * @limit: maximum segment to pack data to
  166. * @data: data to pack into scatter/gather list
  167. * @count: amount of data to pack into the scatter/gather list
  168. *
  169. * sg_lists have multiple segments of various sizes. This will pack
  170. * arbitrary data into an existing scatter gather list, segmenting the
  171. * data as necessary within constraints.
  172. *
  173. */
  174. static int
  175. pack_sg_list(struct scatterlist *sg, int start, int limit, char *data,
  176. int count)
  177. {
  178. int s;
  179. int index = start;
  180. while (count) {
  181. s = rest_of_page(data);
  182. if (s > count)
  183. s = count;
  184. sg_set_buf(&sg[index++], data, s);
  185. count -= s;
  186. data += s;
  187. BUG_ON(index > limit);
  188. }
  189. return index-start;
  190. }
  /*
   * We don't currently allow canceling of virtio requests, so neither
   * argument is examined and the result is always 1.
   */
  static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
  {
  	const int not_cancelled = 1;

  	return not_cancelled;
  }
  196. /**
  197. * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer,
  198. * this takes a list of pages.
  199. * @sg: scatter/gather list to pack into
  200. * @start: which segment of the sg_list to start at
  201. * @pdata_off: Offset into the first page
  202. * @**pdata: a list of pages to add into sg.
  203. * @count: amount of data to pack into the scatter/gather list
  204. */
  205. static int
  206. pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off,
  207. struct page **pdata, int count)
  208. {
  209. int s;
  210. int i = 0;
  211. int index = start;
  212. if (pdata_off) {
  213. s = min((int)(PAGE_SIZE - pdata_off), count);
  214. sg_set_page(&sg[index++], pdata[i++], s, pdata_off);
  215. count -= s;
  216. }
  217. while (count) {
  218. BUG_ON(index > limit);
  219. s = min((int)PAGE_SIZE, count);
  220. sg_set_page(&sg[index++], pdata[i++], s, 0);
  221. count -= s;
  222. }
  223. return index-start;
  224. }
  225. /**
  226. * p9_virtio_request - issue a request
  227. * @client: client instance issuing the request
  228. * @req: request to be issued
  229. *
  230. */
  231. static int
  232. p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
  233. {
  234. int in, out, inp, outp;
  235. struct virtio_chan *chan = client->trans;
  236. unsigned long flags;
  237. size_t pdata_off = 0;
  238. struct trans_rpage_info *rpinfo = NULL;
  239. int err, pdata_len = 0;
  240. P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
  241. req->status = REQ_STATUS_SENT;
  242. if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) {
  243. int nr_pages = p9_nr_pages(req);
  244. int rpinfo_size = sizeof(struct trans_rpage_info) +
  245. sizeof(struct page *) * nr_pages;
  246. if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
  247. err = wait_event_interruptible(vp_wq,
  248. atomic_read(&vp_pinned) < chan->p9_max_pages);
  249. if (err == -ERESTARTSYS)
  250. return err;
  251. P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n");
  252. }
  253. if (rpinfo_size <= (req->tc->capacity - req->tc->size)) {
  254. /* We can use sdata */
  255. req->tc->private = req->tc->sdata + req->tc->size;
  256. rpinfo = (struct trans_rpage_info *)req->tc->private;
  257. rpinfo->rp_alloc = 0;
  258. } else {
  259. req->tc->private = kmalloc(rpinfo_size, GFP_NOFS);
  260. if (!req->tc->private) {
  261. P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: "
  262. "private kmalloc returned NULL");
  263. return -ENOMEM;
  264. }
  265. rpinfo = (struct trans_rpage_info *)req->tc->private;
  266. rpinfo->rp_alloc = 1;
  267. }
  268. err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages,
  269. req->tc->id == P9_TREAD ? 1 : 0);
  270. if (err < 0) {
  271. if (rpinfo->rp_alloc)
  272. kfree(rpinfo);
  273. return err;
  274. } else {
  275. atomic_add(rpinfo->rp_nr_pages, &vp_pinned);
  276. }
  277. }
  278. req_retry_pinned:
  279. spin_lock_irqsave(&chan->lock, flags);
  280. /* Handle out VirtIO ring buffers */
  281. out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata,
  282. req->tc->size);
  283. if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) {
  284. /* We have additional write payload buffer to take care */
  285. if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
  286. outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
  287. pdata_off, rpinfo->rp_data, pdata_len);
  288. } else {
  289. char *pbuf;
  290. if (req->tc->pubuf)
  291. pbuf = (__force char *) req->tc->pubuf;
  292. else
  293. pbuf = req->tc->pkbuf;
  294. outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf,
  295. req->tc->pbuf_size);
  296. }
  297. out += outp;
  298. }
  299. /* Handle in VirtIO ring buffers */
  300. if (req->tc->pbuf_size &&
  301. ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) {
  302. /*
  303. * Take care of additional Read payload.
  304. * 11 is the read/write header = PDU Header(7) + IO Size (4).
  305. * Arrange in such a way that server places header in the
  306. * alloced memory and payload onto the user buffer.
  307. */
  308. inp = pack_sg_list(chan->sg, out,
  309. VIRTQUEUE_NUM, req->rc->sdata, 11);
  310. /*
  311. * Running executables in the filesystem may result in
  312. * a read request with kernel buffer as opposed to user buffer.
  313. */
  314. if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
  315. in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM,
  316. pdata_off, rpinfo->rp_data, pdata_len);
  317. } else {
  318. char *pbuf;
  319. if (req->tc->pubuf)
  320. pbuf = (__force char *) req->tc->pubuf;
  321. else
  322. pbuf = req->tc->pkbuf;
  323. in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM,
  324. pbuf, req->tc->pbuf_size);
  325. }
  326. in += inp;
  327. } else {
  328. in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM,
  329. req->rc->sdata, req->rc->capacity);
  330. }
  331. err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
  332. if (err < 0) {
  333. if (err == -ENOSPC) {
  334. chan->ring_bufs_avail = 0;
  335. spin_unlock_irqrestore(&chan->lock, flags);
  336. err = wait_event_interruptible(*chan->vc_wq,
  337. chan->ring_bufs_avail);
  338. if (err == -ERESTARTSYS)
  339. return err;
  340. P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n");
  341. goto req_retry_pinned;
  342. } else {
  343. spin_unlock_irqrestore(&chan->lock, flags);
  344. P9_DPRINTK(P9_DEBUG_TRANS,
  345. "9p debug: "
  346. "virtio rpc add_buf returned failure");
  347. if (rpinfo && rpinfo->rp_alloc)
  348. kfree(rpinfo);
  349. return -EIO;
  350. }
  351. }
  352. virtqueue_kick(chan->vq);
  353. spin_unlock_irqrestore(&chan->lock, flags);
  354. P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n");
  355. return 0;
  356. }
  357. static ssize_t p9_mount_tag_show(struct device *dev,
  358. struct device_attribute *attr, char *buf)
  359. {
  360. struct virtio_chan *chan;
  361. struct virtio_device *vdev;
  362. vdev = dev_to_virtio(dev);
  363. chan = vdev->priv;
  364. return snprintf(buf, chan->tag_len + 1, "%s", chan->tag);
  365. }
  366. static DEVICE_ATTR(mount_tag, 0444, p9_mount_tag_show, NULL);
  367. /**
  368. * p9_virtio_probe - probe for existence of 9P virtio channels
  369. * @vdev: virtio device to probe
  370. *
  371. * This probes for existing virtio channels.
  372. *
  373. */
  374. static int p9_virtio_probe(struct virtio_device *vdev)
  375. {
  376. __u16 tag_len;
  377. char *tag;
  378. int err;
  379. struct virtio_chan *chan;
  380. chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL);
  381. if (!chan) {
  382. printk(KERN_ERR "9p: Failed to allocate virtio 9P channel\n");
  383. err = -ENOMEM;
  384. goto fail;
  385. }
  386. chan->vdev = vdev;
  387. /* We expect one virtqueue, for requests. */
  388. chan->vq = virtio_find_single_vq(vdev, req_done, "requests");
  389. if (IS_ERR(chan->vq)) {
  390. err = PTR_ERR(chan->vq);
  391. goto out_free_vq;
  392. }
  393. chan->vq->vdev->priv = chan;
  394. spin_lock_init(&chan->lock);
  395. sg_init_table(chan->sg, VIRTQUEUE_NUM);
  396. chan->inuse = false;
  397. if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) {
  398. vdev->config->get(vdev,
  399. offsetof(struct virtio_9p_config, tag_len),
  400. &tag_len, sizeof(tag_len));
  401. } else {
  402. err = -EINVAL;
  403. goto out_free_vq;
  404. }
  405. tag = kmalloc(tag_len, GFP_KERNEL);
  406. if (!tag) {
  407. err = -ENOMEM;
  408. goto out_free_vq;
  409. }
  410. vdev->config->get(vdev, offsetof(struct virtio_9p_config, tag),
  411. tag, tag_len);
  412. chan->tag = tag;
  413. chan->tag_len = tag_len;
  414. err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
  415. if (err) {
  416. goto out_free_tag;
  417. }
  418. chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
  419. if (!chan->vc_wq) {
  420. err = -ENOMEM;
  421. goto out_free_tag;
  422. }
  423. init_waitqueue_head(chan->vc_wq);
  424. chan->ring_bufs_avail = 1;
  425. /* Ceiling limit to avoid denial of service attacks */
  426. chan->p9_max_pages = nr_free_buffer_pages()/4;
  427. mutex_lock(&virtio_9p_lock);
  428. list_add_tail(&chan->chan_list, &virtio_chan_list);
  429. mutex_unlock(&virtio_9p_lock);
  430. return 0;
  431. out_free_tag:
  432. kfree(tag);
  433. out_free_vq:
  434. vdev->config->del_vqs(vdev);
  435. kfree(chan);
  436. fail:
  437. return err;
  438. }
  439. /**
  440. * p9_virtio_create - allocate a new virtio channel
  441. * @client: client instance invoking this transport
  442. * @devname: string identifying the channel to connect to (unused)
  443. * @args: args passed from sys_mount() for per-transport options (unused)
  444. *
  445. * This sets up a transport channel for 9p communication. Right now
  446. * we only match the first available channel, but eventually we couldlook up
  447. * alternate channels by matching devname versus a virtio_config entry.
  448. * We use a simple reference count mechanism to ensure that only a single
  449. * mount has a channel open at a time.
  450. *
  451. */
  452. static int
  453. p9_virtio_create(struct p9_client *client, const char *devname, char *args)
  454. {
  455. struct virtio_chan *chan;
  456. int ret = -ENOENT;
  457. int found = 0;
  458. mutex_lock(&virtio_9p_lock);
  459. list_for_each_entry(chan, &virtio_chan_list, chan_list) {
  460. if (!strncmp(devname, chan->tag, chan->tag_len) &&
  461. strlen(devname) == chan->tag_len) {
  462. if (!chan->inuse) {
  463. chan->inuse = true;
  464. found = 1;
  465. break;
  466. }
  467. ret = -EBUSY;
  468. }
  469. }
  470. mutex_unlock(&virtio_9p_lock);
  471. if (!found) {
  472. printk(KERN_ERR "9p: no channels available\n");
  473. return ret;
  474. }
  475. client->trans = (void *)chan;
  476. client->status = Connected;
  477. chan->client = client;
  478. return 0;
  479. }
  480. /**
  481. * p9_virtio_remove - clean up resources associated with a virtio device
  482. * @vdev: virtio device to remove
  483. *
  484. */
  485. static void p9_virtio_remove(struct virtio_device *vdev)
  486. {
  487. struct virtio_chan *chan = vdev->priv;
  488. BUG_ON(chan->inuse);
  489. vdev->config->del_vqs(vdev);
  490. mutex_lock(&virtio_9p_lock);
  491. list_del(&chan->chan_list);
  492. mutex_unlock(&virtio_9p_lock);
  493. sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
  494. kfree(chan->tag);
  495. kfree(chan->vc_wq);
  496. kfree(chan);
  497. }
  498. static struct virtio_device_id id_table[] = {
  499. { VIRTIO_ID_9P, VIRTIO_DEV_ANY_ID },
  500. { 0 },
  501. };
  502. static unsigned int features[] = {
  503. VIRTIO_9P_MOUNT_TAG,
  504. };
  505. /* The standard "struct lguest_driver": */
  506. static struct virtio_driver p9_virtio_drv = {
  507. .feature_table = features,
  508. .feature_table_size = ARRAY_SIZE(features),
  509. .driver.name = KBUILD_MODNAME,
  510. .driver.owner = THIS_MODULE,
  511. .id_table = id_table,
  512. .probe = p9_virtio_probe,
  513. .remove = p9_virtio_remove,
  514. };
  515. static struct p9_trans_module p9_virtio_trans = {
  516. .name = "virtio",
  517. .create = p9_virtio_create,
  518. .close = p9_virtio_close,
  519. .request = p9_virtio_request,
  520. .cancel = p9_virtio_cancel,
  521. /*
  522. * We leave one entry for input and one entry for response
  523. * headers. We also skip one more entry to accomodate, address
  524. * that are not at page boundary, that can result in an extra
  525. * page in zero copy.
  526. */
  527. .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3),
  528. .pref = P9_TRANS_PREF_PAYLOAD_SEP,
  529. .def = 0,
  530. .owner = THIS_MODULE,
  531. };
  532. /* The standard init function */
  533. static int __init p9_virtio_init(void)
  534. {
  535. INIT_LIST_HEAD(&virtio_chan_list);
  536. v9fs_register_trans(&p9_virtio_trans);
  537. return register_virtio_driver(&p9_virtio_drv);
  538. }
  539. static void __exit p9_virtio_cleanup(void)
  540. {
  541. unregister_virtio_driver(&p9_virtio_drv);
  542. v9fs_unregister_trans(&p9_virtio_trans);
  543. }
  544. module_init(p9_virtio_init);
  545. module_exit(p9_virtio_cleanup);
  546. MODULE_DEVICE_TABLE(virtio, id_table);
  547. MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
  548. MODULE_DESCRIPTION("Virtio 9p Transport");
  549. MODULE_LICENSE("GPL");