virtio_ring.c 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206
  1. /* Virtio ring implementation.
  2. *
  3. * Copyright 2007 Rusty Russell IBM Corporation
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program; if not, write to the Free Software
  17. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  18. */
  19. #include <linux/virtio.h>
  20. #include <linux/virtio_ring.h>
  21. #include <linux/virtio_config.h>
  22. #include <linux/device.h>
  23. #include <linux/slab.h>
  24. #include <linux/module.h>
  25. #include <linux/hrtimer.h>
  26. #include <linux/kmemleak.h>
  27. #include <linux/dma-mapping.h>
  28. #include <xen/xen.h>
  #ifdef DEBUG
  /*
   * Development build: any ring inconsistency is fatal.
   *
   * BAD_RING() logs the message (prefixed with the queue name) and then
   * calls BUG().
   */
  #define BAD_RING(_vq, fmt, args...) \
      do { \
          dev_err(&(_vq)->vq.vdev->dev, \
                  "%s:"fmt, (_vq)->vq.name, ##args); \
          BUG(); \
      } while (0)
  /*
   * Caller is supposed to guarantee no reentry.
   *
   * START_USE() panics if the queue is already marked in use, otherwise it
   * records the source line that took ownership.  END_USE() asserts the
   * queue was in use and clears the marker.
   */
  #define START_USE(_vq) \
      do { \
          if ((_vq)->in_use) \
              panic("%s:in_use = %i\n", \
                    (_vq)->vq.name, (_vq)->in_use); \
          (_vq)->in_use = __LINE__; \
      } while (0)
  #define END_USE(_vq) \
      do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
  #else
  /*
   * Production build: log the problem and mark the queue broken instead of
   * crashing.  The macro argument is parenthesised everywhere so that any
   * pointer expression can be passed safely.
   */
  #define BAD_RING(_vq, fmt, args...) \
      do { \
          dev_err(&(_vq)->vq.vdev->dev, \
                  "%s:"fmt, (_vq)->vq.name, ##args); \
          (_vq)->broken = true; \
      } while (0)
  /* Reentrancy tracking is compiled out when DEBUG is not defined. */
  #define START_USE(vq)
  #define END_USE(vq)
  #endif
  /* Driver-side bookkeeping kept for every descriptor slot of the ring. */
  struct vring_desc_state {
      /* Caller's token for the buffer; handed back when the buffer is returned. */
      void *data;
      /* Separately allocated indirect descriptor table, or NULL if none. */
      struct vring_desc *indir_desc;
  };
  61. struct vring_virtqueue {
  62. struct virtqueue vq;
  63. /* Actual memory layout for this queue */
  64. struct vring vring;
  65. /* Can we use weak barriers? */
  66. bool weak_barriers;
  67. /* Other side has made a mess, don't try any more. */
  68. bool broken;
  69. /* Host supports indirect buffers */
  70. bool indirect;
  71. /* Host publishes avail event idx */
  72. bool event;
  73. /* Head of free buffer list. */
  74. unsigned int free_head;
  75. /* Number we've added since last sync. */
  76. unsigned int num_added;
  77. /* Last used index we've seen. */
  78. u16 last_used_idx;
  79. /* Last written value to avail->flags */
  80. u16 avail_flags_shadow;
  81. /* Last written value to avail->idx in guest byte order */
  82. u16 avail_idx_shadow;
  83. /* How to notify other side. FIXME: commonalize hcalls! */
  84. bool (*notify)(struct virtqueue *vq);
  85. /* DMA, allocation, and size information */
  86. bool we_own_ring;
  87. size_t queue_size_in_bytes;
  88. dma_addr_t queue_dma_addr;
  89. #ifdef DEBUG
  90. /* They're supposed to lock for us. */
  91. unsigned int in_use;
  92. /* Figure out if their kicks are too delayed. */
  93. bool last_add_time_valid;
  94. ktime_t last_add_time;
  95. #endif
  96. /* Per-descriptor state. */
  97. struct vring_desc_state desc_state[];
  98. };
  99. #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
  100. /*
  101. * Modern virtio devices have feature bits to specify whether they need a
  102. * quirk and bypass the IOMMU. If not there, just use the DMA API.
  103. *
  104. * If there, the interaction between virtio and DMA API is messy.
  105. *
  106. * On most systems with virtio, physical addresses match bus addresses,
  107. * and it doesn't particularly matter whether we use the DMA API.
  108. *
  109. * On some systems, including Xen and any system with a physical device
  110. * that speaks virtio behind a physical IOMMU, we must use the DMA API
  111. * for virtio DMA to work at all.
  112. *
  113. * On other systems, including SPARC and PPC64, virtio-pci devices are
  114. * enumerated as though they are behind an IOMMU, but the virtio host
  115. * ignores the IOMMU, so we must either pretend that the IOMMU isn't
  116. * there or somehow map everything as the identity.
  117. *
  118. * For the time being, we preserve historic behavior and bypass the DMA
  119. * API.
  120. *
  121. * TODO: install a per-device DMA ops structure that does the right thing
  122. * taking into account all the above quirks, and use the DMA API
  123. * unconditionally on data path.
  124. */
  125. static bool vring_use_dma_api(struct virtio_device *vdev)
  126. {
  127. if (!virtio_has_iommu_quirk(vdev))
  128. return true;
  129. /* Otherwise, we are left to guess. */
  130. /*
  131. * In theory, it's possible to have a buggy QEMU-supposed
  132. * emulated Q35 IOMMU and Xen enabled at the same time. On
  133. * such a configuration, virtio has never worked and will
  134. * not work without an even larger kludge. Instead, enable
  135. * the DMA API if we're a Xen guest, which at least allows
  136. * all of the sensible Xen configurations to work correctly.
  137. */
  138. if (xen_domain())
  139. return true;
  140. return false;
  141. }
  142. /*
  143. * The DMA ops on various arches are rather gnarly right now, and
  144. * making all of the arch DMA ops work on the vring device itself
  145. * is a mess. For now, we use the parent device for DMA ops.
  146. */
  147. static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
  148. {
  149. return vq->vq.vdev->dev.parent;
  150. }
  151. /* Map one sg entry. */
  152. static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
  153. struct scatterlist *sg,
  154. enum dma_data_direction direction)
  155. {
  156. if (!vring_use_dma_api(vq->vq.vdev))
  157. return (dma_addr_t)sg_phys(sg);
  158. /*
  159. * We can't use dma_map_sg, because we don't use scatterlists in
  160. * the way it expects (we don't guarantee that the scatterlist
  161. * will exist for the lifetime of the mapping).
  162. */
  163. return dma_map_page(vring_dma_dev(vq),
  164. sg_page(sg), sg->offset, sg->length,
  165. direction);
  166. }
  167. static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
  168. void *cpu_addr, size_t size,
  169. enum dma_data_direction direction)
  170. {
  171. if (!vring_use_dma_api(vq->vq.vdev))
  172. return (dma_addr_t)virt_to_phys(cpu_addr);
  173. return dma_map_single(vring_dma_dev(vq),
  174. cpu_addr, size, direction);
  175. }
  176. static void vring_unmap_one(const struct vring_virtqueue *vq,
  177. struct vring_desc *desc)
  178. {
  179. u16 flags;
  180. if (!vring_use_dma_api(vq->vq.vdev))
  181. return;
  182. flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
  183. if (flags & VRING_DESC_F_INDIRECT) {
  184. dma_unmap_single(vring_dma_dev(vq),
  185. virtio64_to_cpu(vq->vq.vdev, desc->addr),
  186. virtio32_to_cpu(vq->vq.vdev, desc->len),
  187. (flags & VRING_DESC_F_WRITE) ?
  188. DMA_FROM_DEVICE : DMA_TO_DEVICE);
  189. } else {
  190. dma_unmap_page(vring_dma_dev(vq),
  191. virtio64_to_cpu(vq->vq.vdev, desc->addr),
  192. virtio32_to_cpu(vq->vq.vdev, desc->len),
  193. (flags & VRING_DESC_F_WRITE) ?
  194. DMA_FROM_DEVICE : DMA_TO_DEVICE);
  195. }
  196. }
  197. static int vring_mapping_error(const struct vring_virtqueue *vq,
  198. dma_addr_t addr)
  199. {
  200. if (!vring_use_dma_api(vq->vq.vdev))
  201. return 0;
  202. return dma_mapping_error(vring_dma_dev(vq), addr);
  203. }
  204. static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
  205. unsigned int total_sg, gfp_t gfp)
  206. {
  207. struct vring_desc *desc;
  208. unsigned int i;
  209. /*
  210. * We require lowmem mappings for the descriptors because
  211. * otherwise virt_to_phys will give us bogus addresses in the
  212. * virtqueue.
  213. */
  214. gfp &= ~__GFP_HIGHMEM;
  215. desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
  216. if (!desc)
  217. return NULL;
  218. for (i = 0; i < total_sg; i++)
  219. desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
  220. return desc;
  221. }
  222. static inline int virtqueue_add(struct virtqueue *_vq,
  223. struct scatterlist *sgs[],
  224. unsigned int total_sg,
  225. unsigned int out_sgs,
  226. unsigned int in_sgs,
  227. void *data,
  228. gfp_t gfp)
  229. {
  230. struct vring_virtqueue *vq = to_vvq(_vq);
  231. struct scatterlist *sg;
  232. struct vring_desc *desc;
  233. unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx;
  234. int head;
  235. bool indirect;
  236. START_USE(vq);
  237. BUG_ON(data == NULL);
  238. if (unlikely(vq->broken)) {
  239. END_USE(vq);
  240. return -EIO;
  241. }
  242. #ifdef DEBUG
  243. {
  244. ktime_t now = ktime_get();
  245. /* No kick or get, with .1 second between? Warn. */
  246. if (vq->last_add_time_valid)
  247. WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
  248. > 100);
  249. vq->last_add_time = now;
  250. vq->last_add_time_valid = true;
  251. }
  252. #endif
  253. BUG_ON(total_sg > vq->vring.num);
  254. BUG_ON(total_sg == 0);
  255. head = vq->free_head;
  256. /* If the host supports indirect descriptor tables, and we have multiple
  257. * buffers, then go indirect. FIXME: tune this threshold */
  258. if (vq->indirect && total_sg > 1 && vq->vq.num_free)
  259. desc = alloc_indirect(_vq, total_sg, gfp);
  260. else
  261. desc = NULL;
  262. if (desc) {
  263. /* Use a single buffer which doesn't continue */
  264. indirect = true;
  265. /* Set up rest to use this indirect table. */
  266. i = 0;
  267. descs_used = 1;
  268. } else {
  269. indirect = false;
  270. desc = vq->vring.desc;
  271. i = head;
  272. descs_used = total_sg;
  273. }
  274. if (vq->vq.num_free < descs_used) {
  275. pr_debug("Can't add buf len %i - avail = %i\n",
  276. descs_used, vq->vq.num_free);
  277. /* FIXME: for historical reasons, we force a notify here if
  278. * there are outgoing parts to the buffer. Presumably the
  279. * host should service the ring ASAP. */
  280. if (out_sgs)
  281. vq->notify(&vq->vq);
  282. if (indirect)
  283. kfree(desc);
  284. END_USE(vq);
  285. return -ENOSPC;
  286. }
  287. for (n = 0; n < out_sgs; n++) {
  288. for (sg = sgs[n]; sg; sg = sg_next(sg)) {
  289. dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
  290. if (vring_mapping_error(vq, addr))
  291. goto unmap_release;
  292. desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT);
  293. desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
  294. desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
  295. prev = i;
  296. i = virtio16_to_cpu(_vq->vdev, desc[i].next);
  297. }
  298. }
  299. for (; n < (out_sgs + in_sgs); n++) {
  300. for (sg = sgs[n]; sg; sg = sg_next(sg)) {
  301. dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
  302. if (vring_mapping_error(vq, addr))
  303. goto unmap_release;
  304. desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE);
  305. desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
  306. desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
  307. prev = i;
  308. i = virtio16_to_cpu(_vq->vdev, desc[i].next);
  309. }
  310. }
  311. /* Last one doesn't continue. */
  312. desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
  313. if (indirect) {
  314. /* Now that the indirect table is filled in, map it. */
  315. dma_addr_t addr = vring_map_single(
  316. vq, desc, total_sg * sizeof(struct vring_desc),
  317. DMA_TO_DEVICE);
  318. if (vring_mapping_error(vq, addr))
  319. goto unmap_release;
  320. vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT);
  321. vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, addr);
  322. vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc));
  323. }
  324. /* We're using some buffers from the free list. */
  325. vq->vq.num_free -= descs_used;
  326. /* Update free pointer */
  327. if (indirect)
  328. vq->free_head = virtio16_to_cpu(_vq->vdev, vq->vring.desc[head].next);
  329. else
  330. vq->free_head = i;
  331. /* Store token and indirect buffer state. */
  332. vq->desc_state[head].data = data;
  333. if (indirect)
  334. vq->desc_state[head].indir_desc = desc;
  335. /* Put entry in available array (but don't update avail->idx until they
  336. * do sync). */
  337. avail = vq->avail_idx_shadow & (vq->vring.num - 1);
  338. vq->vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
  339. /* Descriptors and available array need to be set before we expose the
  340. * new available array entries. */
  341. virtio_wmb(vq->weak_barriers);
  342. vq->avail_idx_shadow++;
  343. vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
  344. vq->num_added++;
  345. pr_debug("Added buffer head %i to %p\n", head, vq);
  346. END_USE(vq);
  347. /* This is very unlikely, but theoretically possible. Kick
  348. * just in case. */
  349. if (unlikely(vq->num_added == (1 << 16) - 1))
  350. virtqueue_kick(_vq);
  351. return 0;
  352. unmap_release:
  353. err_idx = i;
  354. i = head;
  355. for (n = 0; n < total_sg; n++) {
  356. if (i == err_idx)
  357. break;
  358. vring_unmap_one(vq, &desc[i]);
  359. i = vq->vring.desc[i].next;
  360. }
  361. if (indirect)
  362. kfree(desc);
  363. END_USE(vq);
  364. return -EIO;
  365. }
  366. /**
  367. * virtqueue_add_sgs - expose buffers to other end
  368. * @vq: the struct virtqueue we're talking about.
  369. * @sgs: array of terminated scatterlists.
  370. * @out_num: the number of scatterlists readable by other side
  371. * @in_num: the number of scatterlists which are writable (after readable ones)
  372. * @data: the token identifying the buffer.
  373. * @gfp: how to do memory allocations (if necessary).
  374. *
  375. * Caller must ensure we don't call this with other virtqueue operations
  376. * at the same time (except where noted).
  377. *
  378. * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
  379. */
  380. int virtqueue_add_sgs(struct virtqueue *_vq,
  381. struct scatterlist *sgs[],
  382. unsigned int out_sgs,
  383. unsigned int in_sgs,
  384. void *data,
  385. gfp_t gfp)
  386. {
  387. unsigned int i, total_sg = 0;
  388. /* Count them first. */
  389. for (i = 0; i < out_sgs + in_sgs; i++) {
  390. struct scatterlist *sg;
  391. for (sg = sgs[i]; sg; sg = sg_next(sg))
  392. total_sg++;
  393. }
  394. return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, data, gfp);
  395. }
  396. EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
  397. /**
  398. * virtqueue_add_outbuf - expose output buffers to other end
  399. * @vq: the struct virtqueue we're talking about.
  400. * @sg: scatterlist (must be well-formed and terminated!)
  401. * @num: the number of entries in @sg readable by other side
  402. * @data: the token identifying the buffer.
  403. * @gfp: how to do memory allocations (if necessary).
  404. *
  405. * Caller must ensure we don't call this with other virtqueue operations
  406. * at the same time (except where noted).
  407. *
  408. * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
  409. */
  410. int virtqueue_add_outbuf(struct virtqueue *vq,
  411. struct scatterlist *sg, unsigned int num,
  412. void *data,
  413. gfp_t gfp)
  414. {
  415. return virtqueue_add(vq, &sg, num, 1, 0, data, gfp);
  416. }
  417. EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
  418. /**
  419. * virtqueue_add_inbuf - expose input buffers to other end
  420. * @vq: the struct virtqueue we're talking about.
  421. * @sg: scatterlist (must be well-formed and terminated!)
  422. * @num: the number of entries in @sg writable by other side
  423. * @data: the token identifying the buffer.
  424. * @gfp: how to do memory allocations (if necessary).
  425. *
  426. * Caller must ensure we don't call this with other virtqueue operations
  427. * at the same time (except where noted).
  428. *
  429. * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
  430. */
  431. int virtqueue_add_inbuf(struct virtqueue *vq,
  432. struct scatterlist *sg, unsigned int num,
  433. void *data,
  434. gfp_t gfp)
  435. {
  436. return virtqueue_add(vq, &sg, num, 0, 1, data, gfp);
  437. }
  438. EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
  439. /**
  440. * virtqueue_kick_prepare - first half of split virtqueue_kick call.
  441. * @vq: the struct virtqueue
  442. *
  443. * Instead of virtqueue_kick(), you can do:
  444. * if (virtqueue_kick_prepare(vq))
  445. * virtqueue_notify(vq);
  446. *
  447. * This is sometimes useful because the virtqueue_kick_prepare() needs
  448. * to be serialized, but the actual virtqueue_notify() call does not.
  449. */
  450. bool virtqueue_kick_prepare(struct virtqueue *_vq)
  451. {
  452. struct vring_virtqueue *vq = to_vvq(_vq);
  453. u16 new, old;
  454. bool needs_kick;
  455. START_USE(vq);
  456. /* We need to expose available array entries before checking avail
  457. * event. */
  458. virtio_mb(vq->weak_barriers);
  459. old = vq->avail_idx_shadow - vq->num_added;
  460. new = vq->avail_idx_shadow;
  461. vq->num_added = 0;
  462. #ifdef DEBUG
  463. if (vq->last_add_time_valid) {
  464. WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
  465. vq->last_add_time)) > 100);
  466. }
  467. vq->last_add_time_valid = false;
  468. #endif
  469. if (vq->event) {
  470. needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, vring_avail_event(&vq->vring)),
  471. new, old);
  472. } else {
  473. needs_kick = !(vq->vring.used->flags & cpu_to_virtio16(_vq->vdev, VRING_USED_F_NO_NOTIFY));
  474. }
  475. END_USE(vq);
  476. return needs_kick;
  477. }
  478. EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
  479. /**
  480. * virtqueue_notify - second half of split virtqueue_kick call.
  481. * @vq: the struct virtqueue
  482. *
  483. * This does not need to be serialized.
  484. *
  485. * Returns false if host notify failed or queue is broken, otherwise true.
  486. */
  487. bool virtqueue_notify(struct virtqueue *_vq)
  488. {
  489. struct vring_virtqueue *vq = to_vvq(_vq);
  490. if (unlikely(vq->broken))
  491. return false;
  492. /* Prod other side to tell it about changes. */
  493. if (!vq->notify(_vq)) {
  494. vq->broken = true;
  495. return false;
  496. }
  497. return true;
  498. }
  499. EXPORT_SYMBOL_GPL(virtqueue_notify);
  500. /**
  501. * virtqueue_kick - update after add_buf
  502. * @vq: the struct virtqueue
  503. *
  504. * After one or more virtqueue_add_* calls, invoke this to kick
  505. * the other side.
  506. *
  507. * Caller must ensure we don't call this with other virtqueue
  508. * operations at the same time (except where noted).
  509. *
  510. * Returns false if kick failed, otherwise true.
  511. */
  512. bool virtqueue_kick(struct virtqueue *vq)
  513. {
  514. if (virtqueue_kick_prepare(vq))
  515. return virtqueue_notify(vq);
  516. return true;
  517. }
  518. EXPORT_SYMBOL_GPL(virtqueue_kick);
  519. static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
  520. {
  521. unsigned int i, j;
  522. u16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
  523. /* Clear data ptr. */
  524. vq->desc_state[head].data = NULL;
  525. /* Put back on free list: unmap first-level descriptors and find end */
  526. i = head;
  527. while (vq->vring.desc[i].flags & nextflag) {
  528. vring_unmap_one(vq, &vq->vring.desc[i]);
  529. i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
  530. vq->vq.num_free++;
  531. }
  532. vring_unmap_one(vq, &vq->vring.desc[i]);
  533. vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head);
  534. vq->free_head = head;
  535. /* Plus final descriptor */
  536. vq->vq.num_free++;
  537. /* Free the indirect table, if any, now that it's unmapped. */
  538. if (vq->desc_state[head].indir_desc) {
  539. struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
  540. u32 len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);
  541. BUG_ON(!(vq->vring.desc[head].flags &
  542. cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
  543. BUG_ON(len == 0 || len % sizeof(struct vring_desc));
  544. for (j = 0; j < len / sizeof(struct vring_desc); j++)
  545. vring_unmap_one(vq, &indir_desc[j]);
  546. kfree(vq->desc_state[head].indir_desc);
  547. vq->desc_state[head].indir_desc = NULL;
  548. }
  549. }
  550. static inline bool more_used(const struct vring_virtqueue *vq)
  551. {
  552. return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, vq->vring.used->idx);
  553. }
  554. /**
  555. * virtqueue_get_buf - get the next used buffer
  556. * @vq: the struct virtqueue we're talking about.
  557. * @len: the length written into the buffer
  558. *
  559. * If the driver wrote data into the buffer, @len will be set to the
  560. * amount written. This means you don't need to clear the buffer
  561. * beforehand to ensure there's no data leakage in the case of short
  562. * writes.
  563. *
  564. * Caller must ensure we don't call this with other virtqueue
  565. * operations at the same time (except where noted).
  566. *
  567. * Returns NULL if there are no used buffers, or the "data" token
  568. * handed to virtqueue_add_*().
  569. */
  570. void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
  571. {
  572. struct vring_virtqueue *vq = to_vvq(_vq);
  573. void *ret;
  574. unsigned int i;
  575. u16 last_used;
  576. START_USE(vq);
  577. if (unlikely(vq->broken)) {
  578. END_USE(vq);
  579. return NULL;
  580. }
  581. if (!more_used(vq)) {
  582. pr_debug("No more buffers in queue\n");
  583. END_USE(vq);
  584. return NULL;
  585. }
  586. /* Only get used array entries after they have been exposed by host. */
  587. virtio_rmb(vq->weak_barriers);
  588. last_used = (vq->last_used_idx & (vq->vring.num - 1));
  589. i = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].id);
  590. *len = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].len);
  591. if (unlikely(i >= vq->vring.num)) {
  592. BAD_RING(vq, "id %u out of range\n", i);
  593. return NULL;
  594. }
  595. if (unlikely(!vq->desc_state[i].data)) {
  596. BAD_RING(vq, "id %u is not a head!\n", i);
  597. return NULL;
  598. }
  599. /* detach_buf clears data, so grab it now. */
  600. ret = vq->desc_state[i].data;
  601. detach_buf(vq, i);
  602. vq->last_used_idx++;
  603. /* If we expect an interrupt for the next entry, tell host
  604. * by writing event index and flush out the write before
  605. * the read in the next get_buf call. */
  606. if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
  607. virtio_store_mb(vq->weak_barriers,
  608. &vring_used_event(&vq->vring),
  609. cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
  610. #ifdef DEBUG
  611. vq->last_add_time_valid = false;
  612. #endif
  613. END_USE(vq);
  614. return ret;
  615. }
  616. EXPORT_SYMBOL_GPL(virtqueue_get_buf);
  617. /**
  618. * virtqueue_disable_cb - disable callbacks
  619. * @vq: the struct virtqueue we're talking about.
  620. *
  621. * Note that this is not necessarily synchronous, hence unreliable and only
  622. * useful as an optimization.
  623. *
  624. * Unlike other operations, this need not be serialized.
  625. */
  626. void virtqueue_disable_cb(struct virtqueue *_vq)
  627. {
  628. struct vring_virtqueue *vq = to_vvq(_vq);
  629. if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
  630. vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
  631. if (!vq->event)
  632. vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
  633. }
  634. }
  635. EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
  636. /**
  637. * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
  638. * @vq: the struct virtqueue we're talking about.
  639. *
  640. * This re-enables callbacks; it returns current queue state
  641. * in an opaque unsigned value. This value should be later tested by
  642. * virtqueue_poll, to detect a possible race between the driver checking for
  643. * more work, and enabling callbacks.
  644. *
  645. * Caller must ensure we don't call this with other virtqueue
  646. * operations at the same time (except where noted).
  647. */
  648. unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
  649. {
  650. struct vring_virtqueue *vq = to_vvq(_vq);
  651. u16 last_used_idx;
  652. START_USE(vq);
  653. /* We optimistically turn back on interrupts, then check if there was
  654. * more to do. */
  655. /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
  656. * either clear the flags bit or point the event index at the next
  657. * entry. Always do both to keep code simple. */
  658. if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
  659. vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
  660. if (!vq->event)
  661. vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
  662. }
  663. vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx);
  664. END_USE(vq);
  665. return last_used_idx;
  666. }
  667. EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
  668. /**
  669. * virtqueue_poll - query pending used buffers
  670. * @vq: the struct virtqueue we're talking about.
  671. * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
  672. *
  673. * Returns "true" if there are pending used buffers in the queue.
  674. *
  675. * This does not need to be serialized.
  676. */
  677. bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
  678. {
  679. struct vring_virtqueue *vq = to_vvq(_vq);
  680. virtio_mb(vq->weak_barriers);
  681. return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, vq->vring.used->idx);
  682. }
  683. EXPORT_SYMBOL_GPL(virtqueue_poll);
  684. /**
  685. * virtqueue_enable_cb - restart callbacks after disable_cb.
  686. * @vq: the struct virtqueue we're talking about.
  687. *
  688. * This re-enables callbacks; it returns "false" if there are pending
  689. * buffers in the queue, to detect a possible race between the driver
  690. * checking for more work, and enabling callbacks.
  691. *
  692. * Caller must ensure we don't call this with other virtqueue
  693. * operations at the same time (except where noted).
  694. */
  695. bool virtqueue_enable_cb(struct virtqueue *_vq)
  696. {
  697. unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);
  698. return !virtqueue_poll(_vq, last_used_idx);
  699. }
  700. EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
  701. /**
  702. * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
  703. * @vq: the struct virtqueue we're talking about.
  704. *
  705. * This re-enables callbacks but hints to the other side to delay
  706. * interrupts until most of the available buffers have been processed;
  707. * it returns "false" if there are many pending buffers in the queue,
  708. * to detect a possible race between the driver checking for more work,
  709. * and enabling callbacks.
  710. *
  711. * Caller must ensure we don't call this with other virtqueue
  712. * operations at the same time (except where noted).
  713. */
  714. bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
  715. {
  716. struct vring_virtqueue *vq = to_vvq(_vq);
  717. u16 bufs;
  718. START_USE(vq);
  719. /* We optimistically turn back on interrupts, then check if there was
  720. * more to do. */
  721. /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
  722. * either clear the flags bit or point the event index at the next
  723. * entry. Always update the event index to keep code simple. */
  724. if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
  725. vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
  726. if (!vq->event)
  727. vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
  728. }
  729. /* TODO: tune this threshold */
  730. bufs = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4;
  731. virtio_store_mb(vq->weak_barriers,
  732. &vring_used_event(&vq->vring),
  733. cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
  734. if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->vring.used->idx) - vq->last_used_idx) > bufs)) {
  735. END_USE(vq);
  736. return false;
  737. }
  738. END_USE(vq);
  739. return true;
  740. }
  741. EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
  742. /**
  743. * virtqueue_detach_unused_buf - detach first unused buffer
  744. * @vq: the struct virtqueue we're talking about.
  745. *
  746. * Returns NULL or the "data" token handed to virtqueue_add_*().
  747. * This is not valid on an active queue; it is useful only for device
  748. * shutdown.
  749. */
  750. void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
  751. {
  752. struct vring_virtqueue *vq = to_vvq(_vq);
  753. unsigned int i;
  754. void *buf;
  755. START_USE(vq);
  756. for (i = 0; i < vq->vring.num; i++) {
  757. if (!vq->desc_state[i].data)
  758. continue;
  759. /* detach_buf clears data, so grab it now. */
  760. buf = vq->desc_state[i].data;
  761. detach_buf(vq, i);
  762. vq->avail_idx_shadow--;
  763. vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
  764. END_USE(vq);
  765. return buf;
  766. }
  767. /* That should have freed everything. */
  768. BUG_ON(vq->vq.num_free != vq->vring.num);
  769. END_USE(vq);
  770. return NULL;
  771. }
  772. EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
  773. irqreturn_t vring_interrupt(int irq, void *_vq)
  774. {
  775. struct vring_virtqueue *vq = to_vvq(_vq);
  776. if (!more_used(vq)) {
  777. pr_debug("virtqueue interrupt with no work for %p\n", vq);
  778. return IRQ_NONE;
  779. }
  780. if (unlikely(vq->broken))
  781. return IRQ_HANDLED;
  782. pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
  783. if (vq->vq.callback)
  784. vq->vq.callback(&vq->vq);
  785. return IRQ_HANDLED;
  786. }
  787. EXPORT_SYMBOL_GPL(vring_interrupt);
  788. struct virtqueue *__vring_new_virtqueue(unsigned int index,
  789. struct vring vring,
  790. struct virtio_device *vdev,
  791. bool weak_barriers,
  792. bool (*notify)(struct virtqueue *),
  793. void (*callback)(struct virtqueue *),
  794. const char *name)
  795. {
  796. unsigned int i;
  797. struct vring_virtqueue *vq;
  798. vq = kmalloc(sizeof(*vq) + vring.num * sizeof(struct vring_desc_state),
  799. GFP_KERNEL);
  800. if (!vq)
  801. return NULL;
  802. vq->vring = vring;
  803. vq->vq.callback = callback;
  804. vq->vq.vdev = vdev;
  805. vq->vq.name = name;
  806. vq->vq.num_free = vring.num;
  807. vq->vq.index = index;
  808. vq->we_own_ring = false;
  809. vq->queue_dma_addr = 0;
  810. vq->queue_size_in_bytes = 0;
  811. vq->notify = notify;
  812. vq->weak_barriers = weak_barriers;
  813. vq->broken = false;
  814. vq->last_used_idx = 0;
  815. vq->avail_flags_shadow = 0;
  816. vq->avail_idx_shadow = 0;
  817. vq->num_added = 0;
  818. list_add_tail(&vq->vq.list, &vdev->vqs);
  819. #ifdef DEBUG
  820. vq->in_use = false;
  821. vq->last_add_time_valid = false;
  822. #endif
  823. vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
  824. vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
  825. /* No callback? Tell other side not to bother us. */
  826. if (!callback) {
  827. vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
  828. if (!vq->event)
  829. vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow);
  830. }
  831. /* Put everything in free lists. */
  832. vq->free_head = 0;
  833. for (i = 0; i < vring.num-1; i++)
  834. vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
  835. memset(vq->desc_state, 0, vring.num * sizeof(struct vring_desc_state));
  836. return &vq->vq;
  837. }
  838. EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
  839. static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
  840. dma_addr_t *dma_handle, gfp_t flag)
  841. {
  842. if (vring_use_dma_api(vdev)) {
  843. return dma_alloc_coherent(vdev->dev.parent, size,
  844. dma_handle, flag);
  845. } else {
  846. void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
  847. if (queue) {
  848. phys_addr_t phys_addr = virt_to_phys(queue);
  849. *dma_handle = (dma_addr_t)phys_addr;
  850. /*
  851. * Sanity check: make sure we dind't truncate
  852. * the address. The only arches I can find that
  853. * have 64-bit phys_addr_t but 32-bit dma_addr_t
  854. * are certain non-highmem MIPS and x86
  855. * configurations, but these configurations
  856. * should never allocate physical pages above 32
  857. * bits, so this is fine. Just in case, throw a
  858. * warning and abort if we end up with an
  859. * unrepresentable address.
  860. */
  861. if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
  862. free_pages_exact(queue, PAGE_ALIGN(size));
  863. return NULL;
  864. }
  865. }
  866. return queue;
  867. }
  868. }
  869. static void vring_free_queue(struct virtio_device *vdev, size_t size,
  870. void *queue, dma_addr_t dma_handle)
  871. {
  872. if (vring_use_dma_api(vdev)) {
  873. dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
  874. } else {
  875. free_pages_exact(queue, PAGE_ALIGN(size));
  876. }
  877. }
  878. struct virtqueue *vring_create_virtqueue(
  879. unsigned int index,
  880. unsigned int num,
  881. unsigned int vring_align,
  882. struct virtio_device *vdev,
  883. bool weak_barriers,
  884. bool may_reduce_num,
  885. bool (*notify)(struct virtqueue *),
  886. void (*callback)(struct virtqueue *),
  887. const char *name)
  888. {
  889. struct virtqueue *vq;
  890. void *queue = NULL;
  891. dma_addr_t dma_addr;
  892. size_t queue_size_in_bytes;
  893. struct vring vring;
  894. /* We assume num is a power of 2. */
  895. if (num & (num - 1)) {
  896. dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
  897. return NULL;
  898. }
  899. /* TODO: allocate each queue chunk individually */
  900. for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
  901. queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
  902. &dma_addr,
  903. GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
  904. if (queue)
  905. break;
  906. }
  907. if (!num)
  908. return NULL;
  909. if (!queue) {
  910. /* Try to get a single page. You are my only hope! */
  911. queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
  912. &dma_addr, GFP_KERNEL|__GFP_ZERO);
  913. }
  914. if (!queue)
  915. return NULL;
  916. queue_size_in_bytes = vring_size(num, vring_align);
  917. vring_init(&vring, num, queue, vring_align);
  918. vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers,
  919. notify, callback, name);
  920. if (!vq) {
  921. vring_free_queue(vdev, queue_size_in_bytes, queue,
  922. dma_addr);
  923. return NULL;
  924. }
  925. to_vvq(vq)->queue_dma_addr = dma_addr;
  926. to_vvq(vq)->queue_size_in_bytes = queue_size_in_bytes;
  927. to_vvq(vq)->we_own_ring = true;
  928. return vq;
  929. }
  930. EXPORT_SYMBOL_GPL(vring_create_virtqueue);
  931. struct virtqueue *vring_new_virtqueue(unsigned int index,
  932. unsigned int num,
  933. unsigned int vring_align,
  934. struct virtio_device *vdev,
  935. bool weak_barriers,
  936. void *pages,
  937. bool (*notify)(struct virtqueue *vq),
  938. void (*callback)(struct virtqueue *vq),
  939. const char *name)
  940. {
  941. struct vring vring;
  942. vring_init(&vring, num, pages, vring_align);
  943. return __vring_new_virtqueue(index, vring, vdev, weak_barriers,
  944. notify, callback, name);
  945. }
  946. EXPORT_SYMBOL_GPL(vring_new_virtqueue);
  947. void vring_del_virtqueue(struct virtqueue *_vq)
  948. {
  949. struct vring_virtqueue *vq = to_vvq(_vq);
  950. if (vq->we_own_ring) {
  951. vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes,
  952. vq->vring.desc, vq->queue_dma_addr);
  953. }
  954. list_del(&_vq->list);
  955. kfree(vq);
  956. }
  957. EXPORT_SYMBOL_GPL(vring_del_virtqueue);
  958. /* Manipulates transport-specific feature bits. */
  959. void vring_transport_features(struct virtio_device *vdev)
  960. {
  961. unsigned int i;
  962. for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
  963. switch (i) {
  964. case VIRTIO_RING_F_INDIRECT_DESC:
  965. break;
  966. case VIRTIO_RING_F_EVENT_IDX:
  967. break;
  968. case VIRTIO_F_VERSION_1:
  969. break;
  970. case VIRTIO_F_IOMMU_PLATFORM:
  971. break;
  972. default:
  973. /* We don't understand this bit. */
  974. __virtio_clear_bit(vdev, i);
  975. }
  976. }
  977. }
  978. EXPORT_SYMBOL_GPL(vring_transport_features);
  979. /**
  980. * virtqueue_get_vring_size - return the size of the virtqueue's vring
  981. * @vq: the struct virtqueue containing the vring of interest.
  982. *
  983. * Returns the size of the vring. This is mainly used for boasting to
  984. * userspace. Unlike other operations, this need not be serialized.
  985. */
  986. unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
  987. {
  988. struct vring_virtqueue *vq = to_vvq(_vq);
  989. return vq->vring.num;
  990. }
  991. EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
  992. bool virtqueue_is_broken(struct virtqueue *_vq)
  993. {
  994. struct vring_virtqueue *vq = to_vvq(_vq);
  995. return vq->broken;
  996. }
  997. EXPORT_SYMBOL_GPL(virtqueue_is_broken);
  998. /*
  999. * This should prevent the device from being used, allowing drivers to
  1000. * recover. You may need to grab appropriate locks to flush.
  1001. */
  1002. void virtio_break_device(struct virtio_device *dev)
  1003. {
  1004. struct virtqueue *_vq;
  1005. list_for_each_entry(_vq, &dev->vqs, list) {
  1006. struct vring_virtqueue *vq = to_vvq(_vq);
  1007. vq->broken = true;
  1008. }
  1009. }
  1010. EXPORT_SYMBOL_GPL(virtio_break_device);
  1011. dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
  1012. {
  1013. struct vring_virtqueue *vq = to_vvq(_vq);
  1014. BUG_ON(!vq->we_own_ring);
  1015. return vq->queue_dma_addr;
  1016. }
  1017. EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
  1018. dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
  1019. {
  1020. struct vring_virtqueue *vq = to_vvq(_vq);
  1021. BUG_ON(!vq->we_own_ring);
  1022. return vq->queue_dma_addr +
  1023. ((char *)vq->vring.avail - (char *)vq->vring.desc);
  1024. }
  1025. EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
  1026. dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
  1027. {
  1028. struct vring_virtqueue *vq = to_vvq(_vq);
  1029. BUG_ON(!vq->we_own_ring);
  1030. return vq->queue_dma_addr +
  1031. ((char *)vq->vring.used - (char *)vq->vring.desc);
  1032. }
  1033. EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
  1034. const struct vring *virtqueue_get_vring(struct virtqueue *vq)
  1035. {
  1036. return &to_vvq(vq)->vring;
  1037. }
  1038. EXPORT_SYMBOL_GPL(virtqueue_get_vring);
  1039. MODULE_LICENSE("GPL");