/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2016 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Intel Virtio Over PCIe (VOP) driver.
 *
 */
#include <linux/sched.h>
#include <linux/poll.h>
#include <linux/dma-mapping.h>

#include <linux/mic_common.h>
#include "../common/mic_dev.h"

#include <linux/mic_ioctl.h>
#include "vop_main.h"

/* Helper API to obtain the VOP PCIe device */
static inline struct device *vop_dev(struct vop_vdev *vdev)
{
	return vdev->vpdev->dev.parent;
}

/* Helper API to check if a virtio device is initialized */
static inline int vop_vdev_inited(struct vop_vdev *vdev)
{
	if (!vdev)
		return -EINVAL;
	/* Device has not been created yet */
	if (!vdev->dd || !vdev->dd->type) {
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, -EINVAL);
		return -EINVAL;
	}
	/* Device has been removed/deleted */
	if (vdev->dd->type == -1) {
		dev_dbg(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, -ENODEV);
		return -ENODEV;
	}
	return 0;
}

static void _vop_notify(struct vringh *vrh)
{
	struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh);
	struct vop_vdev *vdev = vvrh->vdev;
	struct vop_device *vpdev = vdev->vpdev;
	s8 db = vdev->dc->h2c_vdev_db;

	if (db != -1)
		vpdev->hw_ops->send_intr(vpdev, db);
}

static void vop_virtio_init_post(struct vop_vdev *vdev)
{
	struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
	struct vop_device *vpdev = vdev->vpdev;
	int i, used_size;

	for (i = 0; i < vdev->dd->num_vq; i++) {
		used_size = PAGE_ALIGN(sizeof(u16) * 3 +
				sizeof(struct vring_used_elem) *
				le16_to_cpu(vqconfig->num));
		if (!le64_to_cpu(vqconfig[i].used_address)) {
			dev_warn(vop_dev(vdev), "used_address zero??\n");
			continue;
		}
		vdev->vvr[i].vrh.vring.used =
			(void __force *)vpdev->hw_ops->ioremap(
			vpdev,
			le64_to_cpu(vqconfig[i].used_address),
			used_size);
	}

	vdev->dc->used_address_updated = 0;

	dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n",
		 __func__, vdev->virtio_id);
}

static inline void vop_virtio_device_reset(struct vop_vdev *vdev)
{
	int i;

	dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n",
		__func__, vdev->dd->status, vdev->virtio_id);

	for (i = 0; i < vdev->dd->num_vq; i++)
		/*
		 * Avoid lockdep false positive. The + 1 is for the vop
		 * mutex which is held in the reset devices code path.
		 */
		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);

	/* 0 status means "reset" */
	vdev->dd->status = 0;
	vdev->dc->vdev_reset = 0;
	vdev->dc->host_ack = 1;

	for (i = 0; i < vdev->dd->num_vq; i++) {
		struct vringh *vrh = &vdev->vvr[i].vrh;

		vdev->vvr[i].vring.info->avail_idx = 0;
		vrh->completed = 0;
		vrh->last_avail_idx = 0;
		vrh->last_used_idx = 0;
	}

	for (i = 0; i < vdev->dd->num_vq; i++)
		mutex_unlock(&vdev->vvr[i].vr_mutex);
}

static void vop_virtio_reset_devices(struct vop_info *vi)
{
	struct list_head *pos, *tmp;
	struct vop_vdev *vdev;

	list_for_each_safe(pos, tmp, &vi->vdev_list) {
		vdev = list_entry(pos, struct vop_vdev, list);
		vop_virtio_device_reset(vdev);
		vdev->poll_wake = 1;
		wake_up(&vdev->waitq);
	}
}

static void vop_bh_handler(struct work_struct *work)
{
	struct vop_vdev *vdev = container_of(work, struct vop_vdev,
			virtio_bh_work);

	if (vdev->dc->used_address_updated)
		vop_virtio_init_post(vdev);

	if (vdev->dc->vdev_reset)
		vop_virtio_device_reset(vdev);

	vdev->poll_wake = 1;
	wake_up(&vdev->waitq);
}

static irqreturn_t _vop_virtio_intr_handler(int irq, void *data)
{
	struct vop_vdev *vdev = data;
	struct vop_device *vpdev = vdev->vpdev;

	vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db);
	schedule_work(&vdev->virtio_bh_work);
	return IRQ_HANDLED;
}

static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp)
{
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
	int ret = 0, retry, i;
	struct vop_device *vpdev = vdev->vpdev;
	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
	s8 db = bootparam->h2c_config_db;

	mutex_lock(&vi->vop_mutex);
	for (i = 0; i < vdev->dd->num_vq; i++)
		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);

	if (db == -1 || vdev->dd->type == -1) {
		ret = -EIO;
		goto exit;
	}

	memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len);
	vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
	vpdev->hw_ops->send_intr(vpdev, db);

	for (retry = 100; retry--;) {
		ret = wait_event_timeout(wake, vdev->dc->guest_ack,
					 msecs_to_jiffies(100));
		if (ret)
			break;
	}

	dev_dbg(vop_dev(vdev),
		"%s %d retry: %d\n", __func__, __LINE__, retry);
	vdev->dc->config_change = 0;
	vdev->dc->guest_ack = 0;
exit:
	for (i = 0; i < vdev->dd->num_vq; i++)
		mutex_unlock(&vdev->vvr[i].vr_mutex);
	mutex_unlock(&vi->vop_mutex);
	return ret;
}

static int vop_copy_dp_entry(struct vop_vdev *vdev,
			     struct mic_device_desc *argp, __u8 *type,
			     struct mic_device_desc **devpage)
{
	struct vop_device *vpdev = vdev->vpdev;
	struct mic_device_desc *devp;
	struct mic_vqconfig *vqconfig;
	int ret = 0, i;
	bool slot_found = false;

	vqconfig = mic_vq_config(argp);
	for (i = 0; i < argp->num_vq; i++) {
		if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
			ret = -EINVAL;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto exit;
		}
	}

	/* Find the first free device page entry */
	for (i = sizeof(struct mic_bootparam);
	     i < MIC_DP_SIZE - mic_total_desc_size(argp);
	     i += mic_total_desc_size(devp)) {
		devp = vpdev->hw_ops->get_dp(vpdev) + i;
		if (devp->type == 0 || devp->type == -1) {
			slot_found = true;
			break;
		}
	}
	if (!slot_found) {
		ret = -EINVAL;
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, ret);
		goto exit;
	}
	/*
	 * Save off the type before doing the memcpy. Type will be set in the
	 * end after completing all initialization for the new device.
	 */
	*type = argp->type;
	argp->type = 0;
	memcpy(devp, argp, mic_desc_size(argp));

	*devpage = devp;
exit:
	return ret;
}
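
/*
 * Editorial note (layout sketch, not from the original source): each device
 * page entry written above is laid out roughly as
 *
 *	struct mic_device_desc		header: type, num_vq, feature_len, ...
 *	struct mic_vqconfig[num_vq]	one entry per virtqueue
 *	u8 features[2 * feature_len]	device and guest-acked feature bits
 *	u8 config[config_len]		virtio config space
 *	struct mic_device_ctrl		host/guest handshake block
 *
 * which is why the free-slot scan advances by mic_total_desc_size() and why
 * vop_init_device_ctrl() below finds the control block at
 * devpage + mic_aligned_desc_size(devpage).
 */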

static void vop_init_device_ctrl(struct vop_vdev *vdev,
				 struct mic_device_desc *devpage)
{
	struct mic_device_ctrl *dc;

	dc = (void *)devpage + mic_aligned_desc_size(devpage);

	dc->config_change = 0;
	dc->guest_ack = 0;
	dc->vdev_reset = 0;
	dc->host_ack = 0;
	dc->used_address_updated = 0;
	dc->c2h_vdev_db = -1;
	dc->h2c_vdev_db = -1;
	vdev->dc = dc;
}

static int vop_virtio_add_device(struct vop_vdev *vdev,
				 struct mic_device_desc *argp)
{
	struct vop_info *vi = vdev->vi;
	struct vop_device *vpdev = vi->vpdev;
	struct mic_device_desc *dd = NULL;
	struct mic_vqconfig *vqconfig;
	int vr_size, i, j, ret;
	u8 type = 0;
	s8 db = -1;
	char irqname[16];
	struct mic_bootparam *bootparam;
	u16 num;
	dma_addr_t vr_addr;

	bootparam = vpdev->hw_ops->get_dp(vpdev);
	init_waitqueue_head(&vdev->waitq);
	INIT_LIST_HEAD(&vdev->list);
	vdev->vpdev = vpdev;

	ret = vop_copy_dp_entry(vdev, argp, &type, &dd);
	if (ret) {
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, ret);
		return ret;
	}

	vop_init_device_ctrl(vdev, dd);

	vdev->dd = dd;
	vdev->virtio_id = type;
	vqconfig = mic_vq_config(dd);
	INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler);

	for (i = 0; i < dd->num_vq; i++) {
		struct vop_vringh *vvr = &vdev->vvr[i];
		struct mic_vring *vr = &vdev->vvr[i].vring;

		num = le16_to_cpu(vqconfig[i].num);
		mutex_init(&vvr->vr_mutex);
		vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) +
			sizeof(struct _mic_vring_info));
		vr->va = (void *)
			__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					 get_order(vr_size));
		if (!vr->va) {
			ret = -ENOMEM;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vr->len = vr_size;
		vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN);
		vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i);
		vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
					 DMA_BIDIRECTIONAL);
		if (dma_mapping_error(&vpdev->dev, vr_addr)) {
			free_pages((unsigned long)vr->va, get_order(vr_size));
			ret = -ENOMEM;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vqconfig[i].address = cpu_to_le64(vr_addr);

		vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
		ret = vringh_init_kern(&vvr->vrh,
				       *(u32 *)mic_vq_features(vdev->dd),
				       num, false, vr->vr.desc, vr->vr.avail,
				       vr->vr.used);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vringh_kiov_init(&vvr->riov, NULL, 0);
		vringh_kiov_init(&vvr->wiov, NULL, 0);
		vvr->head = USHRT_MAX;
		vvr->vdev = vdev;
		vvr->vrh.notify = _vop_notify;
		dev_dbg(&vpdev->dev,
			"%s %d index %d va %p info %p vr_size 0x%x\n",
			__func__, __LINE__, i, vr->va, vr->info, vr_size);
		vvr->buf = (void *)__get_free_pages(GFP_KERNEL,
					get_order(VOP_INT_DMA_BUF_SIZE));
		vvr->buf_da = dma_map_single(&vpdev->dev,
					     vvr->buf, VOP_INT_DMA_BUF_SIZE,
					     DMA_BIDIRECTIONAL);
	}

	snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index,
		 vdev->virtio_id);
	vdev->virtio_db = vpdev->hw_ops->next_db(vpdev);
	vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
			_vop_virtio_intr_handler, irqname, vdev,
			vdev->virtio_db);
	if (IS_ERR(vdev->virtio_cookie)) {
		ret = PTR_ERR(vdev->virtio_cookie);
		dev_dbg(&vpdev->dev, "request irq failed\n");
		goto err;
	}

	vdev->dc->c2h_vdev_db = vdev->virtio_db;

	/*
	 * Order the type update with previous stores. This write barrier
	 * is paired with the corresponding read barrier before the uncached
	 * system memory read of the type, on the card while scanning the
	 * device page.
	 */
	smp_wmb();
	dd->type = type;
	argp->type = type;

	if (bootparam) {
		db = bootparam->h2c_config_db;
		if (db != -1)
			vpdev->hw_ops->send_intr(vpdev, db);
	}
	dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db);
	return 0;
err:
	vqconfig = mic_vq_config(dd);
	for (j = 0; j < i; j++) {
		struct vop_vringh *vvr = &vdev->vvr[j];

		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address),
				 vvr->vring.len, DMA_BIDIRECTIONAL);
		free_pages((unsigned long)vvr->vring.va,
			   get_order(vvr->vring.len));
	}
	return ret;
}

static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp,
			   struct vop_device *vpdev)
{
	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
	s8 db;
	int ret, retry;
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);

	devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
	db = bootparam->h2c_config_db;
	if (db != -1)
		vpdev->hw_ops->send_intr(vpdev, db);
	else
		goto done;
	for (retry = 15; retry--;) {
		ret = wait_event_timeout(wake, devp->guest_ack,
					 msecs_to_jiffies(1000));
		if (ret)
			break;
	}
done:
	devp->config_change = 0;
	devp->guest_ack = 0;
}

static void vop_virtio_del_device(struct vop_vdev *vdev)
{
	struct vop_info *vi = vdev->vi;
	struct vop_device *vpdev = vdev->vpdev;
	int i;
	struct mic_vqconfig *vqconfig;
	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);

	if (!bootparam)
		goto skip_hot_remove;
	vop_dev_remove(vi, vdev->dc, vpdev);
skip_hot_remove:
	vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
	flush_work(&vdev->virtio_bh_work);
	vqconfig = mic_vq_config(vdev->dd);
	for (i = 0; i < vdev->dd->num_vq; i++) {
		struct vop_vringh *vvr = &vdev->vvr[i];

		dma_unmap_single(&vpdev->dev,
				 vvr->buf_da, VOP_INT_DMA_BUF_SIZE,
				 DMA_BIDIRECTIONAL);
		free_pages((unsigned long)vvr->buf,
			   get_order(VOP_INT_DMA_BUF_SIZE));
		vringh_kiov_cleanup(&vvr->riov);
		vringh_kiov_cleanup(&vvr->wiov);
		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address),
				 vvr->vring.len, DMA_BIDIRECTIONAL);
		free_pages((unsigned long)vvr->vring.va,
			   get_order(vvr->vring.len));
	}
	/*
	 * Order the type update with previous stores. This write barrier
	 * is paired with the corresponding read barrier before the uncached
	 * system memory read of the type, on the card while scanning the
	 * device page.
	 */
	smp_wmb();
	vdev->dd->type = -1;
}

/*
 * vop_sync_dma - Wrapper for synchronous DMAs.
 *
 * @vdev - The VOP virtio device whose DMA channel is used for the transfer.
 * @dst - destination DMA address.
 * @src - source DMA address.
 * @len - size of the transfer.
 *
 * Return 0 on success and a negative error code on failure.
 */
static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src,
			size_t len)
{
	int err = 0;
	struct dma_device *ddev;
	struct dma_async_tx_descriptor *tx;
	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
	struct dma_chan *vop_ch = vi->dma_ch;

	if (!vop_ch) {
		err = -EBUSY;
		goto error;
	}
	ddev = vop_ch->device;
	tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len,
		DMA_PREP_FENCE);
	if (!tx) {
		err = -ENOMEM;
		goto error;
	} else {
		dma_cookie_t cookie;

		cookie = tx->tx_submit(tx);
		if (dma_submit_error(cookie)) {
			err = -ENOMEM;
			goto error;
		}
		dma_async_issue_pending(vop_ch);
		err = dma_sync_wait(vop_ch, cookie);
	}
error:
	if (err)
		dev_err(&vi->vpdev->dev, "%s %d err %d\n",
			__func__, __LINE__, err);
	return err;
}
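
/*
 * Editorial usage sketch (illustration only, names taken from the copy
 * helpers below): DMA from card memory into the per-vring bounce buffer
 * that was dma_map_single()'d at device-add time, then copy to userspace.
 * card_daddr and partlen are placeholders here.
 *
 *	err = vop_sync_dma(vdev, vvr->buf_da, card_daddr,
 *			   ALIGN(partlen, dma_alignment));
 *	if (!err)
 *		err = copy_to_user(ubuf, vvr->buf, partlen) ? -EFAULT : 0;
 */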

#define VOP_USE_DMA true

/*
 * Initiates the copies across the PCIe bus from card memory to a user
 * space buffer. When transfers are done using DMA, source/destination
 * addresses and transfer length must follow the alignment requirements of
 * the MIC DMA engine.
 */
static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf,
				   size_t len, u64 daddr, size_t dlen,
				   int vr_idx)
{
	struct vop_device *vpdev = vdev->vpdev;
	void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
	size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
	bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
	size_t dma_offset, partlen;
	int err;

	if (!VOP_USE_DMA) {
		if (copy_to_user(ubuf, (void __force *)dbuf, len)) {
			err = -EFAULT;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		vdev->in_bytes += len;
		err = 0;
		goto err;
	}

	dma_offset = daddr - round_down(daddr, dma_alignment);
	daddr -= dma_offset;
	len += dma_offset;
	/*
	 * X100 uses DMA addresses as seen by the card so adding
	 * the aperture base is not required for DMA. However x200
	 * requires DMA addresses to be an offset into the bar so
	 * add the aperture base for x200.
	 */
	if (x200)
		daddr += vpdev->aper->pa;
	while (len) {
		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
		err = vop_sync_dma(vdev, vvr->buf_da, daddr,
				   ALIGN(partlen, dma_alignment));
		if (err) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		if (copy_to_user(ubuf, vvr->buf + dma_offset,
				 partlen - dma_offset)) {
			err = -EFAULT;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		daddr += partlen;
		ubuf += partlen;
		dbuf += partlen;
		vdev->in_bytes_dma += partlen;
		vdev->in_bytes += partlen;
		len -= partlen;
		dma_offset = 0;
	}
	err = 0;
err:
	vpdev->hw_ops->iounmap(vpdev, dbuf);
	dev_dbg(vop_dev(vdev),
		"%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
		__func__, ubuf, dbuf, len, vr_idx);
	return err;
}

/*
 * Initiates copies across the PCIe bus from a user space buffer to card
 * memory. When transfers are done using DMA, source/destination addresses
 * and transfer length must follow the alignment requirements of the MIC
 * DMA engine.
 */
static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf,
				     size_t len, u64 daddr, size_t dlen,
				     int vr_idx)
{
	struct vop_device *vpdev = vdev->vpdev;
	void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
	size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
	bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
	size_t partlen;
	bool dma = VOP_USE_DMA;
	int err = 0;

	if (daddr & (dma_alignment - 1)) {
		vdev->tx_dst_unaligned += len;
		dma = false;
	} else if (ALIGN(len, dma_alignment) > dlen) {
		vdev->tx_len_unaligned += len;
		dma = false;
	}

	if (!dma)
		goto memcpy;

	/*
	 * X100 uses DMA addresses as seen by the card so adding
	 * the aperture base is not required for DMA. However x200
	 * requires DMA addresses to be an offset into the bar so
	 * add the aperture base for x200.
	 */
	if (x200)
		daddr += vpdev->aper->pa;
	while (len) {
		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);

		if (copy_from_user(vvr->buf, ubuf, partlen)) {
			err = -EFAULT;
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		err = vop_sync_dma(vdev, daddr, vvr->buf_da,
				   ALIGN(partlen, dma_alignment));
		if (err) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, err);
			goto err;
		}
		daddr += partlen;
		ubuf += partlen;
		dbuf += partlen;
		vdev->out_bytes_dma += partlen;
		vdev->out_bytes += partlen;
		len -= partlen;
	}
memcpy:
	/*
	 * We are copying to IO below and should ideally use something
	 * like copy_from_user_toio(..) if it existed.
	 */
	if (copy_from_user((void __force *)dbuf, ubuf, len)) {
		err = -EFAULT;
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, err);
		goto err;
	}
	vdev->out_bytes += len;
	err = 0;
err:
	vpdev->hw_ops->iounmap(vpdev, dbuf);
	dev_dbg(vop_dev(vdev),
		"%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
		__func__, ubuf, dbuf, len, vr_idx);
	return err;
}

#define MIC_VRINGH_READ true

/* Determine the total number of bytes consumed in a VRINGH KIOV */
static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov)
{
	int i;
	u32 total = iov->consumed;

	for (i = 0; i < iov->i; i++)
		total += iov->iov[i].iov_len;
	return total;
}

/*
 * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
 * This API is heavily based on the vringh_iov_xfer(..) implementation
 * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
 * and vringh_iov_push_kern(..) directly is because there is no
 * way to override the VRINGH xfer(..) routines as of v3.10.
 */
static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov,
			   void __user *ubuf, size_t len, bool read, int vr_idx,
			   size_t *out_len)
{
	int ret = 0;
	size_t partlen, tot_len = 0;

	while (len && iov->i < iov->used) {
		struct kvec *kiov = &iov->iov[iov->i];

		partlen = min(kiov->iov_len, len);
		if (read)
			ret = vop_virtio_copy_to_user(vdev, ubuf, partlen,
						      (u64)kiov->iov_base,
						      kiov->iov_len,
						      vr_idx);
		else
			ret = vop_virtio_copy_from_user(vdev, ubuf, partlen,
							(u64)kiov->iov_base,
							kiov->iov_len,
							vr_idx);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= partlen;
		ubuf += partlen;
		tot_len += partlen;
		iov->consumed += partlen;
		kiov->iov_len -= partlen;
		kiov->iov_base += partlen;
		if (!kiov->iov_len) {
			/* Fix up old iov element then increment. */
			kiov->iov_len = iov->consumed;
			kiov->iov_base -= iov->consumed;
			iov->consumed = 0;
			iov->i++;
		}
	}
	*out_len = tot_len;
	return ret;
}

/*
 * Use the standard VRINGH infrastructure in the kernel to fetch new
 * descriptors, initiate the copies and update the used ring.
 */
static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy)
{
	int ret = 0;
	u32 iovcnt = copy->iovcnt;
	struct iovec iov;
	struct iovec __user *u_iov = copy->iov;
	void __user *ubuf = NULL;
	struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx];
	struct vringh_kiov *riov = &vvr->riov;
	struct vringh_kiov *wiov = &vvr->wiov;
	struct vringh *vrh = &vvr->vrh;
	u16 *head = &vvr->head;
	struct mic_vring *vr = &vvr->vring;
	size_t len = 0, out_len;

	copy->out_len = 0;
	/* Fetch a new IOVEC if all previous elements have been processed */
	if (riov->i == riov->used && wiov->i == wiov->used) {
		ret = vringh_getdesc_kern(vrh, riov, wiov,
					  head, GFP_KERNEL);
		/* Check if there are available descriptors */
		if (ret <= 0)
			return ret;
	}
	while (iovcnt) {
		if (!len) {
			/* Copy over a new iovec from user space. */
			ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
			if (ret) {
				ret = -EINVAL;
				dev_err(vop_dev(vdev), "%s %d err %d\n",
					__func__, __LINE__, ret);
				break;
			}
			len = iov.iov_len;
			ubuf = iov.iov_base;
		}
		/* Issue all the read descriptors first */
		ret = vop_vringh_copy(vdev, riov, ubuf, len,
				      MIC_VRINGH_READ, copy->vr_idx, &out_len);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= out_len;
		ubuf += out_len;
		copy->out_len += out_len;
		/* Issue the write descriptors next */
		ret = vop_vringh_copy(vdev, wiov, ubuf, len,
				      !MIC_VRINGH_READ, copy->vr_idx, &out_len);
		if (ret) {
			dev_err(vop_dev(vdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= out_len;
		ubuf += out_len;
		copy->out_len += out_len;
		if (!len) {
			/* One user space iovec is now completed */
			iovcnt--;
			u_iov++;
		}
		/* Exit loop if all elements in KIOVs have been processed. */
		if (riov->i == riov->used && wiov->i == wiov->used)
			break;
	}
	/*
	 * Update the used ring if a descriptor was available and some data was
	 * copied in/out and the user asked for a used ring update.
	 */
	if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
		u32 total = 0;

		/* Determine the total data consumed */
		total += vop_vringh_iov_consumed(riov);
		total += vop_vringh_iov_consumed(wiov);
		vringh_complete_kern(vrh, *head, total);
		*head = USHRT_MAX;
		if (vringh_need_notify_kern(vrh) > 0)
			vringh_notify(vrh);
		vringh_kiov_cleanup(riov);
		vringh_kiov_cleanup(wiov);
		/* Update avail idx for user space */
		vr->info->avail_idx = vrh->last_avail_idx;
	}
	return ret;
}

static inline int vop_verify_copy_args(struct vop_vdev *vdev,
				       struct mic_copy_desc *copy)
{
	if (!vdev || copy->vr_idx >= vdev->dd->num_vq)
		return -EINVAL;
	return 0;
}

/* Copy a specified number of virtio descriptors in a chain */
static int vop_virtio_copy_desc(struct vop_vdev *vdev,
				struct mic_copy_desc *copy)
{
	int err;
	struct vop_vringh *vvr;

	err = vop_verify_copy_args(vdev, copy);
	if (err)
		return err;

	vvr = &vdev->vvr[copy->vr_idx];
	mutex_lock(&vvr->vr_mutex);
	if (!vop_vdevup(vdev)) {
		err = -ENODEV;
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, err);
		goto err;
	}
	err = _vop_virtio_copy(vdev, copy);
	if (err) {
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, err);
	}
err:
	mutex_unlock(&vvr->vr_mutex);
	return err;
}

static int vop_open(struct inode *inode, struct file *f)
{
	struct vop_vdev *vdev;
	struct vop_info *vi = container_of(f->private_data,
		struct vop_info, miscdev);

	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
	if (!vdev)
		return -ENOMEM;
	vdev->vi = vi;
	mutex_init(&vdev->vdev_mutex);
	f->private_data = vdev;
	init_completion(&vdev->destroy);
	complete(&vdev->destroy);
	return 0;
}

static int vop_release(struct inode *inode, struct file *f)
{
	struct vop_vdev *vdev = f->private_data, *vdev_tmp;
	struct vop_info *vi = vdev->vi;
	struct list_head *pos, *tmp;
	bool found = false;

	mutex_lock(&vdev->vdev_mutex);
	if (vdev->deleted)
		goto unlock;
	mutex_lock(&vi->vop_mutex);
	list_for_each_safe(pos, tmp, &vi->vdev_list) {
		vdev_tmp = list_entry(pos, struct vop_vdev, list);
		if (vdev == vdev_tmp) {
			vop_virtio_del_device(vdev);
			list_del(pos);
			found = true;
			break;
		}
	}
	mutex_unlock(&vi->vop_mutex);
unlock:
	mutex_unlock(&vdev->vdev_mutex);
	if (!found)
		wait_for_completion(&vdev->destroy);
	f->private_data = NULL;
	kfree(vdev);
	return 0;
}

static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	struct vop_vdev *vdev = f->private_data;
	struct vop_info *vi = vdev->vi;
	void __user *argp = (void __user *)arg;
	int ret;

	switch (cmd) {
	case MIC_VIRTIO_ADD_DEVICE:
	{
		struct mic_device_desc dd, *dd_config;

		if (copy_from_user(&dd, argp, sizeof(dd)))
			return -EFAULT;

		if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
		    dd.num_vq > MIC_MAX_VRINGS)
			return -EINVAL;

		dd_config = kzalloc(mic_desc_size(&dd), GFP_KERNEL);
		if (!dd_config)
			return -ENOMEM;
		if (copy_from_user(dd_config, argp, mic_desc_size(&dd))) {
			ret = -EFAULT;
			goto free_ret;
		}
		/* Ensure desc has not changed between the two reads */
		if (memcmp(&dd, dd_config, sizeof(dd))) {
			ret = -EINVAL;
			goto free_ret;
		}
		mutex_lock(&vdev->vdev_mutex);
		mutex_lock(&vi->vop_mutex);
		ret = vop_virtio_add_device(vdev, dd_config);
		if (ret)
			goto unlock_ret;
		list_add_tail(&vdev->list, &vi->vdev_list);
unlock_ret:
		mutex_unlock(&vi->vop_mutex);
		mutex_unlock(&vdev->vdev_mutex);
free_ret:
		kfree(dd_config);
		return ret;
	}
	case MIC_VIRTIO_COPY_DESC:
	{
		struct mic_copy_desc copy;

		mutex_lock(&vdev->vdev_mutex);
		ret = vop_vdev_inited(vdev);
		if (ret)
			goto _unlock_ret;

		if (copy_from_user(&copy, argp, sizeof(copy))) {
			ret = -EFAULT;
			goto _unlock_ret;
		}

		ret = vop_virtio_copy_desc(vdev, &copy);
		if (ret < 0)
			goto _unlock_ret;
		if (copy_to_user(
			&((struct mic_copy_desc __user *)argp)->out_len,
			&copy.out_len, sizeof(copy.out_len)))
			ret = -EFAULT;
_unlock_ret:
		mutex_unlock(&vdev->vdev_mutex);
		return ret;
	}
	case MIC_VIRTIO_CONFIG_CHANGE:
	{
		void *buf;

		mutex_lock(&vdev->vdev_mutex);
		ret = vop_vdev_inited(vdev);
		if (ret)
			goto __unlock_ret;
		buf = kzalloc(vdev->dd->config_len, GFP_KERNEL);
		if (!buf) {
			ret = -ENOMEM;
			goto __unlock_ret;
		}
		if (copy_from_user(buf, argp, vdev->dd->config_len)) {
			ret = -EFAULT;
			goto done;
		}
		ret = vop_virtio_config_change(vdev, buf);
done:
		kfree(buf);
__unlock_ret:
		mutex_unlock(&vdev->vdev_mutex);
		return ret;
	}
	default:
		return -ENOIOCTLCMD;
	}
	return 0;
}
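
/*
 * Editorial usage sketch (hypothetical userspace, not part of the driver):
 * driving the ioctls above via the misc character device registered in
 * vop_host_init() (typically /dev/vop_virtio0). The descriptor builder
 * build_device_desc() and the iov/vr_idx values are illustrative only.
 *
 *	int fd = open("/dev/vop_virtio0", O_RDWR);
 *	struct mic_device_desc *desc = build_device_desc();
 *	ioctl(fd, MIC_VIRTIO_ADD_DEVICE, desc);
 *
 *	struct mic_copy_desc copy = {
 *		.iov = iov, .iovcnt = iovcnt,
 *		.vr_idx = vr_idx, .update_used = 1,
 *	};
 *	ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
 *	printf("copied %u bytes\n", copy.out_len);
 */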

/*
 * We return POLLIN | POLLOUT from poll when new buffers are enqueued, and
 * not when previously enqueued buffers may be available. This means that
 * in the card->host (TX) path, when userspace is unblocked by poll it
 * must drain all available descriptors or it can stall.
 */
static unsigned int vop_poll(struct file *f, poll_table *wait)
{
	struct vop_vdev *vdev = f->private_data;
	int mask = 0;

	mutex_lock(&vdev->vdev_mutex);
	if (vop_vdev_inited(vdev)) {
		mask = POLLERR;
		goto done;
	}
	poll_wait(f, &vdev->waitq, wait);
	if (vop_vdev_inited(vdev)) {
		mask = POLLERR;
	} else if (vdev->poll_wake) {
		vdev->poll_wake = 0;
		mask = POLLIN | POLLOUT;
	}
done:
	mutex_unlock(&vdev->vdev_mutex);
	return mask;
}
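
/*
 * Editorial usage sketch (hypothetical userspace poll loop, illustration
 * only) matching the semantics documented above: on every wakeup drain all
 * available descriptors before polling again, otherwise the ring can stall.
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *	while (poll(&pfd, 1, -1) > 0) {
 *		if (pfd.revents & POLLERR)
 *			break;
 *		do {
 *			if (ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy) < 0)
 *				break;
 *		} while (copy.out_len);
 *	}
 */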

static inline int
vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
		 unsigned long *size, unsigned long *pa)
{
	struct vop_device *vpdev = vdev->vpdev;
	unsigned long start = MIC_DP_SIZE;
	int i;

	/*
	 * MMAP interface is as follows:
	 * offset				region
	 * 0x0					virtio device_page
	 * 0x1000				first vring
	 * 0x1000 + size of 1st vring		second vring
	 * ....
	 */
	if (!offset) {
		*pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
		*size = MIC_DP_SIZE;
		return 0;
	}

	for (i = 0; i < vdev->dd->num_vq; i++) {
		struct vop_vringh *vvr = &vdev->vvr[i];

		if (offset == start) {
			*pa = virt_to_phys(vvr->vring.va);
			*size = vvr->vring.len;
			return 0;
		}
		start += vvr->vring.len;
	}
	return -1;
}

/*
 * Maps the device page and virtio rings to user space for readonly access.
 */
static int vop_mmap(struct file *f, struct vm_area_struct *vma)
{
	struct vop_vdev *vdev = f->private_data;
	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
	unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size;
	int i, err;

	err = vop_vdev_inited(vdev);
	if (err)
		goto ret;
	if (vma->vm_flags & VM_WRITE) {
		err = -EACCES;
		goto ret;
	}
	while (size_rem) {
		i = vop_query_offset(vdev, offset, &size, &pa);
		if (i < 0) {
			err = -EINVAL;
			goto ret;
		}
		err = remap_pfn_range(vma, vma->vm_start + offset,
				      pa >> PAGE_SHIFT, size,
				      vma->vm_page_prot);
		if (err)
			goto ret;
		size_rem -= size;
		offset += size;
	}
ret:
	return err;
}
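
/*
 * Editorial usage sketch (hypothetical userspace, illustration only)
 * following the offset layout documented in vop_query_offset(): the device
 * page sits at offset 0 and the vrings follow back to back, so a single
 * read-only mapping from offset 0 covers everything. map_len stands for
 * MIC_DP_SIZE plus the lengths of all vrings, which userspace knows from
 * the descriptor it added.
 *
 *	void *dp = mmap(NULL, map_len, PROT_READ, MAP_SHARED, fd, 0);
 */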

static const struct file_operations vop_fops = {
	.open = vop_open,
	.release = vop_release,
	.unlocked_ioctl = vop_ioctl,
	.poll = vop_poll,
	.mmap = vop_mmap,
	.owner = THIS_MODULE,
};

int vop_host_init(struct vop_info *vi)
{
	int rc;
	struct miscdevice *mdev;
	struct vop_device *vpdev = vi->vpdev;

	INIT_LIST_HEAD(&vi->vdev_list);
	vi->dma_ch = vpdev->dma_ch;
	mdev = &vi->miscdev;
	mdev->minor = MISC_DYNAMIC_MINOR;
	snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index);
	mdev->name = vi->name;
	mdev->fops = &vop_fops;
	mdev->parent = &vpdev->dev;

	rc = misc_register(mdev);
	if (rc)
		dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc);
	return rc;
}

void vop_host_uninit(struct vop_info *vi)
{
	struct list_head *pos, *tmp;
	struct vop_vdev *vdev;

	mutex_lock(&vi->vop_mutex);
	vop_virtio_reset_devices(vi);
	list_for_each_safe(pos, tmp, &vi->vdev_list) {
		vdev = list_entry(pos, struct vop_vdev, list);
		list_del(pos);
		reinit_completion(&vdev->destroy);
		mutex_unlock(&vi->vop_mutex);
		mutex_lock(&vdev->vdev_mutex);
		vop_virtio_del_device(vdev);
		vdev->deleted = true;
		mutex_unlock(&vdev->vdev_mutex);
		complete(&vdev->destroy);
		mutex_lock(&vi->vop_mutex);
	}
	mutex_unlock(&vi->vop_mutex);
	misc_deregister(&vi->miscdev);
}