svc_rdma_sendto.c

/*
 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Tom Tucker <tom@opengridcomputing.com>
 */

#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/spinlock.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>

#include <linux/sunrpc/svc_rdma.h>

#define RPCDBG_FACILITY	RPCDBG_SVCXPRT

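/* Return the number of pad bytes needed to round a length of @len
 * bytes up to the next XDR quad (4-byte) boundary.
 */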
static u32 xdr_padsize(u32 len)
{
	return (len & 3) ? (4 - (len & 3)) : 0;
}

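/* Map the head, page list, and tail of the RPC reply in @xdr into the
 * I/O vector @vec. sge[0] of the vector is left free for the
 * RPC-over-RDMA transport header. When the client provided a Write
 * chunk, the XDR pad that follows the page list is not sent inline,
 * so it is trimmed from the tail here.
 *
 * Returns 0 on success, or -EIO if the xdr_buf lengths are inconsistent.
 */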
int svc_rdma_map_xdr(struct svcxprt_rdma *xprt,
		     struct xdr_buf *xdr,
		     struct svc_rdma_req_map *vec,
		     bool write_chunk_present)
{
	int sge_no;
	u32 sge_bytes;
	u32 page_bytes;
	u32 page_off;
	int page_no;

	if (xdr->len !=
	    (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) {
		pr_err("svcrdma: %s: XDR buffer length error\n", __func__);
		return -EIO;
	}

	/* Skip the first sge, this is for the RPCRDMA header */
	sge_no = 1;

	/* Head SGE */
	vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
	vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
	sge_no++;

	/* pages SGE */
	page_no = 0;
	page_bytes = xdr->page_len;
	page_off = xdr->page_base;
	while (page_bytes) {
		vec->sge[sge_no].iov_base =
			page_address(xdr->pages[page_no]) + page_off;
		sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
		page_bytes -= sge_bytes;
		vec->sge[sge_no].iov_len = sge_bytes;

		sge_no++;
		page_no++;
		page_off = 0; /* reset for next time through loop */
	}

	/* Tail SGE */
	if (xdr->tail[0].iov_len) {
		unsigned char *base = xdr->tail[0].iov_base;
		size_t len = xdr->tail[0].iov_len;
		u32 xdr_pad = xdr_padsize(xdr->page_len);

		if (write_chunk_present && xdr_pad) {
			base += xdr_pad;
			len -= xdr_pad;
		}

		if (len) {
			vec->sge[sge_no].iov_base = base;
			vec->sge[sge_no].iov_len = len;
			sge_no++;
		}
	}

	dprintk("svcrdma: %s: sge_no %d page_no %d "
		"page_base %u page_len %u head_len %zu tail_len %zu\n",
		__func__, sge_no, page_no, xdr->page_base, xdr->page_len,
		xdr->head[0].iov_len, xdr->tail[0].iov_len);

	vec->count = sge_no;
	return 0;
}

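/* Translate @xdr_off into the head, page list, or tail of @xdr and
 * DMA-map the page containing that offset for up to @len bytes (capped
 * at one page) in direction @dir. The caller is responsible for
 * checking the returned address with ib_dma_mapping_error().
 */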
static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
			      struct xdr_buf *xdr,
			      u32 xdr_off, size_t len, int dir)
{
	struct page *page;
	dma_addr_t dma_addr;

	if (xdr_off < xdr->head[0].iov_len) {
		/* This offset is in the head */
		xdr_off += (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
		page = virt_to_page(xdr->head[0].iov_base);
	} else {
		xdr_off -= xdr->head[0].iov_len;
		if (xdr_off < xdr->page_len) {
			/* This offset is in the page list */
			xdr_off += xdr->page_base;
			page = xdr->pages[xdr_off >> PAGE_SHIFT];
			xdr_off &= ~PAGE_MASK;
		} else {
			/* This offset is in the tail */
			xdr_off -= xdr->page_len;
			xdr_off += (unsigned long)
				xdr->tail[0].iov_base & ~PAGE_MASK;
			page = virt_to_page(xdr->tail[0].iov_base);
		}
	}
	dma_addr = ib_dma_map_page(xprt->sc_cm_id->device, page, xdr_off,
				   min_t(size_t, PAGE_SIZE, len), dir);
	return dma_addr;
}

/* Returns the address of the first read chunk or <nul> if no read chunk
 * is present
 */
struct rpcrdma_read_chunk *
svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp)
{
	struct rpcrdma_read_chunk *ch =
		(struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];

	if (ch->rc_discrim == xdr_zero)
		return NULL;
	return ch;
}

/* Returns the address of the first write array element or <nul> if
 * no write list is present
 */
static struct rpcrdma_write_array *
svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp)
{
	if (rmsgp->rm_body.rm_chunks[0] != xdr_zero ||
	    rmsgp->rm_body.rm_chunks[1] == xdr_zero)
		return NULL;
	return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1];
}

/* Returns the address of the first reply array element or <nul> if no
 * reply array is present
 */
static struct rpcrdma_write_array *
svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp,
			 struct rpcrdma_write_array *wr_ary)
{
	struct rpcrdma_read_chunk *rch;
	struct rpcrdma_write_array *rp_ary;

	/* XXX: Need to fix when reply chunk may occur with read list
	 * and/or write list.
	 */
	if (rmsgp->rm_body.rm_chunks[0] != xdr_zero ||
	    rmsgp->rm_body.rm_chunks[1] != xdr_zero)
		return NULL;

	rch = svc_rdma_get_read_chunk(rmsgp);
	if (rch) {
		while (rch->rc_discrim != xdr_zero)
			rch++;

		/* The reply chunk follows an empty write array located
		 * at 'rc_position' here. The reply array is at rc_target.
		 */
		rp_ary = (struct rpcrdma_write_array *)&rch->rc_target;
		goto found_it;
	}

	if (wr_ary) {
		int chunk = be32_to_cpu(wr_ary->wc_nchunks);

		rp_ary = (struct rpcrdma_write_array *)
			&wr_ary->wc_array[chunk].wc_target.rs_length;
		goto found_it;
	}

	/* No read list, no write list */
	rp_ary = (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[2];

found_it:
	if (rp_ary->wc_discrim == xdr_zero)
		return NULL;
	return rp_ary;
}

/* RPC-over-RDMA Version One private extension: Remote Invalidation.
 * Responder's choice: requester signals it can handle Send With
 * Invalidate, and responder chooses one rkey to invalidate.
 *
 * Find a candidate rkey to invalidate when sending a reply. Picks the
 * first rkey it finds in the chunk lists.
 *
 * Returns zero if the RPC's chunk lists are empty.
 */
static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp,
				 struct rpcrdma_write_array *wr_ary,
				 struct rpcrdma_write_array *rp_ary)
{
	struct rpcrdma_read_chunk *rd_ary;
	struct rpcrdma_segment *arg_ch;
	u32 inv_rkey;

	inv_rkey = 0;

	rd_ary = svc_rdma_get_read_chunk(rdma_argp);
	if (rd_ary) {
		inv_rkey = be32_to_cpu(rd_ary->rc_target.rs_handle);
		goto out;
	}

	if (wr_ary && be32_to_cpu(wr_ary->wc_nchunks)) {
		arg_ch = &wr_ary->wc_array[0].wc_target;
		inv_rkey = be32_to_cpu(arg_ch->rs_handle);
		goto out;
	}

	if (rp_ary && be32_to_cpu(rp_ary->wc_nchunks)) {
		arg_ch = &rp_ary->wc_array[0].wc_target;
		inv_rkey = be32_to_cpu(arg_ch->rs_handle);
		goto out;
	}

out:
	dprintk("svcrdma: Send With Invalidate rkey=%08x\n", inv_rkey);
	return inv_rkey;
}

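/* Post one RDMA Write WR that pushes part of the XDR reply buffer,
 * starting at @xdr_off, into the client memory segment described by
 * @rmr and @to. At most sc_max_sge SGEs are used per WR.
 *
 * Returns the number of bytes covered by the posted Write, or -EIO if
 * DMA mapping or posting fails.
 */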
/* Assumptions:
 * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
 */
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
		      u32 rmr, u64 to,
		      u32 xdr_off, int write_len,
		      struct svc_rdma_req_map *vec)
{
	struct ib_rdma_wr write_wr;
	struct ib_sge *sge;
	int xdr_sge_no;
	int sge_no;
	int sge_bytes;
	int sge_off;
	int bc;
	struct svc_rdma_op_ctxt *ctxt;

	if (vec->count > RPCSVC_MAXPAGES) {
		pr_err("svcrdma: Too many pages (%lu)\n", vec->count);
		return -EIO;
	}

	dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
		"write_len=%d, vec->sge=%p, vec->count=%lu\n",
		rmr, (unsigned long long)to, xdr_off,
		write_len, vec->sge, vec->count);

	ctxt = svc_rdma_get_context(xprt);
	ctxt->direction = DMA_TO_DEVICE;
	sge = ctxt->sge;

	/* Find the SGE associated with xdr_off */
	for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < vec->count;
	     xdr_sge_no++) {
		if (vec->sge[xdr_sge_no].iov_len > bc)
			break;
		bc -= vec->sge[xdr_sge_no].iov_len;
	}

	sge_off = bc;
	bc = write_len;
	sge_no = 0;

	/* Copy the remaining SGE */
	while (bc != 0) {
		sge_bytes = min_t(size_t,
				  bc, vec->sge[xdr_sge_no].iov_len - sge_off);
		sge[sge_no].length = sge_bytes;
		sge[sge_no].addr =
			dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
				    sge_bytes, DMA_TO_DEVICE);
		xdr_off += sge_bytes;
		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
					 sge[sge_no].addr))
			goto err;
		svc_rdma_count_mappings(xprt, ctxt);
		sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
		ctxt->count++;
		sge_off = 0;
		sge_no++;
		xdr_sge_no++;
		if (xdr_sge_no > vec->count) {
			pr_err("svcrdma: Too many sges (%d)\n", xdr_sge_no);
			goto err;
		}
		bc -= sge_bytes;
		if (sge_no == xprt->sc_max_sge)
			break;
	}

	/* Prepare WRITE WR */
	memset(&write_wr, 0, sizeof write_wr);
	ctxt->cqe.done = svc_rdma_wc_write;
	write_wr.wr.wr_cqe = &ctxt->cqe;
	write_wr.wr.sg_list = &sge[0];
	write_wr.wr.num_sge = sge_no;
	write_wr.wr.opcode = IB_WR_RDMA_WRITE;
	write_wr.wr.send_flags = IB_SEND_SIGNALED;
	write_wr.rkey = rmr;
	write_wr.remote_addr = to;

	/* Post It */
	atomic_inc(&rdma_stat_write);
	if (svc_rdma_send(xprt, &write_wr.wr))
		goto err;
	return write_len - bc;
err:
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 0);
	return -EIO;
}

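/* Push the pagelist portion of rqstp->rq_res to the client using the
 * Write chunks it provided, and encode the resulting write list in
 * @rdma_resp.
 *
 * Returns rqstp->rq_res.page_len on success, or -EIO if an RDMA Write
 * could not be posted.
 */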
noinline
static int send_write_chunks(struct svcxprt_rdma *xprt,
			     struct rpcrdma_write_array *wr_ary,
			     struct rpcrdma_msg *rdma_resp,
			     struct svc_rqst *rqstp,
			     struct svc_rdma_req_map *vec)
{
	u32 xfer_len = rqstp->rq_res.page_len;
	int write_len;
	u32 xdr_off;
	int chunk_off;
	int chunk_no;
	int nchunks;
	struct rpcrdma_write_array *res_ary;
	int ret;

	res_ary = (struct rpcrdma_write_array *)
		&rdma_resp->rm_body.rm_chunks[1];

	/* Write chunks start at the pagelist */
	nchunks = be32_to_cpu(wr_ary->wc_nchunks);
	for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
	     xfer_len && chunk_no < nchunks;
	     chunk_no++) {
		struct rpcrdma_segment *arg_ch;
		u64 rs_offset;

		arg_ch = &wr_ary->wc_array[chunk_no].wc_target;
		write_len = min(xfer_len, be32_to_cpu(arg_ch->rs_length));

		/* Prepare the response chunk given the length actually
		 * written */
		xdr_decode_hyper((__be32 *)&arg_ch->rs_offset, &rs_offset);
		svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
						arg_ch->rs_handle,
						arg_ch->rs_offset,
						write_len);
		chunk_off = 0;
		while (write_len) {
			ret = send_write(xprt, rqstp,
					 be32_to_cpu(arg_ch->rs_handle),
					 rs_offset + chunk_off,
					 xdr_off,
					 write_len,
					 vec);
			if (ret <= 0)
				goto out_err;
			chunk_off += ret;
			xdr_off += ret;
			xfer_len -= ret;
			write_len -= ret;
		}
	}
	/* Update the req with the number of chunks actually used */
	svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no);

	return rqstp->rq_res.page_len;

out_err:
	pr_err("svcrdma: failed to send write chunks, rc=%d\n", ret);
	return -EIO;
}

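/* Push the complete reply in rqstp->rq_res to the client using the
 * Reply chunk it provided, and encode the resulting reply array in
 * @rdma_resp.
 *
 * Returns rqstp->rq_res.len on success, or -EIO if an RDMA Write could
 * not be posted.
 */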
noinline
static int send_reply_chunks(struct svcxprt_rdma *xprt,
			     struct rpcrdma_write_array *rp_ary,
			     struct rpcrdma_msg *rdma_resp,
			     struct svc_rqst *rqstp,
			     struct svc_rdma_req_map *vec)
{
	u32 xfer_len = rqstp->rq_res.len;
	int write_len;
	u32 xdr_off;
	int chunk_no;
	int chunk_off;
	int nchunks;
	struct rpcrdma_segment *ch;
	struct rpcrdma_write_array *res_ary;
	int ret;

	/* XXX: need to fix when reply lists occur with read-list and/or
	 * write-list */
	res_ary = (struct rpcrdma_write_array *)
		&rdma_resp->rm_body.rm_chunks[2];

	/* xdr offset starts at RPC message */
	nchunks = be32_to_cpu(rp_ary->wc_nchunks);
	for (xdr_off = 0, chunk_no = 0;
	     xfer_len && chunk_no < nchunks;
	     chunk_no++) {
		u64 rs_offset;

		ch = &rp_ary->wc_array[chunk_no].wc_target;
		write_len = min(xfer_len, be32_to_cpu(ch->rs_length));

		/* Prepare the reply chunk given the length actually
		 * written */
		xdr_decode_hyper((__be32 *)&ch->rs_offset, &rs_offset);
		svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no,
						ch->rs_handle, ch->rs_offset,
						write_len);
		chunk_off = 0;
		while (write_len) {
			ret = send_write(xprt, rqstp,
					 be32_to_cpu(ch->rs_handle),
					 rs_offset + chunk_off,
					 xdr_off,
					 write_len,
					 vec);
			if (ret <= 0)
				goto out_err;
			chunk_off += ret;
			xdr_off += ret;
			xfer_len -= ret;
			write_len -= ret;
		}
	}
	/* Update the req with the number of chunks actually used */
	svc_rdma_xdr_encode_reply_array(res_ary, chunk_no);

	return rqstp->rq_res.len;

out_err:
	pr_err("svcrdma: failed to send reply chunks, rc=%d\n", ret);
	return -EIO;
}

/* This function prepares the portion of the RPCRDMA message to be
 * sent in the RDMA_SEND. This function is called after data sent via
 * RDMA has already been transmitted. There are three cases:
 * - The RPCRDMA header, RPC header, and payload are all sent in a
 *   single RDMA_SEND. This is the "inline" case.
 * - The RPCRDMA header and some portion of the RPC header and data
 *   are sent via this RDMA_SEND and another portion of the data is
 *   sent via RDMA.
 * - The RPCRDMA header [NOMSG] is sent in this RDMA_SEND and the RPC
 *   header and data are all transmitted via RDMA.
 * In all three cases, this function prepares the RPCRDMA header in
 * sge[0], the 'type' parameter indicates the type to place in the
 * RPCRDMA header, and the 'byte_count' field indicates how much of
 * the XDR to include in this RDMA_SEND. NB: The offset of the payload
 * to send is zero in the XDR.
 */
static int send_reply(struct svcxprt_rdma *rdma,
		      struct svc_rqst *rqstp,
		      struct page *page,
		      struct rpcrdma_msg *rdma_resp,
		      struct svc_rdma_req_map *vec,
		      int byte_count,
		      u32 inv_rkey)
{
	struct svc_rdma_op_ctxt *ctxt;
	struct ib_send_wr send_wr;
	u32 xdr_off;
	int sge_no;
	int sge_bytes;
	int page_no;
	int pages;
	int ret = -EIO;

	/* Prepare the context */
	ctxt = svc_rdma_get_context(rdma);
	ctxt->direction = DMA_TO_DEVICE;
	ctxt->pages[0] = page;
	ctxt->count = 1;

	/* Prepare the SGE for the RPCRDMA Header */
	ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
	ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
	ctxt->sge[0].addr =
		ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
				ctxt->sge[0].length, DMA_TO_DEVICE);
	if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
		goto err;
	svc_rdma_count_mappings(rdma, ctxt);

	ctxt->direction = DMA_TO_DEVICE;

	/* Map the payload indicated by 'byte_count' */
	xdr_off = 0;
	for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
		sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
		byte_count -= sge_bytes;
		ctxt->sge[sge_no].addr =
			dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
				    sge_bytes, DMA_TO_DEVICE);
		xdr_off += sge_bytes;
		if (ib_dma_mapping_error(rdma->sc_cm_id->device,
					 ctxt->sge[sge_no].addr))
			goto err;
		svc_rdma_count_mappings(rdma, ctxt);
		ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
		ctxt->sge[sge_no].length = sge_bytes;
	}
	if (byte_count != 0) {
		pr_err("svcrdma: Could not map %d bytes\n", byte_count);
		goto err;
	}

	/* Save all respages in the ctxt and remove them from the
	 * respages array. They are our pages until the I/O
	 * completes.
	 */
	pages = rqstp->rq_next_page - rqstp->rq_respages;
	for (page_no = 0; page_no < pages; page_no++) {
		ctxt->pages[page_no + 1] = rqstp->rq_respages[page_no];
		ctxt->count++;
		rqstp->rq_respages[page_no] = NULL;
	}
	rqstp->rq_next_page = rqstp->rq_respages + 1;

	if (sge_no > rdma->sc_max_sge) {
		pr_err("svcrdma: Too many sges (%d)\n", sge_no);
		goto err;
	}
	memset(&send_wr, 0, sizeof send_wr);
	ctxt->cqe.done = svc_rdma_wc_send;
	send_wr.wr_cqe = &ctxt->cqe;
	send_wr.sg_list = ctxt->sge;
	send_wr.num_sge = sge_no;
	if (inv_rkey) {
		send_wr.opcode = IB_WR_SEND_WITH_INV;
		send_wr.ex.invalidate_rkey = inv_rkey;
	} else
		send_wr.opcode = IB_WR_SEND;
	send_wr.send_flags = IB_SEND_SIGNALED;

	ret = svc_rdma_send(rdma, &send_wr);
	if (ret)
		goto err;

	return 0;

err:
	svc_rdma_unmap_dma(ctxt);
	svc_rdma_put_context(ctxt, 1);
	return ret;
}

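/* The svc core calls this through ->xpo_prep_reply_hdr so that a
 * transport can reserve room at the start of the reply (stream
 * transports use it for the record marker). RPC-over-RDMA builds its
 * transport header on a separate page in svc_rdma_sendto(), so there
 * is nothing to reserve here.
 */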
void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
{
}

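/* Transport-specific sendto: transmit the RPC reply held in
 * rqstp->rq_res. Payload directed to client-provided Write or Reply
 * chunks is pushed with RDMA Writes first, then the transport header
 * (plus any inline payload) is transmitted with a Send. On failure the
 * transport is marked for close and -ENOTCONN is returned.
 */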
int svc_rdma_sendto(struct svc_rqst *rqstp)
{
	struct svc_xprt *xprt = rqstp->rq_xprt;
	struct svcxprt_rdma *rdma =
		container_of(xprt, struct svcxprt_rdma, sc_xprt);
	struct rpcrdma_msg *rdma_argp;
	struct rpcrdma_msg *rdma_resp;
	struct rpcrdma_write_array *wr_ary, *rp_ary;
	enum rpcrdma_proc reply_type;
	int ret;
	int inline_bytes;
	struct page *res_page;
	struct svc_rdma_req_map *vec;
	u32 inv_rkey;

	dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);

	/* Get the RDMA request header. The receive logic always
	 * places this at the start of page 0.
	 */
	rdma_argp = page_address(rqstp->rq_pages[0]);
	wr_ary = svc_rdma_get_write_array(rdma_argp);
	rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary);

	inv_rkey = 0;
	if (rdma->sc_snd_w_inv)
		inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_ary, rp_ary);

	/* Build a req vec for the XDR */
	vec = svc_rdma_get_req_map(rdma);
	ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL);
	if (ret)
		goto err0;
	inline_bytes = rqstp->rq_res.len;

	/* Create the RDMA response header */
	ret = -ENOMEM;
	res_page = alloc_page(GFP_KERNEL);
	if (!res_page)
		goto err0;
	rdma_resp = page_address(res_page);
	if (rp_ary)
		reply_type = RDMA_NOMSG;
	else
		reply_type = RDMA_MSG;
	svc_rdma_xdr_encode_reply_header(rdma, rdma_argp,
					 rdma_resp, reply_type);

	/* Send any write-chunk data and build resp write-list */
	if (wr_ary) {
		ret = send_write_chunks(rdma, wr_ary, rdma_resp, rqstp, vec);
		if (ret < 0)
			goto err1;
		inline_bytes -= ret + xdr_padsize(ret);
	}

	/* Send any reply-list data and update resp reply-list */
	if (rp_ary) {
		ret = send_reply_chunks(rdma, rp_ary, rdma_resp, rqstp, vec);
		if (ret < 0)
			goto err1;
		inline_bytes -= ret;
	}

	/* Post a fresh Receive buffer _before_ sending the reply */
	ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
	if (ret)
		goto err1;
	ret = send_reply(rdma, rqstp, res_page, rdma_resp, vec,
			 inline_bytes, inv_rkey);
	if (ret < 0)
		goto err0;
	svc_rdma_put_req_map(rdma, vec);
	dprintk("svcrdma: send_reply returns %d\n", ret);
	return ret;

err1:
	put_page(res_page);
err0:
	svc_rdma_put_req_map(rdma, vec);
	pr_err("svcrdma: Could not send reply, err=%d. Closing transport.\n",
	       ret);
	set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
	return -ENOTCONN;
}

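/* Send an RDMA_ERROR message (ERR_VERS or ERR_CHUNK) to the client
 * when an inbound RPC-over-RDMA message cannot be processed. A
 * replacement Receive buffer is posted before the error reply is sent;
 * if any step fails, the error reply is simply dropped.
 */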
void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
			 int status)
{
	struct ib_send_wr err_wr;
	struct page *p;
	struct svc_rdma_op_ctxt *ctxt;
	enum rpcrdma_errcode err;
	__be32 *va;
	int length;
	int ret;

	ret = svc_rdma_repost_recv(xprt, GFP_KERNEL);
	if (ret)
		return;

	p = alloc_page(GFP_KERNEL);
	if (!p)
		return;
	va = page_address(p);

	/* XDR encode an error reply */
	err = ERR_CHUNK;
	if (status == -EPROTONOSUPPORT)
		err = ERR_VERS;
	length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);

	ctxt = svc_rdma_get_context(xprt);
	ctxt->direction = DMA_TO_DEVICE;
	ctxt->count = 1;
	ctxt->pages[0] = p;

	/* Prepare SGE for local address */
	ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey;
	ctxt->sge[0].length = length;
	ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device,
					    p, 0, length, DMA_TO_DEVICE);
	if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
		dprintk("svcrdma: Error mapping buffer for protocol error\n");
		svc_rdma_put_context(ctxt, 1);
		return;
	}
	svc_rdma_count_mappings(xprt, ctxt);

	/* Prepare SEND WR */
	memset(&err_wr, 0, sizeof(err_wr));
	ctxt->cqe.done = svc_rdma_wc_send;
	err_wr.wr_cqe = &ctxt->cqe;
	err_wr.sg_list = ctxt->sge;
	err_wr.num_sge = 1;
	err_wr.opcode = IB_WR_SEND;
	err_wr.send_flags = IB_SEND_SIGNALED;

	/* Post It */
	ret = svc_rdma_send(xprt, &err_wr);
	if (ret) {
		dprintk("svcrdma: Error %d posting send for protocol error\n",
			ret);
		svc_rdma_unmap_dma(ctxt);
		svc_rdma_put_context(ctxt, 1);
	}
}