  1. /*
  2. * pNFS Objects layout driver high level definitions
  3. *
  4. * Copyright (C) 2007 Panasas Inc. [year of first publication]
  5. * All rights reserved.
  6. *
  7. * Benny Halevy <bhalevy@panasas.com>
  8. * Boaz Harrosh <bharrosh@panasas.com>
  9. *
  10. * This program is free software; you can redistribute it and/or modify
  11. * it under the terms of the GNU General Public License version 2
  12. * See the file COPYING included with this distribution for more details.
  13. *
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions
  16. * are met:
  17. *
  18. * 1. Redistributions of source code must retain the above copyright
  19. * notice, this list of conditions and the following disclaimer.
  20. * 2. Redistributions in binary form must reproduce the above copyright
  21. * notice, this list of conditions and the following disclaimer in the
  22. * documentation and/or other materials provided with the distribution.
  23. * 3. Neither the name of the Panasas company nor the names of its
  24. * contributors may be used to endorse or promote products derived
  25. * from this software without specific prior written permission.
  26. *
  27. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
  28. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  29. * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  30. * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  31. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  32. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  33. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  34. * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  35. * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  36. * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  37. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  38. */
  39. #include <linux/kmod.h>
  40. #include <linux/moduleparam.h>
  41. #include <linux/ratelimit.h>
  42. #include <scsi/osd_initiator.h>
  43. #include "objlayout.h"
  44. #define NFSDBG_FACILITY NFSDBG_PNFS_LD
  45. /*
  46. * Create a objlayout layout structure for the given inode and return it.
  47. */
  48. struct pnfs_layout_hdr *
  49. objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
  50. {
  51. struct objlayout *objlay;
  52. objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
  53. if (objlay) {
  54. spin_lock_init(&objlay->lock);
  55. INIT_LIST_HEAD(&objlay->err_list);
  56. }
  57. dprintk("%s: Return %p\n", __func__, objlay);
  58. return &objlay->pnfs_layout;
  59. }
  60. /*
  61. * Free an objlayout layout structure
  62. */
  63. void
  64. objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
  65. {
  66. struct objlayout *objlay = OBJLAYOUT(lo);
  67. dprintk("%s: objlay %p\n", __func__, objlay);
  68. WARN_ON(!list_empty(&objlay->err_list));
  69. kfree(objlay);
  70. }
  71. /*
  72. * Unmarshall layout and store it in pnfslay.
  73. */
  74. struct pnfs_layout_segment *
  75. objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay,
  76. struct nfs4_layoutget_res *lgr,
  77. gfp_t gfp_flags)
  78. {
  79. int status = -ENOMEM;
  80. struct xdr_stream stream;
  81. struct xdr_buf buf = {
  82. .pages = lgr->layoutp->pages,
  83. .page_len = lgr->layoutp->len,
  84. .buflen = lgr->layoutp->len,
  85. .len = lgr->layoutp->len,
  86. };
  87. struct page *scratch;
  88. struct pnfs_layout_segment *lseg;
  89. dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay);
  90. scratch = alloc_page(gfp_flags);
  91. if (!scratch)
  92. goto err_nofree;
  93. xdr_init_decode(&stream, &buf, NULL);
  94. xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
  95. status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags);
  96. if (unlikely(status)) {
  97. dprintk("%s: objio_alloc_lseg Return err %d\n", __func__,
  98. status);
  99. goto err;
  100. }
  101. __free_page(scratch);
  102. dprintk("%s: Return %p\n", __func__, lseg);
  103. return lseg;
  104. err:
  105. __free_page(scratch);
  106. err_nofree:
  107. dprintk("%s: Err Return=>%d\n", __func__, status);
  108. return ERR_PTR(status);
  109. }
  110. /*
  111. * Free a layout segement
  112. */
  113. void
  114. objlayout_free_lseg(struct pnfs_layout_segment *lseg)
  115. {
  116. dprintk("%s: freeing layout segment %p\n", __func__, lseg);
  117. if (unlikely(!lseg))
  118. return;
  119. objio_free_lseg(lseg);
  120. }
  121. /*
  122. * I/O Operations
  123. */
  124. static inline u64
  125. end_offset(u64 start, u64 len)
  126. {
  127. u64 end;
  128. end = start + len;
  129. return end >= start ? end : NFS4_MAX_UINT64;
  130. }
  131. /* last octet in a range */
  132. static inline u64
  133. last_byte_offset(u64 start, u64 len)
  134. {
  135. u64 end;
  136. BUG_ON(!len);
  137. end = start + len;
  138. return end > start ? end - 1 : NFS4_MAX_UINT64;
  139. }
/*
 * Verify that the I/O range [offset, offset+count) lies within the layout
 * segment, and normalize the page vector in place: whole pages implied by
 * a pgbase larger than PAGE_SIZE are folded into *p_pages so that the
 * remaining *p_pgbase is an in-page offset.
 *
 * NOTE(review): the condition is `> PAGE_SIZE`, so a pgbase of exactly
 * PAGE_SIZE is left unnormalized — confirm downstream engines accept that.
 */
static void _fix_verify_io_params(struct pnfs_layout_segment *lseg,
	struct page ***p_pages, unsigned *p_pgbase,
	u64 offset, unsigned long count)
{
	u64 lseg_end_offset;

	/* I/O must start inside the segment's byte range. */
	BUG_ON(offset < lseg->pls_range.offset);
	lseg_end_offset = end_offset(lseg->pls_range.offset,
				     lseg->pls_range.length);
	BUG_ON(offset >= lseg_end_offset);
	/* Running past the segment end is suspicious but not fatal. */
	WARN_ON(offset + count > lseg_end_offset);

	if (*p_pgbase > PAGE_SIZE) {
		dprintk("%s: pgbase(0x%x) > PAGE_SIZE\n", __func__, *p_pgbase);
		*p_pages += *p_pgbase >> PAGE_SHIFT;
		*p_pgbase &= ~PAGE_MASK;
	}
}
  156. /*
  157. * I/O done common code
  158. */
  159. static void
  160. objlayout_iodone(struct objlayout_io_res *oir)
  161. {
  162. if (likely(oir->status >= 0)) {
  163. objio_free_result(oir);
  164. } else {
  165. struct objlayout *objlay = oir->objlay;
  166. spin_lock(&objlay->lock);
  167. objlay->delta_space_valid = OBJ_DSU_INVALID;
  168. list_add(&objlay->err_list, &oir->err_list);
  169. spin_unlock(&objlay->lock);
  170. }
  171. }
  172. /*
  173. * objlayout_io_set_result - Set an osd_error code on a specific osd comp.
  174. *
  175. * The @index component IO failed (error returned from target). Register
  176. * the error for later reporting at layout-return.
  177. */
  178. void
  179. objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
  180. struct pnfs_osd_objid *pooid, int osd_error,
  181. u64 offset, u64 length, bool is_write)
  182. {
  183. struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[index];
  184. BUG_ON(index >= oir->num_comps);
  185. if (osd_error) {
  186. ioerr->oer_component = *pooid;
  187. ioerr->oer_comp_offset = offset;
  188. ioerr->oer_comp_length = length;
  189. ioerr->oer_iswrite = is_write;
  190. ioerr->oer_errno = osd_error;
  191. dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) "
  192. "par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n",
  193. __func__, index, ioerr->oer_errno,
  194. ioerr->oer_iswrite,
  195. _DEVID_LO(&ioerr->oer_component.oid_device_id),
  196. _DEVID_HI(&ioerr->oer_component.oid_device_id),
  197. ioerr->oer_component.oid_partition_id,
  198. ioerr->oer_component.oid_object_id,
  199. ioerr->oer_comp_offset,
  200. ioerr->oer_comp_length);
  201. } else {
  202. /* User need not call if no error is reported */
  203. ioerr->oer_errno = 0;
  204. }
  205. }
  206. /* Function scheduled on rpc workqueue to call ->nfs_readlist_complete().
  207. * This is because the osd completion is called with ints-off from
  208. * the block layer
  209. */
  210. static void _rpc_read_complete(struct work_struct *work)
  211. {
  212. struct rpc_task *task;
  213. struct nfs_read_data *rdata;
  214. dprintk("%s enter\n", __func__);
  215. task = container_of(work, struct rpc_task, u.tk_work);
  216. rdata = container_of(task, struct nfs_read_data, task);
  217. pnfs_ld_read_done(rdata);
  218. }
/*
 * objlayout_read_done - consume a read result from the objio engine.
 *
 * Records @status on the result and the rpc task, then frees the result
 * (or queues it for error reporting) via objlayout_iodone(). The nfs read
 * is completed inline when @sync, otherwise from the rpc workqueue since
 * osd completions run with interrupts off (see _rpc_read_complete()).
 */
void
objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{
	struct nfs_read_data *rdata = oir->rpcdata;

	oir->status = rdata->task.tk_status = status;
	if (status >= 0)
		rdata->res.count = status;	/* bytes actually read */
	else
		rdata->pnfs_error = status;
	objlayout_iodone(oir);
	/* must not use oir after this point */

	dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__,
		status, rdata->res.eof, sync);

	if (sync)
		pnfs_ld_read_done(rdata);
	else {
		INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete);
		schedule_work(&rdata->task.u.tk_work);
	}
}
/*
 * Perform sync or async reads.
 *
 * Clamps the request against i_size: a read entirely past EOF succeeds
 * with zero bytes and eof set; a read straddling EOF is shortened.
 * Returns PNFS_ATTEMPTED on success, PNFS_NOT_ATTEMPTED (with
 * rdata->pnfs_error set) when the io engine refuses the request.
 */
enum pnfs_try_status
objlayout_read_pagelist(struct nfs_read_data *rdata)
{
	loff_t offset = rdata->args.offset;
	size_t count = rdata->args.count;
	int err;
	loff_t eof;

	eof = i_size_read(rdata->inode);
	if (unlikely(offset + count > eof)) {
		if (offset >= eof) {
			/* Whole read is past EOF: nothing to transfer. */
			err = 0;
			rdata->res.count = 0;
			rdata->res.eof = 1;
			/*FIXME: do we need to call pnfs_ld_read_done() */
			goto out;
		}
		/* Shorten the read so it ends at EOF. */
		count = eof - offset;
	}

	rdata->res.eof = (offset + count) >= eof;
	_fix_verify_io_params(rdata->lseg, &rdata->args.pages,
			      &rdata->args.pgbase,
			      rdata->args.offset, rdata->args.count);

	/* NOTE(review): %Zx is the legacy kernel spelling of %zx. */
	dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
		__func__, rdata->inode->i_ino, offset, count, rdata->res.eof);

	err = objio_read_pagelist(rdata);
 out:
	if (unlikely(err)) {
		rdata->pnfs_error = err;
		dprintk("%s: Returned Error %d\n", __func__, err);
		return PNFS_NOT_ATTEMPTED;
	}
	return PNFS_ATTEMPTED;
}
  275. /* Function scheduled on rpc workqueue to call ->nfs_writelist_complete().
  276. * This is because the osd completion is called with ints-off from
  277. * the block layer
  278. */
  279. static void _rpc_write_complete(struct work_struct *work)
  280. {
  281. struct rpc_task *task;
  282. struct nfs_write_data *wdata;
  283. dprintk("%s enter\n", __func__);
  284. task = container_of(work, struct rpc_task, u.tk_work);
  285. wdata = container_of(task, struct nfs_write_data, task);
  286. pnfs_ld_write_done(wdata);
  287. }
/*
 * objlayout_write_done - consume a write result from the objio engine.
 *
 * Mirrors objlayout_read_done(): record @status and the commit level the
 * engine reports, release or queue the result via objlayout_iodone(), then
 * complete the nfs write inline (@sync) or from the rpc workqueue.
 */
void
objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{
	struct nfs_write_data *wdata = oir->rpcdata;

	oir->status = wdata->task.tk_status = status;
	if (status >= 0) {
		wdata->res.count = status;	/* bytes actually written */
		wdata->verf.committed = oir->committed;
	} else {
		wdata->pnfs_error = status;
	}
	objlayout_iodone(oir);
	/* must not use oir after this point */

	dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
		status, wdata->verf.committed, sync);

	if (sync)
		pnfs_ld_write_done(wdata);
	else {
		INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete);
		schedule_work(&wdata->task.u.tk_work);
	}
}
  310. /*
  311. * Perform sync or async writes.
  312. */
  313. enum pnfs_try_status
  314. objlayout_write_pagelist(struct nfs_write_data *wdata,
  315. int how)
  316. {
  317. int err;
  318. _fix_verify_io_params(wdata->lseg, &wdata->args.pages,
  319. &wdata->args.pgbase,
  320. wdata->args.offset, wdata->args.count);
  321. err = objio_write_pagelist(wdata, how);
  322. if (unlikely(err)) {
  323. wdata->pnfs_error = err;
  324. dprintk("%s: Returned Error %d\n", __func__, err);
  325. return PNFS_NOT_ATTEMPTED;
  326. }
  327. return PNFS_ATTEMPTED;
  328. }
/*
 * Encode the layoutcommit body: snapshot (and reset) the delta-space
 * accounting under the layout lock, then XDR-encode it with a 4-byte
 * length prefix that is back-patched once the body size is known.
 */
void
objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay,
			      struct xdr_stream *xdr,
			      const struct nfs4_layoutcommit_args *args)
{
	struct objlayout *objlay = OBJLAYOUT(pnfslay);
	struct pnfs_osd_layoutupdate lou;
	__be32 *start;

	dprintk("%s: Begin\n", __func__);

	spin_lock(&objlay->lock);
	lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID);
	lou.dsu_delta = objlay->delta_space_used;
	/* Consumed: reset accounting for the next commit cycle. */
	objlay->delta_space_used = 0;
	objlay->delta_space_valid = OBJ_DSU_INIT;
	lou.olu_ioerr_flag = !list_empty(&objlay->err_list);
	spin_unlock(&objlay->lock);

	/* NOTE(review): xdr_reserve_space() result is not NULL-checked. */
	start = xdr_reserve_space(xdr, 4);

	BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou));

	/* Back-patch the reserved word with the encoded byte count. */
	*start = cpu_to_be32((xdr->p - start - 1) * 4);

	dprintk("%s: Return delta_space_used %lld err %d\n", __func__,
		lou.dsu_delta, lou.olu_ioerr_flag);
}
  351. static int
  352. err_prio(u32 oer_errno)
  353. {
  354. switch (oer_errno) {
  355. case 0:
  356. return 0;
  357. case PNFS_OSD_ERR_RESOURCE:
  358. return OSD_ERR_PRI_RESOURCE;
  359. case PNFS_OSD_ERR_BAD_CRED:
  360. return OSD_ERR_PRI_BAD_CRED;
  361. case PNFS_OSD_ERR_NO_ACCESS:
  362. return OSD_ERR_PRI_NO_ACCESS;
  363. case PNFS_OSD_ERR_UNREACHABLE:
  364. return OSD_ERR_PRI_UNREACHABLE;
  365. case PNFS_OSD_ERR_NOT_FOUND:
  366. return OSD_ERR_PRI_NOT_FOUND;
  367. case PNFS_OSD_ERR_NO_SPACE:
  368. return OSD_ERR_PRI_NO_SPACE;
  369. default:
  370. WARN_ON(1);
  371. /* fallthrough */
  372. case PNFS_OSD_ERR_EIO:
  373. return OSD_ERR_PRI_EIO;
  374. }
  375. }
/*
 * Fold @src_err into @dest_err so the accumulated error covers both:
 * - the first real error is copied wholesale (device id blanked, since
 *   the union may span multiple devices);
 * - partition/object ids that differ collapse to 0 ("unspecified");
 * - the byte range becomes the union of both ranges (saturating);
 * - the surviving errno is the higher err_prio() one, except that a
 *   write error always displaces a read error.
 */
static void
merge_ioerr(struct pnfs_osd_ioerr *dest_err,
	    const struct pnfs_osd_ioerr *src_err)
{
	u64 dest_end, src_end;

	if (!dest_err->oer_errno) {
		*dest_err = *src_err;
		/* accumulated device must be blank */
		memset(&dest_err->oer_component.oid_device_id, 0,
		       sizeof(dest_err->oer_component.oid_device_id));
		return;
	}

	if (dest_err->oer_component.oid_partition_id !=
	    src_err->oer_component.oid_partition_id)
		dest_err->oer_component.oid_partition_id = 0;

	if (dest_err->oer_component.oid_object_id !=
	    src_err->oer_component.oid_object_id)
		dest_err->oer_component.oid_object_id = 0;

	/* Widen [offset, offset+length) to include the new error range. */
	if (dest_err->oer_comp_offset > src_err->oer_comp_offset)
		dest_err->oer_comp_offset = src_err->oer_comp_offset;
	dest_end = end_offset(dest_err->oer_comp_offset,
			      dest_err->oer_comp_length);
	src_end = end_offset(src_err->oer_comp_offset,
			     src_err->oer_comp_length);
	if (dest_end < src_end)
		dest_end = src_end;
	dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset;

	if ((src_err->oer_iswrite == dest_err->oer_iswrite) &&
	    (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) {
		dest_err->oer_errno = src_err->oer_errno;
	} else if (src_err->oer_iswrite) {
		/* A write error always trumps a read error. */
		dest_err->oer_iswrite = true;
		dest_err->oer_errno = src_err->oer_errno;
	}
}
/*
 * Collapse every remaining queued ioerr on @objlay into one descriptor
 * and encode it at @p (space was already reserved by the caller). Each
 * consumed result is dequeued and freed. Called with objlay->lock held
 * by objlayout_encode_layoutreturn().
 */
static void
encode_accumulated_error(struct objlayout *objlay, __be32 *p)
{
	struct objlayout_io_res *oir, *tmp;
	struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};

	list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
		unsigned i;

		for (i = 0; i < oir->num_comps; i++) {
			struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];

			if (!ioerr->oer_errno)
				continue;

			printk(KERN_ERR "NFS: %s: err[%d]: errno=%d "
				"is_write=%d dev(%llx:%llx) par=0x%llx "
				"obj=0x%llx offset=0x%llx length=0x%llx\n",
				__func__, i, ioerr->oer_errno,
				ioerr->oer_iswrite,
				_DEVID_LO(&ioerr->oer_component.oid_device_id),
				_DEVID_HI(&ioerr->oer_component.oid_device_id),
				ioerr->oer_component.oid_partition_id,
				ioerr->oer_component.oid_object_id,
				ioerr->oer_comp_offset,
				ioerr->oer_comp_length);

			merge_ioerr(&accumulated_err, ioerr);
		}
		list_del(&oir->err_list);
		objio_free_result(oir);
	}

	pnfs_osd_xdr_encode_ioerr(p, &accumulated_err);
}
/*
 * Encode the layoutreturn body: a length-prefixed sequence of the ioerrs
 * queued on this layout. Errors are encoded individually while xdr space
 * lasts; once the buffer runs out, all remaining errors are merged into
 * the last successfully reserved slot via encode_accumulated_error().
 */
void
objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay,
			      struct xdr_stream *xdr,
			      const struct nfs4_layoutreturn_args *args)
{
	struct objlayout *objlay = OBJLAYOUT(pnfslay);
	struct objlayout_io_res *oir, *tmp;
	__be32 *start;

	dprintk("%s: Begin\n", __func__);
	start = xdr_reserve_space(xdr, 4);	/* back-patched below */
	BUG_ON(!start);

	spin_lock(&objlay->lock);

	list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
		__be32 *last_xdr = NULL, *p;
		unsigned i;
		int res = 0;

		for (i = 0; i < oir->num_comps; i++) {
			struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];

			if (!ioerr->oer_errno)
				continue;

			dprintk("%s: err[%d]: errno=%d is_write=%d "
				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
				"offset=0x%llx length=0x%llx\n",
				__func__, i, ioerr->oer_errno,
				ioerr->oer_iswrite,
				_DEVID_LO(&ioerr->oer_component.oid_device_id),
				_DEVID_HI(&ioerr->oer_component.oid_device_id),
				ioerr->oer_component.oid_partition_id,
				ioerr->oer_component.oid_object_id,
				ioerr->oer_comp_offset,
				ioerr->oer_comp_length);

			p = pnfs_osd_xdr_ioerr_reserve_space(xdr);
			if (unlikely(!p)) {
				res = -E2BIG;
				break; /* accumulated_error */
			}

			last_xdr = p;
			pnfs_osd_xdr_encode_ioerr(p, &oir->ioerrs[i]);
		}

		/* TODO: use xdr_write_pages */
		if (unlikely(res)) {
			/* no space for even one error descriptor */
			/*
			 * NOTE(review): if the very first reserve fails
			 * last_xdr is still NULL and this BUG_ON() fires —
			 * confirm the xdr buffer always fits one ioerr.
			 */
			BUG_ON(!last_xdr);

			/* we've encountered a situation with lots and lots of
			 * errors and no space to encode them all. Use the last
			 * available slot to report the union of all the
			 * remaining errors.
			 */
			encode_accumulated_error(objlay, last_xdr);
			goto loop_done;
		}
		list_del(&oir->err_list);
		objio_free_result(oir);
	}
loop_done:
	spin_unlock(&objlay->lock);

	/* Back-patch the reserved word with the encoded byte count. */
	*start = cpu_to_be32((xdr->p - start - 1) * 4);
	dprintk("%s: Return\n", __func__);
}
/*
 * Get Device Info API for io engines
 */
struct objlayout_deviceinfo {
	struct page *page;		/* backing page holding the GETDEVICEINFO reply */
	struct pnfs_osd_deviceaddr da; /* This must be last */
};
  506. /* Initialize and call nfs_getdeviceinfo, then decode and return a
  507. * "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo()
  508. * should be called.
  509. */
  510. int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
  511. struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr,
  512. gfp_t gfp_flags)
  513. {
  514. struct objlayout_deviceinfo *odi;
  515. struct pnfs_device pd;
  516. struct page *page, **pages;
  517. u32 *p;
  518. int err;
  519. page = alloc_page(gfp_flags);
  520. if (!page)
  521. return -ENOMEM;
  522. pages = &page;
  523. pd.pages = pages;
  524. memcpy(&pd.dev_id, d_id, sizeof(*d_id));
  525. pd.layout_type = LAYOUT_OSD2_OBJECTS;
  526. pd.pages = &page;
  527. pd.pgbase = 0;
  528. pd.pglen = PAGE_SIZE;
  529. pd.mincount = 0;
  530. err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd);
  531. dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err);
  532. if (err)
  533. goto err_out;
  534. p = page_address(page);
  535. odi = kzalloc(sizeof(*odi), gfp_flags);
  536. if (!odi) {
  537. err = -ENOMEM;
  538. goto err_out;
  539. }
  540. pnfs_osd_xdr_decode_deviceaddr(&odi->da, p);
  541. odi->page = page;
  542. *deviceaddr = &odi->da;
  543. return 0;
  544. err_out:
  545. __free_page(page);
  546. return err;
  547. }
  548. void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr)
  549. {
  550. struct objlayout_deviceinfo *odi = container_of(deviceaddr,
  551. struct objlayout_deviceinfo,
  552. da);
  553. __free_page(odi->page);
  554. kfree(odi);
  555. }
/* Buffer-size limits for the osd_login upcall argument strings. */
enum {
	OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64,
	OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1,
	OSD_LOGIN_UPCALL_PATHLEN = 256
};

/* Path of the user-mode login helper; cleared at runtime when the
 * upcall fails with ENOENT/EACCES (see __objlayout_upcall()). */
static char osd_login_prog[OSD_LOGIN_UPCALL_PATHLEN] = "/sbin/osd_login";

module_param_string(osd_login_prog, osd_login_prog, sizeof(osd_login_prog),
		    0600);
MODULE_PARM_DESC(osd_login_prog, "Path to the osd_login upcall program");

/* NUL-terminated argument strings handed to the osd_login helper. */
struct __auto_login {
	char uri[OBJLAYOUT_MAX_URI_LEN];
	char osdname[OBJLAYOUT_MAX_OSDNAME_LEN];
	char systemid_hex[OBJLAYOUT_MAX_SYSID_HEX_LEN];
};
  570. static int __objlayout_upcall(struct __auto_login *login)
  571. {
  572. static char *envp[] = { "HOME=/",
  573. "TERM=linux",
  574. "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
  575. NULL
  576. };
  577. char *argv[8];
  578. int ret;
  579. if (unlikely(!osd_login_prog[0])) {
  580. dprintk("%s: osd_login_prog is disabled\n", __func__);
  581. return -EACCES;
  582. }
  583. dprintk("%s uri: %s\n", __func__, login->uri);
  584. dprintk("%s osdname %s\n", __func__, login->osdname);
  585. dprintk("%s systemid_hex %s\n", __func__, login->systemid_hex);
  586. argv[0] = (char *)osd_login_prog;
  587. argv[1] = "-u";
  588. argv[2] = login->uri;
  589. argv[3] = "-o";
  590. argv[4] = login->osdname;
  591. argv[5] = "-s";
  592. argv[6] = login->systemid_hex;
  593. argv[7] = NULL;
  594. ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
  595. /*
  596. * Disable the upcall mechanism if we're getting an ENOENT or
  597. * EACCES error. The admin can re-enable it on the fly by using
  598. * sysfs to set the objlayoutdriver.osd_login_prog module parameter once
  599. * the problem has been fixed.
  600. */
  601. if (ret == -ENOENT || ret == -EACCES) {
  602. printk(KERN_ERR "PNFS-OBJ: %s was not found please set "
  603. "objlayoutdriver.osd_login_prog kernel parameter!\n",
  604. osd_login_prog);
  605. osd_login_prog[0] = '\0';
  606. }
  607. dprintk("%s %s return value: %d\n", __func__, osd_login_prog, ret);
  608. return ret;
  609. }
  610. /* Assume dest is all zeros */
  611. static void __copy_nfsS_and_zero_terminate(struct nfs4_string s,
  612. char *dest, int max_len,
  613. const char *var_name)
  614. {
  615. if (!s.len)
  616. return;
  617. if (s.len >= max_len) {
  618. pr_warn_ratelimited(
  619. "objlayout_autologin: %s: s.len(%d) >= max_len(%d)",
  620. var_name, s.len, max_len);
  621. s.len = max_len - 1; /* space for null terminator */
  622. }
  623. memcpy(dest, s.data, s.len);
  624. }
  625. /* Assume sysid is all zeros */
  626. static void _sysid_2_hex(struct nfs4_string s,
  627. char sysid[OBJLAYOUT_MAX_SYSID_HEX_LEN])
  628. {
  629. int i;
  630. char *cur;
  631. if (!s.len)
  632. return;
  633. if (s.len != OSD_SYSTEMID_LEN) {
  634. pr_warn_ratelimited(
  635. "objlayout_autologin: systemid_len(%d) != OSD_SYSTEMID_LEN",
  636. s.len);
  637. if (s.len > OSD_SYSTEMID_LEN)
  638. s.len = OSD_SYSTEMID_LEN;
  639. }
  640. cur = sysid;
  641. for (i = 0; i < s.len; i++)
  642. cur = hex_byte_pack(cur, s.data[i]);
  643. }
  644. int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr)
  645. {
  646. int rc;
  647. struct __auto_login login;
  648. if (!deviceaddr->oda_targetaddr.ota_netaddr.r_addr.len)
  649. return -ENODEV;
  650. memset(&login, 0, sizeof(login));
  651. __copy_nfsS_and_zero_terminate(
  652. deviceaddr->oda_targetaddr.ota_netaddr.r_addr,
  653. login.uri, sizeof(login.uri), "URI");
  654. __copy_nfsS_and_zero_terminate(
  655. deviceaddr->oda_osdname,
  656. login.osdname, sizeof(login.osdname), "OSDNAME");
  657. _sysid_2_hex(deviceaddr->oda_systemid, login.systemid_hex);
  658. rc = __objlayout_upcall(&login);
  659. if (rc > 0) /* script returns positive values */
  660. rc = -ENODEV;
  661. return rc;
  662. }