pblk-recovery.c

/*
 * Copyright (C) 2016 CNEX Labs
 * Initial: Javier Gonzalez <javier@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * pblk-recovery.c - pblk's recovery path
 */

#include "pblk.h"
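
/*
 * Re-submit sectors that failed on the write path: the failed entries are
 * read back from the write buffer (rwb) into a fresh bio and sent to the
 * device again as a regular write request.
 */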
void pblk_submit_rec(struct work_struct *work)
{
	struct pblk_rec_ctx *recovery =
			container_of(work, struct pblk_rec_ctx, ws_rec);
	struct pblk *pblk = recovery->pblk;
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_rq *rqd = recovery->rqd;
	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
	int max_secs = nvm_max_phys_sects(dev);
	struct bio *bio;
	unsigned int nr_rec_secs;
	unsigned int pgs_read;
	int ret;

	nr_rec_secs = bitmap_weight((unsigned long int *)&rqd->ppa_status,
								max_secs);

	bio = bio_alloc(GFP_KERNEL, nr_rec_secs);
	if (!bio) {
		pr_err("pblk: not able to create recovery bio\n");
		return;
	}

	bio->bi_iter.bi_sector = 0;
	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
	rqd->bio = bio;
	rqd->nr_ppas = nr_rec_secs;

	pgs_read = pblk_rb_read_to_bio_list(&pblk->rwb, bio, &recovery->failed,
								nr_rec_secs);
	if (pgs_read != nr_rec_secs) {
		pr_err("pblk: could not read recovery entries\n");
		goto err;
	}

	if (pblk_setup_w_rec_rq(pblk, rqd, c_ctx)) {
		pr_err("pblk: could not setup recovery request\n");
		goto err;
	}

#ifdef CONFIG_NVM_DEBUG
	atomic_long_add(nr_rec_secs, &pblk->recov_writes);
#endif

	ret = pblk_submit_io(pblk, rqd);
	if (ret) {
		pr_err("pblk: I/O submission failed: %d\n", ret);
		goto err;
	}

	mempool_free(recovery, pblk->rec_pool);
	return;

err:
	bio_put(bio);
	pblk_free_rqd(pblk, rqd, WRITE);
}
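
/*
 * Prepare a recovery write request for the entries of @c_ctx that did not
 * complete: the completion bitmap is shifted by the number of completed
 * entries, and the write context is split between the original request
 * (completed part) and the recovery request (part to be re-written).
 */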
int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
			struct pblk_rec_ctx *recovery, u64 *comp_bits,
			unsigned int comp)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	int max_secs = nvm_max_phys_sects(dev);
	struct nvm_rq *rec_rqd;
	struct pblk_c_ctx *rec_ctx;
	int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded;

	rec_rqd = pblk_alloc_rqd(pblk, WRITE);
	if (IS_ERR(rec_rqd)) {
		pr_err("pblk: could not create recovery req.\n");
		return -ENOMEM;
	}

	rec_ctx = nvm_rq_to_pdu(rec_rqd);

	/* Copy completion bitmap, but exclude the first X completed entries */
	bitmap_shift_right((unsigned long int *)&rec_rqd->ppa_status,
				(unsigned long int *)comp_bits,
				comp, max_secs);

	/* Save the context for the entries that need to be re-written and
	 * update current context with the completed entries.
	 */
	rec_ctx->sentry = pblk_rb_wrap_pos(&pblk->rwb, c_ctx->sentry + comp);
	if (comp >= c_ctx->nr_valid) {
		rec_ctx->nr_valid = 0;
		rec_ctx->nr_padded = nr_entries - comp;

		c_ctx->nr_padded = comp - c_ctx->nr_valid;
	} else {
		rec_ctx->nr_valid = c_ctx->nr_valid - comp;
		rec_ctx->nr_padded = c_ctx->nr_padded;

		c_ctx->nr_valid = comp;
		c_ctx->nr_padded = 0;
	}

	recovery->rqd = rec_rqd;
	recovery->pblk = pblk;

	return 0;
}
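
/*
 * Validate the end-of-line metadata (CRC and magic identifier) and, if it
 * checks out, return the LBA list stored in it. A NULL return means the
 * emeta cannot be trusted and the line has to be recovered from the
 * out-of-band (OOB) area instead.
 */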
__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta_buf)
{
	u32 crc;

	crc = pblk_calc_emeta_crc(pblk, emeta_buf);
	if (le32_to_cpu(emeta_buf->crc) != crc)
		return NULL;

	if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC)
		return NULL;

	return emeta_to_lbas(pblk, emeta_buf);
}
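
/*
 * Rebuild the L2P entries of a line from the LBA list kept in its emeta.
 * Bad blocks are skipped, and sectors marked ADDR_EMPTY are invalidated
 * instead of mapped.
 */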
static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_emeta *emeta = line->emeta;
	struct line_emeta *emeta_buf = emeta->buf;
	__le64 *lba_list;
	int data_start;
	int nr_data_lbas, nr_valid_lbas, nr_lbas = 0;
	int i;

	lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
	if (!lba_list)
		return 1;

	data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
	nr_data_lbas = lm->sec_per_line - lm->emeta_sec[0];
	nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas);

	for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) {
		struct ppa_addr ppa;
		int pos;

		ppa = addr_to_pblk_ppa(pblk, i, line->id);
		pos = pblk_ppa_to_pos(geo, ppa);

		/* Do not update bad blocks */
		if (test_bit(pos, line->blk_bitmap))
			continue;

		if (le64_to_cpu(lba_list[i]) == ADDR_EMPTY) {
			spin_lock(&line->lock);
			if (test_and_set_bit(i, line->invalid_bitmap))
				WARN_ONCE(1, "pblk: rec. double invalidate:\n");
			else
				le32_add_cpu(line->vsc, -1);
			spin_unlock(&line->lock);

			continue;
		}

		pblk_update_map(pblk, le64_to_cpu(lba_list[i]), ppa);
		nr_lbas++;
	}

	if (nr_valid_lbas != nr_lbas)
		pr_err("pblk: line %d - inconsistent lba list(%llu/%d)\n",
			line->id,
			(unsigned long long)le64_to_cpu(emeta_buf->nr_valid_lbas),
			nr_lbas);

	line->left_msecs = 0;

	return 0;
}
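
/*
 * Number of sectors in a line that can hold user data: the line size minus
 * the start/end metadata sectors and the sectors lost to bad blocks.
 */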
static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);

	return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] -
				nr_bb * geo->sec_per_blk;
}

struct pblk_recov_alloc {
	struct ppa_addr *ppa_list;
	struct pblk_sec_meta *meta_list;
	struct nvm_rq *rqd;
	void *data;
	dma_addr_t dma_ppa_list;
	dma_addr_t dma_meta_list;
};
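
/*
 * Read the sectors between @r_ptr and the line's current write pointer and
 * rebuild their L2P entries from the lba stored in the per-sector OOB
 * metadata. Called after padding to pick up the data written before a crash.
 */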
static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
			       struct pblk_recov_alloc p, u64 r_ptr)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct ppa_addr *ppa_list;
	struct pblk_sec_meta *meta_list;
	struct nvm_rq *rqd;
	struct bio *bio;
	void *data;
	dma_addr_t dma_ppa_list, dma_meta_list;
	u64 r_ptr_int;
	int left_ppas;
	int rq_ppas, rq_len;
	int i, j;
	int ret = 0;
	DECLARE_COMPLETION_ONSTACK(wait);

	ppa_list = p.ppa_list;
	meta_list = p.meta_list;
	rqd = p.rqd;
	data = p.data;
	dma_ppa_list = p.dma_ppa_list;
	dma_meta_list = p.dma_meta_list;

	left_ppas = line->cur_sec - r_ptr;
	if (!left_ppas)
		return 0;

	r_ptr_int = r_ptr;

next_read_rq:
	memset(rqd, 0, pblk_g_rq_size);

	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
	if (!rq_ppas)
		rq_ppas = pblk->min_write_pgs;
	rq_len = rq_ppas * geo->sec_size;

	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	bio->bi_iter.bi_sector = 0; /* internal bio */
	bio_set_op_attrs(bio, REQ_OP_READ, 0);

	rqd->bio = bio;
	rqd->opcode = NVM_OP_PREAD;
	rqd->meta_list = meta_list;
	rqd->nr_ppas = rq_ppas;
	rqd->ppa_list = ppa_list;
	rqd->dma_ppa_list = dma_ppa_list;
	rqd->dma_meta_list = dma_meta_list;
	rqd->end_io = pblk_end_io_sync;
	rqd->private = &wait;

	if (pblk_io_aligned(pblk, rq_ppas))
		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
	else
		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);

	for (i = 0; i < rqd->nr_ppas; ) {
		struct ppa_addr ppa;
		int pos;

		ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id);
		pos = pblk_dev_ppa_to_pos(geo, ppa);

		while (test_bit(pos, line->blk_bitmap)) {
			r_ptr_int += pblk->min_write_pgs;
			ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id);
			pos = pblk_dev_ppa_to_pos(geo, ppa);
		}

		for (j = 0; j < pblk->min_write_pgs; j++, i++, r_ptr_int++)
			rqd->ppa_list[i] =
				addr_to_gen_ppa(pblk, r_ptr_int, line->id);
	}

	/* If read fails, more padding is needed */
	ret = pblk_submit_io(pblk, rqd);
	if (ret) {
		pr_err("pblk: I/O submission failed: %d\n", ret);
		return ret;
	}

	if (!wait_for_completion_io_timeout(&wait,
				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
		pr_err("pblk: L2P recovery read timed out\n");
		return -EINTR;
	}
	atomic_dec(&pblk->inflight_io);
	reinit_completion(&wait);

	/* At this point, the read should not fail. If it does, it is a problem
	 * we cannot recover from here. Need FTL log.
	 */
	if (rqd->error) {
		pr_err("pblk: L2P recovery failed (%d)\n", rqd->error);
		return -EINTR;
	}

	for (i = 0; i < rqd->nr_ppas; i++) {
		u64 lba = le64_to_cpu(meta_list[i].lba);

		if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
			continue;

		pblk_update_map(pblk, lba, rqd->ppa_list[i]);
	}

	left_ppas -= rq_ppas;
	if (left_ppas > 0)
		goto next_read_rq;

	return 0;
}
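
/*
 * Completion plumbing for padding writes: each outstanding pad request holds
 * a reference on pad_rq and drops it from its end_io callback; the waiter is
 * woken once the last reference is released.
 */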
static void pblk_recov_complete(struct kref *ref)
{
	struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);

	complete(&pad_rq->wait);
}

static void pblk_end_io_recov(struct nvm_rq *rqd)
{
	struct pblk_pad_rq *pad_rq = rqd->private;
	struct pblk *pblk = pad_rq->pblk;
	struct nvm_tgt_dev *dev = pblk->dev;

	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);

	bio_put(rqd->bio);
	nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
	pblk_free_rqd(pblk, rqd, WRITE);

	atomic_dec(&pblk->inflight_io);
	kref_put(&pad_rq->ref, pblk_recov_complete);
}
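
/*
 * Pad @left_ppas sectors starting at the line's current write pointer. The
 * padded sectors carry ADDR_EMPTY metadata and are invalidated in the L2P
 * map, so that partially written flash pages can later be read back safely.
 */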
static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
			      int left_ppas)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct ppa_addr *ppa_list;
	struct pblk_sec_meta *meta_list;
	struct pblk_pad_rq *pad_rq;
	struct nvm_rq *rqd;
	struct bio *bio;
	void *data;
	dma_addr_t dma_ppa_list, dma_meta_list;
	__le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
	u64 w_ptr = line->cur_sec;
	int left_line_ppas, rq_ppas, rq_len;
	int i, j;
	int ret = 0;

	spin_lock(&line->lock);
	left_line_ppas = line->left_msecs;
	spin_unlock(&line->lock);

	pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
	if (!pad_rq)
		return -ENOMEM;

	data = vzalloc(pblk->max_write_pgs * geo->sec_size);
	if (!data) {
		ret = -ENOMEM;
		goto free_rq;
	}

	pad_rq->pblk = pblk;
	init_completion(&pad_rq->wait);
	kref_init(&pad_rq->ref);

next_pad_rq:
	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
	if (rq_ppas < pblk->min_write_pgs) {
		pr_err("pblk: corrupted pad line %d\n", line->id);
		ret = -EINVAL;
		goto fail_free_pad;
	}
	rq_len = rq_ppas * geo->sec_size;

	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
	if (!meta_list) {
		ret = -ENOMEM;
		goto fail_free_pad;
	}

	ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;

	rqd = pblk_alloc_rqd(pblk, WRITE);
	if (IS_ERR(rqd)) {
		ret = PTR_ERR(rqd);
		goto fail_free_meta;
	}

	bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
						PBLK_VMALLOC_META, GFP_KERNEL);
	if (IS_ERR(bio)) {
		ret = PTR_ERR(bio);
		goto fail_free_rqd;
	}

	bio->bi_iter.bi_sector = 0; /* internal bio */
	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

	rqd->bio = bio;
	rqd->opcode = NVM_OP_PWRITE;
	rqd->flags = pblk_set_progr_mode(pblk, WRITE);
	rqd->meta_list = meta_list;
	rqd->nr_ppas = rq_ppas;
	rqd->ppa_list = ppa_list;
	rqd->dma_ppa_list = dma_ppa_list;
	rqd->dma_meta_list = dma_meta_list;
	rqd->end_io = pblk_end_io_recov;
	rqd->private = pad_rq;

	for (i = 0; i < rqd->nr_ppas; ) {
		struct ppa_addr ppa;
		int pos;

		w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
		ppa = addr_to_pblk_ppa(pblk, w_ptr, line->id);
		pos = pblk_ppa_to_pos(geo, ppa);

		while (test_bit(pos, line->blk_bitmap)) {
			w_ptr += pblk->min_write_pgs;
			ppa = addr_to_pblk_ppa(pblk, w_ptr, line->id);
			pos = pblk_ppa_to_pos(geo, ppa);
		}

		for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
			struct ppa_addr dev_ppa;
			__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);

			dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);

			pblk_map_invalidate(pblk, dev_ppa);
			lba_list[w_ptr] = meta_list[i].lba = addr_empty;
			rqd->ppa_list[i] = dev_ppa;
		}
	}

	kref_get(&pad_rq->ref);
	pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);

	ret = pblk_submit_io(pblk, rqd);
	if (ret) {
		pr_err("pblk: I/O submission failed: %d\n", ret);
		pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
		goto fail_free_bio;
	}

	left_line_ppas -= rq_ppas;
	left_ppas -= rq_ppas;
	if (left_ppas && left_line_ppas)
		goto next_pad_rq;

	kref_put(&pad_rq->ref, pblk_recov_complete);

	if (!wait_for_completion_io_timeout(&pad_rq->wait,
				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
		pr_err("pblk: pad write timed out\n");
		ret = -ETIME;
	}

	if (!pblk_line_is_full(line))
		pr_err("pblk: corrupted padded line: %d\n", line->id);

	vfree(data);
free_rq:
	kfree(pad_rq);
	return ret;

fail_free_bio:
	bio_put(bio);
fail_free_rqd:
	pblk_free_rqd(pblk, rqd, WRITE);
fail_free_meta:
	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
fail_free_pad:
	kfree(pad_rq);
	vfree(data);
	return ret;
}

/* When this function is called, it means that not all upper pages have been
 * written in a page that contains valid data. In order to recover this data,
 * we first find the write pointer on the device, then we pad all necessary
 * sectors, and finally attempt to read the valid data.
 */
static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
				   struct pblk_recov_alloc p)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct ppa_addr *ppa_list;
	struct pblk_sec_meta *meta_list;
	struct nvm_rq *rqd;
	struct bio *bio;
	void *data;
	dma_addr_t dma_ppa_list, dma_meta_list;
	u64 w_ptr = 0, r_ptr;
	int rq_ppas, rq_len;
	int i, j;
	int ret = 0;
	int rec_round;
	int left_ppas = pblk_calc_sec_in_line(pblk, line) - line->cur_sec;
	DECLARE_COMPLETION_ONSTACK(wait);

	ppa_list = p.ppa_list;
	meta_list = p.meta_list;
	rqd = p.rqd;
	data = p.data;
	dma_ppa_list = p.dma_ppa_list;
	dma_meta_list = p.dma_meta_list;

	/* we could recover up until the line write pointer */
	r_ptr = line->cur_sec;
	rec_round = 0;

next_rq:
	memset(rqd, 0, pblk_g_rq_size);

	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
	if (!rq_ppas)
		rq_ppas = pblk->min_write_pgs;
	rq_len = rq_ppas * geo->sec_size;

	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	bio->bi_iter.bi_sector = 0; /* internal bio */
	bio_set_op_attrs(bio, REQ_OP_READ, 0);

	rqd->bio = bio;
	rqd->opcode = NVM_OP_PREAD;
	rqd->meta_list = meta_list;
	rqd->nr_ppas = rq_ppas;
	rqd->ppa_list = ppa_list;
	rqd->dma_ppa_list = dma_ppa_list;
	rqd->dma_meta_list = dma_meta_list;
	rqd->end_io = pblk_end_io_sync;
	rqd->private = &wait;

	if (pblk_io_aligned(pblk, rq_ppas))
		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
	else
		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);

	for (i = 0; i < rqd->nr_ppas; ) {
		struct ppa_addr ppa;
		int pos;

		w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
		ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
		pos = pblk_dev_ppa_to_pos(geo, ppa);

		while (test_bit(pos, line->blk_bitmap)) {
			w_ptr += pblk->min_write_pgs;
			ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
			pos = pblk_dev_ppa_to_pos(geo, ppa);
		}

		for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++)
			rqd->ppa_list[i] =
				addr_to_gen_ppa(pblk, w_ptr, line->id);
	}

	ret = pblk_submit_io(pblk, rqd);
	if (ret) {
		pr_err("pblk: I/O submission failed: %d\n", ret);
		return ret;
	}

	if (!wait_for_completion_io_timeout(&wait,
				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
		pr_err("pblk: L2P recovery read timed out\n");
	}
	atomic_dec(&pblk->inflight_io);
	reinit_completion(&wait);

	/* This should not happen since the read failed during normal recovery,
	 * but the media works funny sometimes...
	 */
	if (!rec_round++ && !rqd->error) {
		rec_round = 0;
		for (i = 0; i < rqd->nr_ppas; i++, r_ptr++) {
			u64 lba = le64_to_cpu(meta_list[i].lba);

			if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
				continue;

			pblk_update_map(pblk, lba, rqd->ppa_list[i]);
		}
	}

	/* Reached the end of the written line */
	if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) {
		int pad_secs, nr_error_bits, bit;
		int ret;

		bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas);
		nr_error_bits = rqd->nr_ppas - bit;

		/* Roll back failed sectors */
		line->cur_sec -= nr_error_bits;
		line->left_msecs += nr_error_bits;
		bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);

		pad_secs = pblk_pad_distance(pblk);
		if (pad_secs > line->left_msecs)
			pad_secs = line->left_msecs;

		ret = pblk_recov_pad_oob(pblk, line, pad_secs);
		if (ret)
			pr_err("pblk: OOB padding failed (err:%d)\n", ret);

		ret = pblk_recov_read_oob(pblk, line, p, r_ptr);
		if (ret)
			pr_err("pblk: OOB read failed (err:%d)\n", ret);

		left_ppas = 0;
	}

	left_ppas -= rq_ppas;
	if (left_ppas > 0)
		goto next_rq;

	return ret;
}
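
/*
 * First recovery pass: walk the line and rebuild L2P mappings from the OOB
 * metadata until a read error indicates the end of the written area. *done
 * is cleared if the scan stops on an error other than an empty page, in
 * which case a second pass with padding is required.
 */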
static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
			       struct pblk_recov_alloc p, int *done)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct ppa_addr *ppa_list;
	struct pblk_sec_meta *meta_list;
	struct nvm_rq *rqd;
	struct bio *bio;
	void *data;
	dma_addr_t dma_ppa_list, dma_meta_list;
	u64 paddr;
	int rq_ppas, rq_len;
	int i, j;
	int ret = 0;
	int left_ppas = pblk_calc_sec_in_line(pblk, line);
	DECLARE_COMPLETION_ONSTACK(wait);

	ppa_list = p.ppa_list;
	meta_list = p.meta_list;
	rqd = p.rqd;
	data = p.data;
	dma_ppa_list = p.dma_ppa_list;
	dma_meta_list = p.dma_meta_list;

	*done = 1;

next_rq:
	memset(rqd, 0, pblk_g_rq_size);

	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
	if (!rq_ppas)
		rq_ppas = pblk->min_write_pgs;
	rq_len = rq_ppas * geo->sec_size;

	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	bio->bi_iter.bi_sector = 0; /* internal bio */
	bio_set_op_attrs(bio, REQ_OP_READ, 0);

	rqd->bio = bio;
	rqd->opcode = NVM_OP_PREAD;
	rqd->meta_list = meta_list;
	rqd->nr_ppas = rq_ppas;
	rqd->ppa_list = ppa_list;
	rqd->dma_ppa_list = dma_ppa_list;
	rqd->dma_meta_list = dma_meta_list;
	rqd->end_io = pblk_end_io_sync;
	rqd->private = &wait;

	if (pblk_io_aligned(pblk, rq_ppas))
		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
	else
		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);

	for (i = 0; i < rqd->nr_ppas; ) {
		struct ppa_addr ppa;
		int pos;

		paddr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
		ppa = addr_to_gen_ppa(pblk, paddr, line->id);
		pos = pblk_dev_ppa_to_pos(geo, ppa);

		while (test_bit(pos, line->blk_bitmap)) {
			paddr += pblk->min_write_pgs;
			ppa = addr_to_gen_ppa(pblk, paddr, line->id);
			pos = pblk_dev_ppa_to_pos(geo, ppa);
		}

		for (j = 0; j < pblk->min_write_pgs; j++, i++, paddr++)
			rqd->ppa_list[i] =
				addr_to_gen_ppa(pblk, paddr, line->id);
	}

	ret = pblk_submit_io(pblk, rqd);
	if (ret) {
		pr_err("pblk: I/O submission failed: %d\n", ret);
		bio_put(bio);
		return ret;
	}

	if (!wait_for_completion_io_timeout(&wait,
				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
		pr_err("pblk: L2P recovery read timed out\n");
	}
	atomic_dec(&pblk->inflight_io);
	reinit_completion(&wait);

	/* Reached the end of the written line */
	if (rqd->error) {
		int nr_error_bits, bit;

		bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas);
		nr_error_bits = rqd->nr_ppas - bit;

		/* Roll back failed sectors */
		line->cur_sec -= nr_error_bits;
		line->left_msecs += nr_error_bits;
		bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);

		left_ppas = 0;
		rqd->nr_ppas = bit;

		if (rqd->error != NVM_RSP_ERR_EMPTYPAGE)
			*done = 0;
	}

	for (i = 0; i < rqd->nr_ppas; i++) {
		u64 lba = le64_to_cpu(meta_list[i].lba);

		if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
			continue;

		pblk_update_map(pblk, lba, rqd->ppa_list[i]);
	}

	left_ppas -= rq_ppas;
	if (left_ppas > 0)
		goto next_rq;

	return ret;
}

/* Scan line for lbas stored in the out-of-band (OOB) area */
static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct nvm_rq *rqd;
	struct ppa_addr *ppa_list;
	struct pblk_sec_meta *meta_list;
	struct pblk_recov_alloc p;
	void *data;
	dma_addr_t dma_ppa_list, dma_meta_list;
	int done, ret = 0;

	rqd = pblk_alloc_rqd(pblk, READ);
	if (IS_ERR(rqd))
		return PTR_ERR(rqd);

	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
	if (!meta_list) {
		ret = -ENOMEM;
		goto free_rqd;
	}

	ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;

	data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL);
	if (!data) {
		ret = -ENOMEM;
		goto free_meta_list;
	}

	p.ppa_list = ppa_list;
	p.meta_list = meta_list;
	p.rqd = rqd;
	p.data = data;
	p.dma_ppa_list = dma_ppa_list;
	p.dma_meta_list = dma_meta_list;

	ret = pblk_recov_scan_oob(pblk, line, p, &done);
	if (ret) {
		pr_err("pblk: could not recover L2P from OOB\n");
		goto out;
	}

	if (!done) {
		ret = pblk_recov_scan_all_oob(pblk, line, p);
		if (ret) {
			pr_err("pblk: could not recover L2P from OOB\n");
			goto out;
		}
	}

	if (pblk_line_is_full(line))
		pblk_line_recov_close(pblk, line);

out:
	kfree(data);
free_meta_list:
	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
free_rqd:
	pblk_free_rqd(pblk, rqd, READ);

	return ret;
}

/* Insert lines ordered by sequence number (seq_num) on list */
static void pblk_recov_line_add_ordered(struct list_head *head,
					struct pblk_line *line)
{
	struct pblk_line *t = NULL;

	list_for_each_entry(t, head, list)
		if (t->seq_nr > line->seq_nr)
			break;

	__list_add(&line->list, t->list.prev, &t->list);
}
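
/*
 * Scan-based L2P recovery: read the start-of-line metadata of every line,
 * order the valid lines by sequence number, and rebuild their mappings from
 * emeta when possible, falling back to an OOB scan otherwise. Returns the
 * open data line to resume writing on, NULL if none, or an ERR_PTR on
 * incompatible on-media metadata.
 */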
struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *line, *tline, *data_line = NULL;
	struct pblk_smeta *smeta;
	struct pblk_emeta *emeta;
	struct line_smeta *smeta_buf;
	int found_lines = 0, recovered_lines = 0, open_lines = 0;
	int is_next = 0;
	int meta_line;
	int i, valid_uuid = 0;
	LIST_HEAD(recov_list);

	/* TODO: Implement FTL snapshot */

	/* Scan recovery - takes place when FTL snapshot fails */
	spin_lock(&l_mg->free_lock);
	meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
	set_bit(meta_line, &l_mg->meta_bitmap);
	smeta = l_mg->sline_meta[meta_line];
	emeta = l_mg->eline_meta[meta_line];
	smeta_buf = (struct line_smeta *)smeta;
	spin_unlock(&l_mg->free_lock);

	/* Order data lines using their sequence number */
	for (i = 0; i < l_mg->nr_lines; i++) {
		u32 crc;

		line = &pblk->lines[i];

		memset(smeta, 0, lm->smeta_len);
		line->smeta = smeta;
		line->lun_bitmap = ((void *)(smeta_buf)) +
						sizeof(struct line_smeta);

		/* Lines that cannot be read are assumed as not written here */
		if (pblk_line_read_smeta(pblk, line))
			continue;

		crc = pblk_calc_smeta_crc(pblk, smeta_buf);
		if (le32_to_cpu(smeta_buf->crc) != crc)
			continue;

		if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
			continue;

		if (le16_to_cpu(smeta_buf->header.version) != 1) {
			pr_err("pblk: found incompatible line version %u\n",
					smeta_buf->header.version);
			return ERR_PTR(-EINVAL);
		}

		/* The first valid instance uuid is used for initialization */
		if (!valid_uuid) {
			memcpy(pblk->instance_uuid, smeta_buf->header.uuid, 16);
			valid_uuid = 1;
		}

		if (memcmp(pblk->instance_uuid, smeta_buf->header.uuid, 16)) {
			pr_debug("pblk: ignore line %u due to uuid mismatch\n",
					i);
			continue;
		}

		/* Update line metadata */
		spin_lock(&line->lock);
		line->id = le32_to_cpu(smeta_buf->header.id);
		line->type = le16_to_cpu(smeta_buf->header.type);
		line->seq_nr = le64_to_cpu(smeta_buf->seq_nr);
		spin_unlock(&line->lock);

		/* Update general metadata */
		spin_lock(&l_mg->free_lock);
		if (line->seq_nr >= l_mg->d_seq_nr)
			l_mg->d_seq_nr = line->seq_nr + 1;
		l_mg->nr_free_lines--;
		spin_unlock(&l_mg->free_lock);

		if (pblk_line_recov_alloc(pblk, line))
			goto out;

		pblk_recov_line_add_ordered(&recov_list, line);
		found_lines++;
		pr_debug("pblk: recovering data line %d, seq:%llu\n",
				line->id, smeta_buf->seq_nr);
	}
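
	/*
	 * No lines with valid metadata were found: treat this as a fresh
	 * pblk instance and generate a new instance uuid.
	 */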
	if (!found_lines) {
		pblk_setup_uuid(pblk);

		spin_lock(&l_mg->free_lock);
		WARN_ON_ONCE(!test_and_clear_bit(meta_line,
							&l_mg->meta_bitmap));
		spin_unlock(&l_mg->free_lock);

		goto out;
	}

	/* Verify closed blocks and recover this portion of L2P table */
	list_for_each_entry_safe(line, tline, &recov_list, list) {
		int off, nr_bb;

		recovered_lines++;

		/* Calculate where emeta starts based on the line bb */
		off = lm->sec_per_line - lm->emeta_sec[0];
		nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
		off -= nr_bb * geo->sec_per_pl;

		line->emeta_ssec = off;
		line->emeta = emeta;
		memset(line->emeta->buf, 0, lm->emeta_len[0]);

		if (pblk_line_read_emeta(pblk, line, line->emeta->buf)) {
			pblk_recov_l2p_from_oob(pblk, line);
			goto next;
		}

		if (pblk_recov_l2p_from_emeta(pblk, line))
			pblk_recov_l2p_from_oob(pblk, line);

next:
		if (pblk_line_is_full(line)) {
			struct list_head *move_list;

			spin_lock(&line->lock);
			line->state = PBLK_LINESTATE_CLOSED;
			move_list = pblk_line_gc_list(pblk, line);
			spin_unlock(&line->lock);

			spin_lock(&l_mg->gc_lock);
			list_move_tail(&line->list, move_list);
			spin_unlock(&l_mg->gc_lock);

			mempool_free(line->map_bitmap, pblk->line_meta_pool);
			line->map_bitmap = NULL;
			line->smeta = NULL;
			line->emeta = NULL;
		} else {
			if (open_lines > 1)
				pr_err("pblk: failed to recover L2P\n");

			open_lines++;
			line->meta_line = meta_line;
			data_line = line;
		}
	}

	if (!open_lines) {
		spin_lock(&l_mg->free_lock);
		WARN_ON_ONCE(!test_and_clear_bit(meta_line,
							&l_mg->meta_bitmap));
		spin_unlock(&l_mg->free_lock);
		pblk_line_replace_data(pblk);
	} else {
		spin_lock(&l_mg->free_lock);
		/* Allocate next line for preparation */
		l_mg->data_next = pblk_line_get(pblk);
		if (l_mg->data_next) {
			l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
			l_mg->data_next->type = PBLK_LINETYPE_DATA;
			is_next = 1;
		}
		spin_unlock(&l_mg->free_lock);
	}

	if (is_next) {
		pblk_line_erase(pblk, l_mg->data_next);
		pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
	}

out:
	if (found_lines != recovered_lines)
		pr_err("pblk: failed to recover all found lines %d/%d\n",
				found_lines, recovered_lines);

	return data_line;
}

/*
 * Pad current line
 */
int pblk_recov_pad(struct pblk *pblk)
{
	struct pblk_line *line;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	int left_msecs;
	int ret = 0;

	spin_lock(&l_mg->free_lock);
	line = l_mg->data_line;
	left_msecs = line->left_msecs;
	spin_unlock(&l_mg->free_lock);

	ret = pblk_recov_pad_oob(pblk, line, left_msecs);
	if (ret) {
		pr_err("pblk: Tear down padding failed (%d)\n", ret);
		return ret;
	}

	pblk_line_close_meta(pblk, line);
	return ret;
}