dm-block-manager.c
/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm-block-manager.h"
#include "dm-persistent-data-internal.h"
#include "../dm-bufio.h"

#include <linux/crc32c.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/rwsem.h>
#include <linux/device-mapper.h>
#include <linux/stacktrace.h>

#define DM_MSG_PREFIX "block manager"

/*----------------------------------------------------------------*/
/*
 * This is a read/write semaphore with a couple of differences.
 *
 * i) There is a restriction on the number of concurrent read locks that
 * may be held at once. This is just an implementation detail.
 *
 * ii) Recursive locking attempts are detected and return EINVAL. A stack
 * trace is also emitted for the previous lock acquisition.
 *
 * iii) Priority is given to write locks.
 */
#define MAX_HOLDERS 4
#define MAX_STACK 10

typedef unsigned long stack_entries[MAX_STACK];
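
/*
 * Lock state is encoded in 'count': 0 means unlocked, a positive value
 * is the number of read holders (at most MAX_HOLDERS), and -1 means
 * write locked.
 */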
struct block_lock {
        spinlock_t lock;
        __s32 count;
        struct list_head waiters;
        struct task_struct *holders[MAX_HOLDERS];

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
        struct stack_trace traces[MAX_HOLDERS];
        stack_entries entries[MAX_HOLDERS];
#endif
};

struct waiter {
        struct list_head list;
        struct task_struct *task;
        int wants_write;
};
static unsigned __find_holder(struct block_lock *lock,
                              struct task_struct *task)
{
        unsigned i;

        for (i = 0; i < MAX_HOLDERS; i++)
                if (lock->holders[i] == task)
                        break;

        BUG_ON(i == MAX_HOLDERS);
        return i;
}

/* call this *after* you increment lock->count */
static void __add_holder(struct block_lock *lock, struct task_struct *task)
{
        unsigned h = __find_holder(lock, NULL);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
        struct stack_trace *t;
#endif

        get_task_struct(task);
        lock->holders[h] = task;

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
        t = lock->traces + h;
        t->nr_entries = 0;
        t->max_entries = MAX_STACK;
        t->entries = lock->entries[h];
        t->skip = 2;
        save_stack_trace(t);
#endif
}

/* call this *before* you decrement lock->count */
static void __del_holder(struct block_lock *lock, struct task_struct *task)
{
        unsigned h = __find_holder(lock, task);
        lock->holders[h] = NULL;
        put_task_struct(task);
}
static int __check_holder(struct block_lock *lock)
{
        unsigned i;
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
        static struct stack_trace t;
        static stack_entries entries;
#endif

        for (i = 0; i < MAX_HOLDERS; i++) {
                if (lock->holders[i] == current) {
                        DMERR("recursive lock detected in pool metadata");
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
                        DMERR("previously held here:");
                        print_stack_trace(lock->traces + i, 4);

                        DMERR("subsequent acquisition attempted here:");
                        t.nr_entries = 0;
                        t.max_entries = MAX_STACK;
                        t.entries = entries;
                        t.skip = 3;
                        save_stack_trace(&t);
                        print_stack_trace(&t, 4);
#endif
                        return -EINVAL;
                }
        }

        return 0;
}
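
/*
 * Park the caller until __wake_waiter() clears w->task; that cleared
 * pointer is the wake-up condition, so a spurious wakeup simply loops.
 */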
static void __wait(struct waiter *w)
{
        for (;;) {
                set_task_state(current, TASK_UNINTERRUPTIBLE);

                if (!w->task)
                        break;

                schedule();
        }

        set_task_state(current, TASK_RUNNING);
}
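
/*
 * The waiter lives on the sleeping task's stack, so the task pointer
 * must be read (and the waiter unlinked) before the store that clears
 * w->task becomes visible; the barrier enforces that ordering, since
 * the woken task may pop its stack frame, waiter and all, as soon as
 * it observes w->task == NULL.
 */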
static void __wake_waiter(struct waiter *w)
{
        struct task_struct *task;

        list_del(&w->list);
        task = w->task;
        smp_mb();
        w->task = NULL;
        wake_up_process(task);
}

/*
 * We either wake a few readers or a single writer.
 */
static void __wake_many(struct block_lock *lock)
{
        struct waiter *w, *tmp;

        BUG_ON(lock->count < 0);
        list_for_each_entry_safe(w, tmp, &lock->waiters, list) {
                if (lock->count >= MAX_HOLDERS)
                        return;

                if (w->wants_write) {
                        if (lock->count > 0)
                                return; /* still read locked */

                        lock->count = -1;
                        __add_holder(lock, w->task);
                        __wake_waiter(w);
                        return;
                }

                lock->count++;
                __add_holder(lock, w->task);
                __wake_waiter(w);
        }
}

static void bl_init(struct block_lock *lock)
{
        int i;

        spin_lock_init(&lock->lock);
        lock->count = 0;
        INIT_LIST_HEAD(&lock->waiters);
        for (i = 0; i < MAX_HOLDERS; i++)
                lock->holders[i] = NULL;
}
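
/*
 * A new reader may only take the lock if it is not write locked, there
 * is a free holder slot, and nothing is queued on the wait list (this
 * is how writers get priority).
 */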
static int __available_for_read(struct block_lock *lock)
{
        return lock->count >= 0 &&
                lock->count < MAX_HOLDERS &&
                list_empty(&lock->waiters);
}
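
/*
 * Take a read lock. Sleeps uninterruptibly if the lock cannot be
 * granted immediately; returns -EINVAL if the caller already holds the
 * lock (recursive acquisition).
 */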
static int bl_down_read(struct block_lock *lock)
{
        int r;
        struct waiter w;

        spin_lock(&lock->lock);
        r = __check_holder(lock);
        if (r) {
                spin_unlock(&lock->lock);
                return r;
        }

        if (__available_for_read(lock)) {
                lock->count++;
                __add_holder(lock, current);
                spin_unlock(&lock->lock);
                return 0;
        }

        get_task_struct(current);

        w.task = current;
        w.wants_write = 0;
        list_add_tail(&w.list, &lock->waiters);
        spin_unlock(&lock->lock);

        __wait(&w);
        put_task_struct(current);
        return 0;
}

static int bl_down_read_nonblock(struct block_lock *lock)
{
        int r;

        spin_lock(&lock->lock);
        r = __check_holder(lock);
        if (r)
                goto out;

        if (__available_for_read(lock)) {
                lock->count++;
                __add_holder(lock, current);
                r = 0;
        } else
                r = -EWOULDBLOCK;

out:
        spin_unlock(&lock->lock);
        return r;
}

static void bl_up_read(struct block_lock *lock)
{
        spin_lock(&lock->lock);
        BUG_ON(lock->count <= 0);
        __del_holder(lock, current);
        --lock->count;
        if (!list_empty(&lock->waiters))
                __wake_many(lock);
        spin_unlock(&lock->lock);
}
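
/*
 * Take the write lock. Only granted immediately when there are no
 * current holders and no queued waiters; otherwise the caller is
 * queued at the head of the wait list (writer priority) and sleeps
 * until woken.
 */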
static int bl_down_write(struct block_lock *lock)
{
        int r;
        struct waiter w;

        spin_lock(&lock->lock);
        r = __check_holder(lock);
        if (r) {
                spin_unlock(&lock->lock);
                return r;
        }

        if (lock->count == 0 && list_empty(&lock->waiters)) {
                lock->count = -1;
                __add_holder(lock, current);
                spin_unlock(&lock->lock);
                return 0;
        }

        get_task_struct(current);
        w.task = current;
        w.wants_write = 1;

        /*
         * Writers are given priority. We know there's only one mutator
         * in the system, so we can live with the ordering reversal this
         * causes among queued waiters.
         */
        list_add(&w.list, &lock->waiters);
        spin_unlock(&lock->lock);

        __wait(&w);
        put_task_struct(current);

        return 0;
}

static void bl_up_write(struct block_lock *lock)
{
        spin_lock(&lock->lock);
        __del_holder(lock, current);
        lock->count = 0;
        if (!list_empty(&lock->waiters))
                __wake_many(lock);
        spin_unlock(&lock->lock);
}
static void report_recursive_bug(dm_block_t b, int r)
{
        if (r == -EINVAL)
                DMERR("recursive acquisition of block %llu requested.",
                      (unsigned long long) b);
}
/*----------------------------------------------------------------*/

/*
 * Block manager is currently implemented using dm-bufio. struct
 * dm_block_manager and struct dm_block map directly onto a couple of
 * structs in the bufio interface. I want to retain the freedom to move
 * away from bufio in the future. So these structs are just cast within
 * this .c file, rather than making it through to the public interface.
 */
static struct dm_buffer *to_buffer(struct dm_block *b)
{
        return (struct dm_buffer *) b;
}

static struct dm_bufio_client *to_bufio(struct dm_block_manager *bm)
{
        return (struct dm_bufio_client *) bm;
}

dm_block_t dm_block_location(struct dm_block *b)
{
        return dm_bufio_get_block_number(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_location);

void *dm_block_data(struct dm_block *b)
{
        return dm_bufio_get_block_data(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_data);
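
/*
 * Per-buffer state kept in dm-bufio's aux data: the validator bound to
 * the block, the block_lock guarding it, and whether the current holder
 * took it for write (so unlock knows to mark the buffer dirty).
 */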
struct buffer_aux {
        struct dm_block_validator *validator;
        struct block_lock lock;
        int write_locked;
};

static void dm_block_manager_alloc_callback(struct dm_buffer *buf)
{
        struct buffer_aux *aux = dm_bufio_get_aux_data(buf);

        aux->validator = NULL;
        bl_init(&aux->lock);
}

static void dm_block_manager_write_callback(struct dm_buffer *buf)
{
        struct buffer_aux *aux = dm_bufio_get_aux_data(buf);

        if (aux->validator) {
                aux->validator->prepare_for_write(aux->validator, (struct dm_block *) buf,
                        dm_bufio_get_block_size(dm_bufio_get_client(buf)));
        }
}

/*----------------------------------------------------------------
 * Public interface
 *--------------------------------------------------------------*/
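
/*
 * Illustrative sketch of how a client typically drives this interface;
 * the block number, validator and destination buffer named below are
 * hypothetical and not part of this file:
 *
 *      struct dm_block *blk;
 *      int r = dm_bm_read_lock(bm, some_block_nr, &some_validator, &blk);
 *      if (r)
 *              return r;
 *      memcpy(local_copy, dm_block_data(blk), dm_bm_block_size(bm));
 *      dm_bm_unlock(blk);
 */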
struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
                                                 unsigned block_size,
                                                 unsigned cache_size,
                                                 unsigned max_held_per_thread)
{
        return (struct dm_block_manager *)
                dm_bufio_client_create(bdev, block_size, max_held_per_thread,
                                       sizeof(struct buffer_aux),
                                       dm_block_manager_alloc_callback,
                                       dm_block_manager_write_callback);
}
EXPORT_SYMBOL_GPL(dm_block_manager_create);

void dm_block_manager_destroy(struct dm_block_manager *bm)
{
        return dm_bufio_client_destroy(to_bufio(bm));
}
EXPORT_SYMBOL_GPL(dm_block_manager_destroy);

unsigned dm_bm_block_size(struct dm_block_manager *bm)
{
        return dm_bufio_get_block_size(to_bufio(bm));
}
EXPORT_SYMBOL_GPL(dm_bm_block_size);

dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm)
{
        return dm_bufio_get_device_size(to_bufio(bm));
}
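
/*
 * Bind a validator to a buffer on first use (running its check callback
 * once), or verify that the caller's validator matches the one already
 * bound to the buffer.
 */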
static int dm_bm_validate_buffer(struct dm_block_manager *bm,
                                 struct dm_buffer *buf,
                                 struct buffer_aux *aux,
                                 struct dm_block_validator *v)
{
        if (unlikely(!aux->validator)) {
                int r;
                if (!v)
                        return 0;
                r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(to_bufio(bm)));
                if (unlikely(r))
                        return r;
                aux->validator = v;
        } else {
                if (unlikely(aux->validator != v)) {
                        DMERR("validator mismatch (old=%s vs new=%s) for block %llu",
                              aux->validator->name, v ? v->name : "NULL",
                              (unsigned long long)
                              dm_bufio_get_block_number(buf));
                        return -EINVAL;
                }
        }

        return 0;
}
int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
                    struct dm_block_validator *v,
                    struct dm_block **result)
{
        struct buffer_aux *aux;
        void *p;
        int r;

        p = dm_bufio_read(to_bufio(bm), b, (struct dm_buffer **) result);
        if (unlikely(IS_ERR(p)))
                return PTR_ERR(p);

        aux = dm_bufio_get_aux_data(to_buffer(*result));
        r = bl_down_read(&aux->lock);
        if (unlikely(r)) {
                dm_bufio_release(to_buffer(*result));
                report_recursive_bug(b, r);
                return r;
        }

        aux->write_locked = 0;

        r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
        if (unlikely(r)) {
                bl_up_read(&aux->lock);
                dm_bufio_release(to_buffer(*result));
                return r;
        }

        return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_read_lock);

int dm_bm_write_lock(struct dm_block_manager *bm,
                     dm_block_t b, struct dm_block_validator *v,
                     struct dm_block **result)
{
        struct buffer_aux *aux;
        void *p;
        int r;

        p = dm_bufio_read(to_bufio(bm), b, (struct dm_buffer **) result);
        if (unlikely(IS_ERR(p)))
                return PTR_ERR(p);

        aux = dm_bufio_get_aux_data(to_buffer(*result));
        r = bl_down_write(&aux->lock);
        if (r) {
                dm_bufio_release(to_buffer(*result));
                report_recursive_bug(b, r);
                return r;
        }

        aux->write_locked = 1;

        r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
        if (unlikely(r)) {
                bl_up_write(&aux->lock);
                dm_bufio_release(to_buffer(*result));
                return r;
        }

        return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock);
int dm_bm_read_try_lock(struct dm_block_manager *bm,
                        dm_block_t b, struct dm_block_validator *v,
                        struct dm_block **result)
{
        struct buffer_aux *aux;
        void *p;
        int r;

        p = dm_bufio_get(to_bufio(bm), b, (struct dm_buffer **) result);
        if (unlikely(IS_ERR(p)))
                return PTR_ERR(p);
        if (unlikely(!p))
                return -EWOULDBLOCK;

        aux = dm_bufio_get_aux_data(to_buffer(*result));
        r = bl_down_read_nonblock(&aux->lock);
        if (r < 0) {
                dm_bufio_release(to_buffer(*result));
                report_recursive_bug(b, r);
                return r;
        }
        aux->write_locked = 0;

        r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
        if (unlikely(r)) {
                bl_up_read(&aux->lock);
                dm_bufio_release(to_buffer(*result));
                return r;
        }

        return 0;
}

int dm_bm_write_lock_zero(struct dm_block_manager *bm,
                          dm_block_t b, struct dm_block_validator *v,
                          struct dm_block **result)
{
        int r;
        struct buffer_aux *aux;
        void *p;

        p = dm_bufio_new(to_bufio(bm), b, (struct dm_buffer **) result);
        if (unlikely(IS_ERR(p)))
                return PTR_ERR(p);

        memset(p, 0, dm_bm_block_size(bm));

        aux = dm_bufio_get_aux_data(to_buffer(*result));
        r = bl_down_write(&aux->lock);
        if (r) {
                dm_bufio_release(to_buffer(*result));
                return r;
        }

        aux->write_locked = 1;
        aux->validator = v;

        return 0;
}
int dm_bm_unlock(struct dm_block *b)
{
        struct buffer_aux *aux;
        aux = dm_bufio_get_aux_data(to_buffer(b));

        if (aux->write_locked) {
                dm_bufio_mark_buffer_dirty(to_buffer(b));
                bl_up_write(&aux->lock);
        } else
                bl_up_read(&aux->lock);

        dm_bufio_release(to_buffer(b));

        return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_unlock);

int dm_bm_unlock_move(struct dm_block *b, dm_block_t n)
{
        struct buffer_aux *aux;

        aux = dm_bufio_get_aux_data(to_buffer(b));

        if (aux->write_locked) {
                dm_bufio_mark_buffer_dirty(to_buffer(b));
                bl_up_write(&aux->lock);
        } else
                bl_up_read(&aux->lock);

        dm_bufio_release_move(to_buffer(b), n);
        return 0;
}
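
/*
 * Commit sequence: write all dirty buffers and flush the device, then
 * unlock the superblock (which marks it dirty), then write and flush
 * again so the superblock only reaches the disk after everything it
 * references is stable.
 */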
int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
                           struct dm_block *superblock)
{
        int r;

        r = dm_bufio_write_dirty_buffers(to_bufio(bm));
        if (unlikely(r))
                return r;
        r = dm_bufio_issue_flush(to_bufio(bm));
        if (unlikely(r))
                return r;

        dm_bm_unlock(superblock);

        r = dm_bufio_write_dirty_buffers(to_bufio(bm));
        if (unlikely(r))
                return r;
        r = dm_bufio_issue_flush(to_bufio(bm));
        if (unlikely(r))
                return r;

        return 0;
}
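
/*
 * Checksum helper: crc32c over the data, seeded with ~0, with init_xor
 * applied to the result so callers can salt it per structure type.
 */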
u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor)
{
        return crc32c(~(u32) 0, data, len) ^ init_xor;
}
EXPORT_SYMBOL_GPL(dm_bm_checksum);

/*----------------------------------------------------------------*/

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_DESCRIPTION("Immutable metadata library for dm");

/*----------------------------------------------------------------*/