plock.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504
  1. /*
  2. * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
  3. *
  4. * This copyrighted material is made available to anyone wishing to use,
  5. * modify, copy, or redistribute it subject to the terms and conditions
  6. * of the GNU General Public License version 2.
  7. */
  8. #include <linux/fs.h>
  9. #include <linux/miscdevice.h>
  10. #include <linux/poll.h>
  11. #include <linux/dlm.h>
  12. #include <linux/dlm_plock.h>
  13. #include <linux/slab.h>
  14. #include "dlm_internal.h"
  15. #include "lockspace.h"
  16. static spinlock_t ops_lock;
  17. static struct list_head send_list;
  18. static struct list_head recv_list;
  19. static wait_queue_head_t send_wq;
  20. static wait_queue_head_t recv_wq;
  21. struct plock_op {
  22. struct list_head list;
  23. int done;
  24. struct dlm_plock_info info;
  25. };
  26. struct plock_xop {
  27. struct plock_op xop;
  28. void *callback;
  29. void *fl;
  30. void *file;
  31. struct file_lock flc;
  32. };
  33. static inline void set_version(struct dlm_plock_info *info)
  34. {
  35. info->version[0] = DLM_PLOCK_VERSION_MAJOR;
  36. info->version[1] = DLM_PLOCK_VERSION_MINOR;
  37. info->version[2] = DLM_PLOCK_VERSION_PATCH;
  38. }
  39. static int check_version(struct dlm_plock_info *info)
  40. {
  41. if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) ||
  42. (DLM_PLOCK_VERSION_MINOR < info->version[1])) {
  43. log_print("plock device version mismatch: "
  44. "kernel (%u.%u.%u), user (%u.%u.%u)",
  45. DLM_PLOCK_VERSION_MAJOR,
  46. DLM_PLOCK_VERSION_MINOR,
  47. DLM_PLOCK_VERSION_PATCH,
  48. info->version[0],
  49. info->version[1],
  50. info->version[2]);
  51. return -EINVAL;
  52. }
  53. return 0;
  54. }
  55. static void send_op(struct plock_op *op)
  56. {
  57. set_version(&op->info);
  58. INIT_LIST_HEAD(&op->list);
  59. spin_lock(&ops_lock);
  60. list_add_tail(&op->list, &send_list);
  61. spin_unlock(&ops_lock);
  62. wake_up(&send_wq);
  63. }
  64. /* If a process was killed while waiting for the only plock on a file,
  65. locks_remove_posix will not see any lock on the file so it won't
  66. send an unlock-close to us to pass on to userspace to clean up the
  67. abandoned waiter. So, we have to insert the unlock-close when the
  68. lock call is interrupted. */
  69. static void do_unlock_close(struct dlm_ls *ls, u64 number,
  70. struct file *file, struct file_lock *fl)
  71. {
  72. struct plock_op *op;
  73. op = kzalloc(sizeof(*op), GFP_NOFS);
  74. if (!op)
  75. return;
  76. op->info.optype = DLM_PLOCK_OP_UNLOCK;
  77. op->info.pid = fl->fl_pid;
  78. op->info.fsid = ls->ls_global_id;
  79. op->info.number = number;
  80. op->info.start = 0;
  81. op->info.end = OFFSET_MAX;
  82. if (fl->fl_lmops && fl->fl_lmops->lm_grant)
  83. op->info.owner = (__u64) fl->fl_pid;
  84. else
  85. op->info.owner = (__u64)(long) fl->fl_owner;
  86. op->info.flags |= DLM_PLOCK_FL_CLOSE;
  87. send_op(op);
  88. }
  89. int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
  90. int cmd, struct file_lock *fl)
  91. {
  92. struct dlm_ls *ls;
  93. struct plock_op *op;
  94. struct plock_xop *xop;
  95. int rv;
  96. ls = dlm_find_lockspace_local(lockspace);
  97. if (!ls)
  98. return -EINVAL;
  99. xop = kzalloc(sizeof(*xop), GFP_NOFS);
  100. if (!xop) {
  101. rv = -ENOMEM;
  102. goto out;
  103. }
  104. op = &xop->xop;
  105. op->info.optype = DLM_PLOCK_OP_LOCK;
  106. op->info.pid = fl->fl_pid;
  107. op->info.ex = (fl->fl_type == F_WRLCK);
  108. op->info.wait = IS_SETLKW(cmd);
  109. op->info.fsid = ls->ls_global_id;
  110. op->info.number = number;
  111. op->info.start = fl->fl_start;
  112. op->info.end = fl->fl_end;
  113. if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
  114. /* fl_owner is lockd which doesn't distinguish
  115. processes on the nfs client */
  116. op->info.owner = (__u64) fl->fl_pid;
  117. xop->callback = fl->fl_lmops->lm_grant;
  118. locks_init_lock(&xop->flc);
  119. locks_copy_lock(&xop->flc, fl);
  120. xop->fl = fl;
  121. xop->file = file;
  122. } else {
  123. op->info.owner = (__u64)(long) fl->fl_owner;
  124. xop->callback = NULL;
  125. }
  126. send_op(op);
  127. if (xop->callback == NULL) {
  128. rv = wait_event_killable(recv_wq, (op->done != 0));
  129. if (rv == -ERESTARTSYS) {
  130. log_debug(ls, "dlm_posix_lock: wait killed %llx",
  131. (unsigned long long)number);
  132. spin_lock(&ops_lock);
  133. list_del(&op->list);
  134. spin_unlock(&ops_lock);
  135. kfree(xop);
  136. do_unlock_close(ls, number, file, fl);
  137. goto out;
  138. }
  139. } else {
  140. rv = FILE_LOCK_DEFERRED;
  141. goto out;
  142. }
  143. spin_lock(&ops_lock);
  144. if (!list_empty(&op->list)) {
  145. log_error(ls, "dlm_posix_lock: op on list %llx",
  146. (unsigned long long)number);
  147. list_del(&op->list);
  148. }
  149. spin_unlock(&ops_lock);
  150. rv = op->info.rv;
  151. if (!rv) {
  152. if (posix_lock_file_wait(file, fl) < 0)
  153. log_error(ls, "dlm_posix_lock: vfs lock error %llx",
  154. (unsigned long long)number);
  155. }
  156. kfree(xop);
  157. out:
  158. dlm_put_lockspace(ls);
  159. return rv;
  160. }
  161. EXPORT_SYMBOL_GPL(dlm_posix_lock);
  162. /* Returns failure iff a successful lock operation should be canceled */
  163. static int dlm_plock_callback(struct plock_op *op)
  164. {
  165. struct file *file;
  166. struct file_lock *fl;
  167. struct file_lock *flc;
  168. int (*notify)(void *, void *, int) = NULL;
  169. struct plock_xop *xop = (struct plock_xop *)op;
  170. int rv = 0;
  171. spin_lock(&ops_lock);
  172. if (!list_empty(&op->list)) {
  173. log_print("dlm_plock_callback: op on list %llx",
  174. (unsigned long long)op->info.number);
  175. list_del(&op->list);
  176. }
  177. spin_unlock(&ops_lock);
  178. /* check if the following 2 are still valid or make a copy */
  179. file = xop->file;
  180. flc = &xop->flc;
  181. fl = xop->fl;
  182. notify = xop->callback;
  183. if (op->info.rv) {
  184. notify(fl, NULL, op->info.rv);
  185. goto out;
  186. }
  187. /* got fs lock; bookkeep locally as well: */
  188. flc->fl_flags &= ~FL_SLEEP;
  189. if (posix_lock_file(file, flc, NULL)) {
  190. /*
  191. * This can only happen in the case of kmalloc() failure.
  192. * The filesystem's own lock is the authoritative lock,
  193. * so a failure to get the lock locally is not a disaster.
  194. * As long as the fs cannot reliably cancel locks (especially
  195. * in a low-memory situation), we're better off ignoring
  196. * this failure than trying to recover.
  197. */
  198. log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p",
  199. (unsigned long long)op->info.number, file, fl);
  200. }
  201. rv = notify(fl, NULL, 0);
  202. if (rv) {
  203. /* XXX: We need to cancel the fs lock here: */
  204. log_print("dlm_plock_callback: lock granted after lock request "
  205. "failed; dangling lock!\n");
  206. goto out;
  207. }
  208. out:
  209. kfree(xop);
  210. return rv;
  211. }
  212. int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
  213. struct file_lock *fl)
  214. {
  215. struct dlm_ls *ls;
  216. struct plock_op *op;
  217. int rv;
  218. ls = dlm_find_lockspace_local(lockspace);
  219. if (!ls)
  220. return -EINVAL;
  221. op = kzalloc(sizeof(*op), GFP_NOFS);
  222. if (!op) {
  223. rv = -ENOMEM;
  224. goto out;
  225. }
  226. if (posix_lock_file_wait(file, fl) < 0)
  227. log_error(ls, "dlm_posix_unlock: vfs unlock error %llx",
  228. (unsigned long long)number);
  229. op->info.optype = DLM_PLOCK_OP_UNLOCK;
  230. op->info.pid = fl->fl_pid;
  231. op->info.fsid = ls->ls_global_id;
  232. op->info.number = number;
  233. op->info.start = fl->fl_start;
  234. op->info.end = fl->fl_end;
  235. if (fl->fl_lmops && fl->fl_lmops->lm_grant)
  236. op->info.owner = (__u64) fl->fl_pid;
  237. else
  238. op->info.owner = (__u64)(long) fl->fl_owner;
  239. if (fl->fl_flags & FL_CLOSE) {
  240. op->info.flags |= DLM_PLOCK_FL_CLOSE;
  241. send_op(op);
  242. rv = 0;
  243. goto out;
  244. }
  245. send_op(op);
  246. wait_event(recv_wq, (op->done != 0));
  247. spin_lock(&ops_lock);
  248. if (!list_empty(&op->list)) {
  249. log_error(ls, "dlm_posix_unlock: op on list %llx",
  250. (unsigned long long)number);
  251. list_del(&op->list);
  252. }
  253. spin_unlock(&ops_lock);
  254. rv = op->info.rv;
  255. if (rv == -ENOENT)
  256. rv = 0;
  257. kfree(op);
  258. out:
  259. dlm_put_lockspace(ls);
  260. return rv;
  261. }
  262. EXPORT_SYMBOL_GPL(dlm_posix_unlock);
  263. int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
  264. struct file_lock *fl)
  265. {
  266. struct dlm_ls *ls;
  267. struct plock_op *op;
  268. int rv;
  269. ls = dlm_find_lockspace_local(lockspace);
  270. if (!ls)
  271. return -EINVAL;
  272. op = kzalloc(sizeof(*op), GFP_NOFS);
  273. if (!op) {
  274. rv = -ENOMEM;
  275. goto out;
  276. }
  277. op->info.optype = DLM_PLOCK_OP_GET;
  278. op->info.pid = fl->fl_pid;
  279. op->info.ex = (fl->fl_type == F_WRLCK);
  280. op->info.fsid = ls->ls_global_id;
  281. op->info.number = number;
  282. op->info.start = fl->fl_start;
  283. op->info.end = fl->fl_end;
  284. if (fl->fl_lmops && fl->fl_lmops->lm_grant)
  285. op->info.owner = (__u64) fl->fl_pid;
  286. else
  287. op->info.owner = (__u64)(long) fl->fl_owner;
  288. send_op(op);
  289. wait_event(recv_wq, (op->done != 0));
  290. spin_lock(&ops_lock);
  291. if (!list_empty(&op->list)) {
  292. log_error(ls, "dlm_posix_get: op on list %llx",
  293. (unsigned long long)number);
  294. list_del(&op->list);
  295. }
  296. spin_unlock(&ops_lock);
  297. /* info.rv from userspace is 1 for conflict, 0 for no-conflict,
  298. -ENOENT if there are no locks on the file */
  299. rv = op->info.rv;
  300. fl->fl_type = F_UNLCK;
  301. if (rv == -ENOENT)
  302. rv = 0;
  303. else if (rv > 0) {
  304. locks_init_lock(fl);
  305. fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
  306. fl->fl_flags = FL_POSIX;
  307. fl->fl_pid = op->info.pid;
  308. fl->fl_start = op->info.start;
  309. fl->fl_end = op->info.end;
  310. rv = 0;
  311. }
  312. kfree(op);
  313. out:
  314. dlm_put_lockspace(ls);
  315. return rv;
  316. }
  317. EXPORT_SYMBOL_GPL(dlm_posix_get);
  318. /* a read copies out one plock request from the send list */
  319. static ssize_t dev_read(struct file *file, char __user *u, size_t count,
  320. loff_t *ppos)
  321. {
  322. struct dlm_plock_info info;
  323. struct plock_op *op = NULL;
  324. if (count < sizeof(info))
  325. return -EINVAL;
  326. spin_lock(&ops_lock);
  327. if (!list_empty(&send_list)) {
  328. op = list_entry(send_list.next, struct plock_op, list);
  329. if (op->info.flags & DLM_PLOCK_FL_CLOSE)
  330. list_del(&op->list);
  331. else
  332. list_move(&op->list, &recv_list);
  333. memcpy(&info, &op->info, sizeof(info));
  334. }
  335. spin_unlock(&ops_lock);
  336. if (!op)
  337. return -EAGAIN;
  338. /* there is no need to get a reply from userspace for unlocks
  339. that were generated by the vfs cleaning up for a close
  340. (the process did not make an unlock call). */
  341. if (op->info.flags & DLM_PLOCK_FL_CLOSE)
  342. kfree(op);
  343. if (copy_to_user(u, &info, sizeof(info)))
  344. return -EFAULT;
  345. return sizeof(info);
  346. }
  347. /* a write copies in one plock result that should match a plock_op
  348. on the recv list */
  349. static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
  350. loff_t *ppos)
  351. {
  352. struct dlm_plock_info info;
  353. struct plock_op *op;
  354. int found = 0, do_callback = 0;
  355. if (count != sizeof(info))
  356. return -EINVAL;
  357. if (copy_from_user(&info, u, sizeof(info)))
  358. return -EFAULT;
  359. if (check_version(&info))
  360. return -EINVAL;
  361. spin_lock(&ops_lock);
  362. list_for_each_entry(op, &recv_list, list) {
  363. if (op->info.fsid == info.fsid &&
  364. op->info.number == info.number &&
  365. op->info.owner == info.owner) {
  366. struct plock_xop *xop = (struct plock_xop *)op;
  367. list_del_init(&op->list);
  368. memcpy(&op->info, &info, sizeof(info));
  369. if (xop->callback)
  370. do_callback = 1;
  371. else
  372. op->done = 1;
  373. found = 1;
  374. break;
  375. }
  376. }
  377. spin_unlock(&ops_lock);
  378. if (found) {
  379. if (do_callback)
  380. dlm_plock_callback(op);
  381. else
  382. wake_up(&recv_wq);
  383. } else
  384. log_print("dev_write no op %x %llx", info.fsid,
  385. (unsigned long long)info.number);
  386. return count;
  387. }
  388. static unsigned int dev_poll(struct file *file, poll_table *wait)
  389. {
  390. unsigned int mask = 0;
  391. poll_wait(file, &send_wq, wait);
  392. spin_lock(&ops_lock);
  393. if (!list_empty(&send_list))
  394. mask = POLLIN | POLLRDNORM;
  395. spin_unlock(&ops_lock);
  396. return mask;
  397. }
  398. static const struct file_operations dev_fops = {
  399. .read = dev_read,
  400. .write = dev_write,
  401. .poll = dev_poll,
  402. .owner = THIS_MODULE,
  403. .llseek = noop_llseek,
  404. };
  405. static struct miscdevice plock_dev_misc = {
  406. .minor = MISC_DYNAMIC_MINOR,
  407. .name = DLM_PLOCK_MISC_NAME,
  408. .fops = &dev_fops
  409. };
  410. int dlm_plock_init(void)
  411. {
  412. int rv;
  413. spin_lock_init(&ops_lock);
  414. INIT_LIST_HEAD(&send_list);
  415. INIT_LIST_HEAD(&recv_list);
  416. init_waitqueue_head(&send_wq);
  417. init_waitqueue_head(&recv_wq);
  418. rv = misc_register(&plock_dev_misc);
  419. if (rv)
  420. log_print("dlm_plock_init: misc_register failed %d", rv);
  421. return rv;
  422. }
  423. void dlm_plock_exit(void)
  424. {
  425. if (misc_deregister(&plock_dev_misc) < 0)
  426. log_print("dlm_plock_exit: misc_deregister failed");
  427. }