dir.c 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
  1. /******************************************************************************
  2. *******************************************************************************
  3. **
  4. ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
  5. ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
  6. **
  7. ** This copyrighted material is made available to anyone wishing to use,
  8. ** modify, copy, or redistribute it subject to the terms and conditions
  9. ** of the GNU General Public License v.2.
  10. **
  11. *******************************************************************************
  12. ******************************************************************************/
  13. #include "dlm_internal.h"
  14. #include "lockspace.h"
  15. #include "member.h"
  16. #include "lowcomms.h"
  17. #include "rcom.h"
  18. #include "config.h"
  19. #include "memory.h"
  20. #include "recover.h"
  21. #include "util.h"
  22. #include "lock.h"
  23. #include "dir.h"
  24. static void put_free_de(struct dlm_ls *ls, struct dlm_direntry *de)
  25. {
  26. spin_lock(&ls->ls_recover_list_lock);
  27. list_add(&de->list, &ls->ls_recover_list);
  28. spin_unlock(&ls->ls_recover_list_lock);
  29. }
  30. static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len)
  31. {
  32. int found = 0;
  33. struct dlm_direntry *de;
  34. spin_lock(&ls->ls_recover_list_lock);
  35. list_for_each_entry(de, &ls->ls_recover_list, list) {
  36. if (de->length == len) {
  37. list_del(&de->list);
  38. de->master_nodeid = 0;
  39. memset(de->name, 0, len);
  40. found = 1;
  41. break;
  42. }
  43. }
  44. spin_unlock(&ls->ls_recover_list_lock);
  45. if (!found)
  46. de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_NOFS);
  47. return de;
  48. }
  49. void dlm_clear_free_entries(struct dlm_ls *ls)
  50. {
  51. struct dlm_direntry *de;
  52. spin_lock(&ls->ls_recover_list_lock);
  53. while (!list_empty(&ls->ls_recover_list)) {
  54. de = list_entry(ls->ls_recover_list.next, struct dlm_direntry,
  55. list);
  56. list_del(&de->list);
  57. kfree(de);
  58. }
  59. spin_unlock(&ls->ls_recover_list_lock);
  60. }
  61. /*
  62. * We use the upper 16 bits of the hash value to select the directory node.
  63. * Low bits are used for distribution of rsb's among hash buckets on each node.
  64. *
  65. * To give the exact range wanted (0 to num_nodes-1), we apply a modulus of
  66. * num_nodes to the hash value. This value in the desired range is used as an
  67. * offset into the sorted list of nodeid's to give the particular nodeid.
  68. */
  69. int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash)
  70. {
  71. struct list_head *tmp;
  72. struct dlm_member *memb = NULL;
  73. uint32_t node, n = 0;
  74. int nodeid;
  75. if (ls->ls_num_nodes == 1) {
  76. nodeid = dlm_our_nodeid();
  77. goto out;
  78. }
  79. if (ls->ls_node_array) {
  80. node = (hash >> 16) % ls->ls_total_weight;
  81. nodeid = ls->ls_node_array[node];
  82. goto out;
  83. }
  84. /* make_member_array() failed to kmalloc ls_node_array... */
  85. node = (hash >> 16) % ls->ls_num_nodes;
  86. list_for_each(tmp, &ls->ls_nodes) {
  87. if (n++ != node)
  88. continue;
  89. memb = list_entry(tmp, struct dlm_member, list);
  90. break;
  91. }
  92. DLM_ASSERT(memb , printk("num_nodes=%u n=%u node=%u\n",
  93. ls->ls_num_nodes, n, node););
  94. nodeid = memb->nodeid;
  95. out:
  96. return nodeid;
  97. }
  98. int dlm_dir_nodeid(struct dlm_rsb *r)
  99. {
  100. return dlm_hash2nodeid(r->res_ls, r->res_hash);
  101. }
  102. static inline uint32_t dir_hash(struct dlm_ls *ls, char *name, int len)
  103. {
  104. uint32_t val;
  105. val = jhash(name, len, 0);
  106. val &= (ls->ls_dirtbl_size - 1);
  107. return val;
  108. }
  109. static void add_entry_to_hash(struct dlm_ls *ls, struct dlm_direntry *de)
  110. {
  111. uint32_t bucket;
  112. bucket = dir_hash(ls, de->name, de->length);
  113. list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list);
  114. }
  115. static struct dlm_direntry *search_bucket(struct dlm_ls *ls, char *name,
  116. int namelen, uint32_t bucket)
  117. {
  118. struct dlm_direntry *de;
  119. list_for_each_entry(de, &ls->ls_dirtbl[bucket].list, list) {
  120. if (de->length == namelen && !memcmp(name, de->name, namelen))
  121. goto out;
  122. }
  123. de = NULL;
  124. out:
  125. return de;
  126. }
  127. void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen)
  128. {
  129. struct dlm_direntry *de;
  130. uint32_t bucket;
  131. bucket = dir_hash(ls, name, namelen);
  132. spin_lock(&ls->ls_dirtbl[bucket].lock);
  133. de = search_bucket(ls, name, namelen, bucket);
  134. if (!de) {
  135. log_error(ls, "remove fr %u none", nodeid);
  136. goto out;
  137. }
  138. if (de->master_nodeid != nodeid) {
  139. log_error(ls, "remove fr %u ID %u", nodeid, de->master_nodeid);
  140. goto out;
  141. }
  142. list_del(&de->list);
  143. kfree(de);
  144. out:
  145. spin_unlock(&ls->ls_dirtbl[bucket].lock);
  146. }
  147. void dlm_dir_clear(struct dlm_ls *ls)
  148. {
  149. struct list_head *head;
  150. struct dlm_direntry *de;
  151. int i;
  152. DLM_ASSERT(list_empty(&ls->ls_recover_list), );
  153. for (i = 0; i < ls->ls_dirtbl_size; i++) {
  154. spin_lock(&ls->ls_dirtbl[i].lock);
  155. head = &ls->ls_dirtbl[i].list;
  156. while (!list_empty(head)) {
  157. de = list_entry(head->next, struct dlm_direntry, list);
  158. list_del(&de->list);
  159. put_free_de(ls, de);
  160. }
  161. spin_unlock(&ls->ls_dirtbl[i].lock);
  162. }
  163. }
  164. int dlm_recover_directory(struct dlm_ls *ls)
  165. {
  166. struct dlm_member *memb;
  167. struct dlm_direntry *de;
  168. char *b, *last_name = NULL;
  169. int error = -ENOMEM, last_len, count = 0;
  170. uint16_t namelen;
  171. log_debug(ls, "dlm_recover_directory");
  172. if (dlm_no_directory(ls))
  173. goto out_status;
  174. dlm_dir_clear(ls);
  175. last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS);
  176. if (!last_name)
  177. goto out;
  178. list_for_each_entry(memb, &ls->ls_nodes, list) {
  179. memset(last_name, 0, DLM_RESNAME_MAXLEN);
  180. last_len = 0;
  181. for (;;) {
  182. int left;
  183. error = dlm_recovery_stopped(ls);
  184. if (error)
  185. goto out_free;
  186. error = dlm_rcom_names(ls, memb->nodeid,
  187. last_name, last_len);
  188. if (error)
  189. goto out_free;
  190. schedule();
  191. /*
  192. * pick namelen/name pairs out of received buffer
  193. */
  194. b = ls->ls_recover_buf->rc_buf;
  195. left = ls->ls_recover_buf->rc_header.h_length;
  196. left -= sizeof(struct dlm_rcom);
  197. for (;;) {
  198. __be16 v;
  199. error = -EINVAL;
  200. if (left < sizeof(__be16))
  201. goto out_free;
  202. memcpy(&v, b, sizeof(__be16));
  203. namelen = be16_to_cpu(v);
  204. b += sizeof(__be16);
  205. left -= sizeof(__be16);
  206. /* namelen of 0xFFFFF marks end of names for
  207. this node; namelen of 0 marks end of the
  208. buffer */
  209. if (namelen == 0xFFFF)
  210. goto done;
  211. if (!namelen)
  212. break;
  213. if (namelen > left)
  214. goto out_free;
  215. if (namelen > DLM_RESNAME_MAXLEN)
  216. goto out_free;
  217. error = -ENOMEM;
  218. de = get_free_de(ls, namelen);
  219. if (!de)
  220. goto out_free;
  221. de->master_nodeid = memb->nodeid;
  222. de->length = namelen;
  223. last_len = namelen;
  224. memcpy(de->name, b, namelen);
  225. memcpy(last_name, b, namelen);
  226. b += namelen;
  227. left -= namelen;
  228. add_entry_to_hash(ls, de);
  229. count++;
  230. }
  231. }
  232. done:
  233. ;
  234. }
  235. out_status:
  236. error = 0;
  237. dlm_set_recover_status(ls, DLM_RS_DIR);
  238. log_debug(ls, "dlm_recover_directory %d entries", count);
  239. out_free:
  240. kfree(last_name);
  241. out:
  242. dlm_clear_free_entries(ls);
  243. return error;
  244. }
  245. static int get_entry(struct dlm_ls *ls, int nodeid, char *name,
  246. int namelen, int *r_nodeid)
  247. {
  248. struct dlm_direntry *de, *tmp;
  249. uint32_t bucket;
  250. bucket = dir_hash(ls, name, namelen);
  251. spin_lock(&ls->ls_dirtbl[bucket].lock);
  252. de = search_bucket(ls, name, namelen, bucket);
  253. if (de) {
  254. *r_nodeid = de->master_nodeid;
  255. spin_unlock(&ls->ls_dirtbl[bucket].lock);
  256. if (*r_nodeid == nodeid)
  257. return -EEXIST;
  258. return 0;
  259. }
  260. spin_unlock(&ls->ls_dirtbl[bucket].lock);
  261. if (namelen > DLM_RESNAME_MAXLEN)
  262. return -EINVAL;
  263. de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_NOFS);
  264. if (!de)
  265. return -ENOMEM;
  266. de->master_nodeid = nodeid;
  267. de->length = namelen;
  268. memcpy(de->name, name, namelen);
  269. spin_lock(&ls->ls_dirtbl[bucket].lock);
  270. tmp = search_bucket(ls, name, namelen, bucket);
  271. if (tmp) {
  272. kfree(de);
  273. de = tmp;
  274. } else {
  275. list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list);
  276. }
  277. *r_nodeid = de->master_nodeid;
  278. spin_unlock(&ls->ls_dirtbl[bucket].lock);
  279. return 0;
  280. }
  /*
   * Public entry point for a directory lookup: forwards all arguments to
   * get_entry() and returns its result unchanged.  The master nodeid is
   * passed back through @r_nodeid.
   */
  int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen,
                     int *r_nodeid)
  {
      int rv = get_entry(ls, nodeid, name, namelen, r_nodeid);

      return rv;
  }
  286. static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len)
  287. {
  288. struct dlm_rsb *r;
  289. down_read(&ls->ls_root_sem);
  290. list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
  291. if (len == r->res_length && !memcmp(name, r->res_name, len)) {
  292. up_read(&ls->ls_root_sem);
  293. return r;
  294. }
  295. }
  296. up_read(&ls->ls_root_sem);
  297. return NULL;
  298. }
  299. /* Find the rsb where we left off (or start again), then send rsb names
  300. for rsb's we're master of and whose directory node matches the requesting
  301. node. inbuf is the rsb name last sent, inlen is the name's length */
  302. void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
  303. char *outbuf, int outlen, int nodeid)
  304. {
  305. struct list_head *list;
  306. struct dlm_rsb *r;
  307. int offset = 0, dir_nodeid;
  308. __be16 be_namelen;
  309. down_read(&ls->ls_root_sem);
  310. if (inlen > 1) {
  311. r = find_rsb_root(ls, inbuf, inlen);
  312. if (!r) {
  313. inbuf[inlen - 1] = '\0';
  314. log_error(ls, "copy_master_names from %d start %d %s",
  315. nodeid, inlen, inbuf);
  316. goto out;
  317. }
  318. list = r->res_root_list.next;
  319. } else {
  320. list = ls->ls_root_list.next;
  321. }
  322. for (offset = 0; list != &ls->ls_root_list; list = list->next) {
  323. r = list_entry(list, struct dlm_rsb, res_root_list);
  324. if (r->res_nodeid)
  325. continue;
  326. dir_nodeid = dlm_dir_nodeid(r);
  327. if (dir_nodeid != nodeid)
  328. continue;
  329. /*
  330. * The block ends when we can't fit the following in the
  331. * remaining buffer space:
  332. * namelen (uint16_t) +
  333. * name (r->res_length) +
  334. * end-of-block record 0x0000 (uint16_t)
  335. */
  336. if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) {
  337. /* Write end-of-block record */
  338. be_namelen = cpu_to_be16(0);
  339. memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
  340. offset += sizeof(__be16);
  341. goto out;
  342. }
  343. be_namelen = cpu_to_be16(r->res_length);
  344. memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
  345. offset += sizeof(__be16);
  346. memcpy(outbuf + offset, r->res_name, r->res_length);
  347. offset += r->res_length;
  348. }
  349. /*
  350. * If we've reached the end of the list (and there's room) write a
  351. * terminating record.
  352. */
  353. if ((list == &ls->ls_root_list) &&
  354. (offset + sizeof(uint16_t) <= outlen)) {
  355. be_namelen = cpu_to_be16(0xFFFF);
  356. memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
  357. offset += sizeof(__be16);
  358. }
  359. out:
  360. up_read(&ls->ls_root_sem);
  361. }