volume.c 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
  1. /* AFS volume management
  2. *
  3. * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  4. * Written by David Howells (dhowells@redhat.com)
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU General Public License
  8. * as published by the Free Software Foundation; either version
  9. * 2 of the License, or (at your option) any later version.
  10. */
  11. #include <linux/kernel.h>
  12. #include <linux/module.h>
  13. #include <linux/init.h>
  14. #include <linux/slab.h>
  15. #include <linux/fs.h>
  16. #include <linux/pagemap.h>
  17. #include <linux/sched.h>
  18. #include "internal.h"
  /*
   * Printable names of the volume types, indexed by volume type (used when
   * tracing which volume was selected).  Both the strings and the table of
   * pointers are read-only.
   */
  static const char *const afs_voltypes[] = { "R/W", "R/O", "BAK" };
  20. /*
  21. * lookup a volume by name
  22. * - this can be one of the following:
  23. * "%[cell:]volume[.]" R/W volume
  24. * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0),
  25. * or R/W (rwparent=1) volume
  26. * "%[cell:]volume.readonly" R/O volume
  27. * "#[cell:]volume.readonly" R/O volume
  28. * "%[cell:]volume.backup" Backup volume
  29. * "#[cell:]volume.backup" Backup volume
  30. *
  31. * The cell name is optional, and defaults to the current cell.
  32. *
  33. * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin
  34. * Guide
  35. * - Rule 1: Explicit type suffix forces access of that type or nothing
  36. * (no suffix, then use Rule 2 & 3)
  37. * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W
  38. * if not available
  39. * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
  40. * explicitly told otherwise
  41. */
  42. struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
  43. {
  44. struct afs_vlocation *vlocation = NULL;
  45. struct afs_volume *volume = NULL;
  46. struct afs_server *server = NULL;
  47. char srvtmask;
  48. int ret, loop;
  49. _enter("{%*.*s,%d}",
  50. params->volnamesz, params->volnamesz, params->volname, params->rwpath);
  51. /* lookup the volume location record */
  52. vlocation = afs_vlocation_lookup(params->cell, params->key,
  53. params->volname, params->volnamesz);
  54. if (IS_ERR(vlocation)) {
  55. ret = PTR_ERR(vlocation);
  56. vlocation = NULL;
  57. goto error;
  58. }
  59. /* make the final decision on the type we want */
  60. ret = -ENOMEDIUM;
  61. if (params->force && !(vlocation->vldb.vidmask & (1 << params->type)))
  62. goto error;
  63. srvtmask = 0;
  64. for (loop = 0; loop < vlocation->vldb.nservers; loop++)
  65. srvtmask |= vlocation->vldb.srvtmask[loop];
  66. if (params->force) {
  67. if (!(srvtmask & (1 << params->type)))
  68. goto error;
  69. } else if (srvtmask & AFS_VOL_VTM_RO) {
  70. params->type = AFSVL_ROVOL;
  71. } else if (srvtmask & AFS_VOL_VTM_RW) {
  72. params->type = AFSVL_RWVOL;
  73. } else {
  74. goto error;
  75. }
  76. down_write(&params->cell->vl_sem);
  77. /* is the volume already active? */
  78. if (vlocation->vols[params->type]) {
  79. /* yes - re-use it */
  80. volume = vlocation->vols[params->type];
  81. afs_get_volume(volume);
  82. goto success;
  83. }
  84. /* create a new volume record */
  85. _debug("creating new volume record");
  86. ret = -ENOMEM;
  87. volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
  88. if (!volume)
  89. goto error_up;
  90. atomic_set(&volume->usage, 1);
  91. volume->type = params->type;
  92. volume->type_force = params->force;
  93. volume->cell = params->cell;
  94. volume->vid = vlocation->vldb.vid[params->type];
  95. ret = bdi_setup_and_register(&volume->bdi, "afs", BDI_CAP_MAP_COPY);
  96. if (ret)
  97. goto error_bdi;
  98. init_rwsem(&volume->server_sem);
  99. /* look up all the applicable server records */
  100. for (loop = 0; loop < 8; loop++) {
  101. if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) {
  102. server = afs_lookup_server(
  103. volume->cell, &vlocation->vldb.servers[loop]);
  104. if (IS_ERR(server)) {
  105. ret = PTR_ERR(server);
  106. goto error_discard;
  107. }
  108. volume->servers[volume->nservers] = server;
  109. volume->nservers++;
  110. }
  111. }
  112. /* attach the cache and volume location */
  113. #ifdef CONFIG_AFS_FSCACHE
  114. volume->cache = fscache_acquire_cookie(vlocation->cache,
  115. &afs_volume_cache_index_def,
  116. volume);
  117. #endif
  118. afs_get_vlocation(vlocation);
  119. volume->vlocation = vlocation;
  120. vlocation->vols[volume->type] = volume;
  121. success:
  122. _debug("kAFS selected %s volume %08x",
  123. afs_voltypes[volume->type], volume->vid);
  124. up_write(&params->cell->vl_sem);
  125. afs_put_vlocation(vlocation);
  126. _leave(" = %p", volume);
  127. return volume;
  128. /* clean up */
  129. error_up:
  130. up_write(&params->cell->vl_sem);
  131. error:
  132. afs_put_vlocation(vlocation);
  133. _leave(" = %d", ret);
  134. return ERR_PTR(ret);
  135. error_discard:
  136. bdi_destroy(&volume->bdi);
  137. error_bdi:
  138. up_write(&params->cell->vl_sem);
  139. for (loop = volume->nservers - 1; loop >= 0; loop--)
  140. afs_put_server(volume->servers[loop]);
  141. kfree(volume);
  142. goto error;
  143. }
  144. /*
  145. * destroy a volume record
  146. */
  147. void afs_put_volume(struct afs_volume *volume)
  148. {
  149. struct afs_vlocation *vlocation;
  150. int loop;
  151. if (!volume)
  152. return;
  153. _enter("%p", volume);
  154. ASSERTCMP(atomic_read(&volume->usage), >, 0);
  155. vlocation = volume->vlocation;
  156. /* to prevent a race, the decrement and the dequeue must be effectively
  157. * atomic */
  158. down_write(&vlocation->cell->vl_sem);
  159. if (likely(!atomic_dec_and_test(&volume->usage))) {
  160. up_write(&vlocation->cell->vl_sem);
  161. _leave("");
  162. return;
  163. }
  164. vlocation->vols[volume->type] = NULL;
  165. up_write(&vlocation->cell->vl_sem);
  166. /* finish cleaning up the volume */
  167. #ifdef CONFIG_AFS_FSCACHE
  168. fscache_relinquish_cookie(volume->cache, 0);
  169. #endif
  170. afs_put_vlocation(vlocation);
  171. for (loop = volume->nservers - 1; loop >= 0; loop--)
  172. afs_put_server(volume->servers[loop]);
  173. bdi_destroy(&volume->bdi);
  174. kfree(volume);
  175. _leave(" [destroyed]");
  176. }
  /*
   * Choose a fileserver through which to try accessing a vnode's volume.
   * - the server the vnode is already using is preferred if its fs_state is 0
   * - otherwise the volume's server list is scanned in order, under
   *   server_sem held for reading, and the first server with fs_state 0 wins
   * - the server is returned with its usage count raised
   * - if the list is empty, ERR_PTR(-ENOMEDIUM) is returned when servers have
   *   previously rejected the volume (rjservers != 0), else ERR_PTR(-ESTALE)
   * - if no server is usable, the error returned is the highest-ranked state
   *   seen, ranked: -ENETUNREACH < -EHOSTUNREACH < -ECONNREFUSED < anything
   *   else (including -EREMOTEIO); among equals the first seen is kept
   */
  struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
  {
      struct afs_volume *volume = vnode->volume;
      struct afs_server *candidate;
      int err, state, idx, outranks;

      _enter("%s", volume->vlocation->vldb.name);

      /* stick with the server we're already using if we can */
      if (vnode->server && vnode->server->fs_state == 0) {
          afs_get_server(vnode->server);
          _leave(" = %p [current]", vnode->server);
          return vnode->server;
      }

      down_read(&volume->server_sem);

      /* handle the no-server case */
      if (volume->nservers == 0) {
          err = -ESTALE;
          if (volume->rjservers)
              err = -ENOMEDIUM;
          up_read(&volume->server_sem);
          _leave(" = %d [no servers]", err);
          return ERR_PTR(err);
      }

      /* basically, just search the list for the first live server and use
       * that, remembering the most significant error along the way */
      err = 0;
      for (idx = 0; idx < volume->nservers; idx++) {
          candidate = volume->servers[idx];
          state = candidate->fs_state;

          _debug("consider %d [%d]", idx, state);

          /* found an apparently healthy server */
          if (state == 0) {
              afs_get_server(candidate);
              up_read(&volume->server_sem);
              _leave(" = %p (picked %08x)",
                     candidate, ntohl(candidate->addr.s_addr));
              return candidate;
          }

          /* does this server's state outrank the error already held? */
          if (state == -ENETUNREACH)
              outranks = (err == 0);
          else if (state == -EHOSTUNREACH)
              outranks = (err == 0 ||
                          err == -ENETUNREACH);
          else if (state == -ECONNREFUSED)
              outranks = (err == 0 ||
                          err == -ENETUNREACH ||
                          err == -EHOSTUNREACH);
          else    /* -EREMOTEIO and everything else */
              outranks = (err == 0 ||
                          err == -ENETUNREACH ||
                          err == -EHOSTUNREACH ||
                          err == -ECONNREFUSED);

          if (outranks)
              err = state;
      }

      /* no available servers
       * - TODO: handle the no active servers case better
       */
      up_read(&volume->server_sem);
      _leave(" = %d", err);
      return ERR_PTR(err);
  }
  /*
   * Release a server after use, recording how the access attempt went.
   * - result is the outcome of the operation attempted through the server
   * - returns 0 if the caller should loop around and try another server, or
   *   1 if the caller should accept the result (success or error)
   * - when result is 0 (success) the ref on the server struct is NOT dropped
   *   here; the caller must release it
   * - for every other result the ref is dropped here
   * - -ENOMEDIUM removes the server from the volume's list (under server_sem
   *   held for writing) and counts it as having rejected the volume
   * - network-level failures mark the server's fs_state with the error
   */
  int afs_volume_release_fileserver(struct afs_vnode *vnode,
                                    struct afs_server *server,
                                    int result)
  {
      struct afs_volume *volume = vnode->volume;
      unsigned slot;

      _enter("%s,%08x,%d",
             volume->vlocation->vldb.name, ntohl(server->addr.s_addr),
             result);

      switch (result) {
      case 0:
          /* success - note the activity and mark the server healthy */
          server->fs_act_jif = jiffies;
          server->fs_state = 0;
          _leave("");
          return 1;

      case -ENOMEDIUM:
          /* the fileserver denied all knowledge of the volume */
          server->fs_act_jif = jiffies;
          down_write(&volume->server_sem);

          /* firstly, find where the server is in the active list (if it
           * is) */
          for (slot = 0; slot < volume->nservers; slot++)
              if (volume->servers[slot] == server)
                  break;

          /* no longer there - may have been discarded by another op */
          if (slot >= volume->nservers)
              goto try_next_server_upw;

          /* close the gap in the list and drop the list's ref */
          volume->nservers--;
          memmove(&volume->servers[slot],
                  &volume->servers[slot + 1],
                  sizeof(volume->servers[slot]) *
                  (volume->nservers - slot));
          volume->servers[volume->nservers] = NULL;
          afs_put_server(server);
          volume->rjservers++;

          /* another server might acknowledge its existence */
          if (volume->nservers > 0)
              goto try_next_server_upw;

          /* handle the case where all the fileservers have rejected the
           * volume
           * - TODO: try asking the fileservers for volume information
           * - TODO: contact the VL server again to see if the volume is
           *   no longer registered
           */
          up_write(&volume->server_sem);
          afs_put_server(server);
          _leave(" [completely rejected]");
          return 1;

      case -ENETUNREACH:
      case -EHOSTUNREACH:
      case -ECONNREFUSED:
      case -ETIME:
      case -ETIMEDOUT:
      case -EREMOTEIO:
          /* problem reaching the server - mark it as dead, unless it
           * already carries an error state
           * TODO: vary dead timeout depending on error
           */
          spin_lock(&server->fs_lock);
          if (!server->fs_state) {
              server->fs_dead_jif = jiffies + HZ * 10;
              server->fs_state = result;
              printk("kAFS: SERVER DEAD state=%d\n", result);
          }
          spin_unlock(&server->fs_lock);
          goto try_next_server;

      case -ENOMEM:
      case -ENONET:
      default:
          /* miscellaneous error - the activity time is updated except
           * for -ENOMEM and -ENONET */
          if (result != -ENOMEM && result != -ENONET)
              server->fs_act_jif = jiffies;

          /* tell the caller to accept the result */
          afs_put_server(server);
          _leave(" [local failure]");
          return 1;
      }

      /* tell the caller to loop around and try the next server */
  try_next_server_upw:
      up_write(&volume->server_sem);
  try_next_server:
      afs_put_server(server);
      _leave(" [try next server]");
      return 0;
  }
  339. }