/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_trans_priv.h"
#include "xfs_bmap_btree.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_error.h"
#include "xfs_trace.h"

kmem_zone_t	*xfs_ili_zone;		/* inode log item zone */
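
/*
 * Map a generic log item back to the inode log item that embeds it.
 */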
static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_inode_log_item, ili_item);
}

/*
 * This returns the number of iovecs needed to log the given inode item.
 *
 * We need one iovec for the inode log format structure, one for the
 * inode core, and possibly one for the inode data/extents/b-tree root
 * and one for the inode attribute data/extents/b-tree root.
 */
STATIC uint
xfs_inode_item_size(
	struct xfs_log_item	*lip)
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;
	uint			nvecs = 2;

	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_EXTENTS:
		if ((iip->ili_fields & XFS_ILOG_DEXT) &&
		    ip->i_d.di_nextents > 0 &&
		    ip->i_df.if_bytes > 0)
			nvecs++;
		break;

	case XFS_DINODE_FMT_BTREE:
		if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
		    ip->i_df.if_broot_bytes > 0)
			nvecs++;
		break;

	case XFS_DINODE_FMT_LOCAL:
		if ((iip->ili_fields & XFS_ILOG_DDATA) &&
		    ip->i_df.if_bytes > 0)
			nvecs++;
		break;

	case XFS_DINODE_FMT_DEV:
	case XFS_DINODE_FMT_UUID:
		break;

	default:
		ASSERT(0);
		break;
	}

	if (!XFS_IFORK_Q(ip))
		return nvecs;

	/*
	 * Log any necessary attribute data.
	 */
	switch (ip->i_d.di_aformat) {
	case XFS_DINODE_FMT_EXTENTS:
		if ((iip->ili_fields & XFS_ILOG_AEXT) &&
		    ip->i_d.di_anextents > 0 &&
		    ip->i_afp->if_bytes > 0)
			nvecs++;
		break;

	case XFS_DINODE_FMT_BTREE:
		if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
		    ip->i_afp->if_broot_bytes > 0)
			nvecs++;
		break;

	case XFS_DINODE_FMT_LOCAL:
		if ((iip->ili_fields & XFS_ILOG_ADATA) &&
		    ip->i_afp->if_bytes > 0)
			nvecs++;
		break;

	default:
		ASSERT(0);
		break;
	}

	return nvecs;
}

/*
 * xfs_inode_item_format_extents - convert in-core extents to on-disk form
 *
 * For either the data or attr fork in extent format, we need to endian convert
 * the in-core extents as we place them into the on-disk inode. In this case,
 * we need to do this conversion before we write the extents into the log.
 * Because we don't have the disk inode to write into here, we allocate a
 * buffer and format the extents into it via xfs_iextents_copy(). We free the
 * buffer in the unlock routine after the copy for the log has been made.
 *
 * In the case of the data fork, the in-core and on-disk fork sizes can be
 * different due to delayed allocation extents. We only log on-disk extents
 * here, so always use the physical fork size to determine the size of the
 * buffer we need to allocate.
 */
STATIC void
xfs_inode_item_format_extents(
	struct xfs_inode	*ip,
	struct xfs_log_iovec	*vecp,
	int			whichfork,
	int			type)
{
	xfs_bmbt_rec_t		*ext_buffer;

	ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork), KM_SLEEP);
	if (whichfork == XFS_DATA_FORK)
		ip->i_itemp->ili_extents_buf = ext_buffer;
	else
		ip->i_itemp->ili_aextents_buf = ext_buffer;

	vecp->i_addr = ext_buffer;
	vecp->i_len = xfs_iextents_copy(ip, ext_buffer, whichfork);
	vecp->i_type = type;
}

/*
 * This is called to fill in the vector of log iovecs for the
 * given inode log item.  It fills the first item with an inode
 * log format structure, the second with the on-disk inode structure,
 * and a possible third and/or fourth with the inode data/extents/b-tree
 * root and inode attributes data/extents/b-tree root.
 */
STATIC void
xfs_inode_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_iovec	*vecp)
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;
	uint			nvecs;
	size_t			data_bytes;
	xfs_mount_t		*mp;

	vecp->i_addr = &iip->ili_format;
	vecp->i_len = sizeof(xfs_inode_log_format_t);
	vecp->i_type = XLOG_REG_TYPE_IFORMAT;
	vecp++;
	nvecs = 1;

	vecp->i_addr = &ip->i_d;
	vecp->i_len = sizeof(struct xfs_icdinode);
	vecp->i_type = XLOG_REG_TYPE_ICORE;
	vecp++;
	nvecs++;

	/*
	 * If this is really an old format inode, then we need to
	 * log it as such.  This means that we have to copy the link
	 * count from the new field to the old.  We don't have to worry
	 * about the new fields, because nothing trusts them as long as
	 * the old inode version number is there.  If the superblock already
	 * has a new version number, then we don't bother converting back.
	 */
	mp = ip->i_mount;
	ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb));
	if (ip->i_d.di_version == 1) {
		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
			/*
			 * Convert it back.
			 */
			ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
			ip->i_d.di_onlink = ip->i_d.di_nlink;
		} else {
			/*
			 * The superblock version has already been bumped,
			 * so just make the conversion to the new inode
			 * format permanent.
			 */
			ip->i_d.di_version = 2;
			ip->i_d.di_onlink = 0;
			memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
		}
	}
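
	/*
	 * Log the data fork.  Each case clears the logging flags that cannot
	 * apply to the current fork format and only attaches an iovec when
	 * the matching flag is set and the fork has content to log.
	 */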
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_EXTENTS:
		iip->ili_fields &=
			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
			  XFS_ILOG_DEV | XFS_ILOG_UUID);

		if ((iip->ili_fields & XFS_ILOG_DEXT) &&
		    ip->i_d.di_nextents > 0 &&
		    ip->i_df.if_bytes > 0) {
			ASSERT(ip->i_df.if_u1.if_extents != NULL);
			ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0);
			ASSERT(iip->ili_extents_buf == NULL);

#ifdef XFS_NATIVE_HOST
			if (ip->i_d.di_nextents == ip->i_df.if_bytes /
					       (uint)sizeof(xfs_bmbt_rec_t)) {
				/*
				 * There are no delayed allocation
				 * extents, so just point to the
				 * real extents array.
				 */
				vecp->i_addr = ip->i_df.if_u1.if_extents;
				vecp->i_len = ip->i_df.if_bytes;
				vecp->i_type = XLOG_REG_TYPE_IEXT;
			} else
#endif
			{
				xfs_inode_item_format_extents(ip, vecp,
					XFS_DATA_FORK, XLOG_REG_TYPE_IEXT);
			}
			ASSERT(vecp->i_len <= ip->i_df.if_bytes);
			iip->ili_format.ilf_dsize = vecp->i_len;
			vecp++;
			nvecs++;
		} else {
			iip->ili_fields &= ~XFS_ILOG_DEXT;
		}
		break;

	case XFS_DINODE_FMT_BTREE:
		iip->ili_fields &=
			~(XFS_ILOG_DDATA | XFS_ILOG_DEXT |
			  XFS_ILOG_DEV | XFS_ILOG_UUID);

		if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
		    ip->i_df.if_broot_bytes > 0) {
			ASSERT(ip->i_df.if_broot != NULL);
			vecp->i_addr = ip->i_df.if_broot;
			vecp->i_len = ip->i_df.if_broot_bytes;
			vecp->i_type = XLOG_REG_TYPE_IBROOT;
			vecp++;
			nvecs++;
			iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
		} else {
			ASSERT(!(iip->ili_fields &
				 XFS_ILOG_DBROOT));
#ifdef XFS_TRANS_DEBUG
			if (iip->ili_root_size > 0) {
				ASSERT(iip->ili_root_size ==
				       ip->i_df.if_broot_bytes);
				ASSERT(memcmp(iip->ili_orig_root,
					    ip->i_df.if_broot,
					    iip->ili_root_size) == 0);
			} else {
				ASSERT(ip->i_df.if_broot_bytes == 0);
			}
#endif
			iip->ili_fields &= ~XFS_ILOG_DBROOT;
		}
		break;

	case XFS_DINODE_FMT_LOCAL:
		iip->ili_fields &=
			~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT |
			  XFS_ILOG_DEV | XFS_ILOG_UUID);
		if ((iip->ili_fields & XFS_ILOG_DDATA) &&
		    ip->i_df.if_bytes > 0) {
			ASSERT(ip->i_df.if_u1.if_data != NULL);
			ASSERT(ip->i_d.di_size > 0);

			vecp->i_addr = ip->i_df.if_u1.if_data;
			/*
			 * Round if_bytes up to a word boundary.
			 * The underlying memory is guaranteed to
			 * be there by xfs_idata_realloc().
			 */
			data_bytes = roundup(ip->i_df.if_bytes, 4);
			ASSERT((ip->i_df.if_real_bytes == 0) ||
			       (ip->i_df.if_real_bytes == data_bytes));
			vecp->i_len = (int)data_bytes;
			vecp->i_type = XLOG_REG_TYPE_ILOCAL;
			vecp++;
			nvecs++;
			iip->ili_format.ilf_dsize = (unsigned)data_bytes;
		} else {
			iip->ili_fields &= ~XFS_ILOG_DDATA;
		}
		break;

	case XFS_DINODE_FMT_DEV:
		iip->ili_fields &=
			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
			  XFS_ILOG_DEXT | XFS_ILOG_UUID);
		if (iip->ili_fields & XFS_ILOG_DEV) {
			iip->ili_format.ilf_u.ilfu_rdev =
				ip->i_df.if_u2.if_rdev;
		}
		break;

	case XFS_DINODE_FMT_UUID:
		iip->ili_fields &=
			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
			  XFS_ILOG_DEXT | XFS_ILOG_DEV);
		if (iip->ili_fields & XFS_ILOG_UUID) {
			iip->ili_format.ilf_u.ilfu_uuid =
				ip->i_df.if_u2.if_uuid;
		}
		break;

	default:
		ASSERT(0);
		break;
	}

	/*
	 * If there are no attributes associated with the file, then we're done.
	 */
	if (!XFS_IFORK_Q(ip)) {
		iip->ili_fields &=
			~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
		goto out;
	}
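
	/*
	 * Log the attribute fork.  This mirrors the data fork handling above,
	 * using the attribute logging flags and the in-core attr fork.
	 */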
	switch (ip->i_d.di_aformat) {
	case XFS_DINODE_FMT_EXTENTS:
		iip->ili_fields &=
			~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);

		if ((iip->ili_fields & XFS_ILOG_AEXT) &&
		    ip->i_d.di_anextents > 0 &&
		    ip->i_afp->if_bytes > 0) {
			ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) ==
				ip->i_d.di_anextents);
			ASSERT(ip->i_afp->if_u1.if_extents != NULL);
#ifdef XFS_NATIVE_HOST
			/*
			 * There are no delayed allocation extents
			 * for attributes, so just point at the array.
			 */
			vecp->i_addr = ip->i_afp->if_u1.if_extents;
			vecp->i_len = ip->i_afp->if_bytes;
			vecp->i_type = XLOG_REG_TYPE_IATTR_EXT;
#else
			ASSERT(iip->ili_aextents_buf == NULL);
			xfs_inode_item_format_extents(ip, vecp,
					XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT);
#endif
			iip->ili_format.ilf_asize = vecp->i_len;
			vecp++;
			nvecs++;
		} else {
			iip->ili_fields &= ~XFS_ILOG_AEXT;
		}
		break;

	case XFS_DINODE_FMT_BTREE:
		iip->ili_fields &=
			~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);

		if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
		    ip->i_afp->if_broot_bytes > 0) {
			ASSERT(ip->i_afp->if_broot != NULL);

			vecp->i_addr = ip->i_afp->if_broot;
			vecp->i_len = ip->i_afp->if_broot_bytes;
			vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT;
			vecp++;
			nvecs++;
			iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
		} else {
			iip->ili_fields &= ~XFS_ILOG_ABROOT;
		}
		break;

	case XFS_DINODE_FMT_LOCAL:
		iip->ili_fields &=
			~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);

		if ((iip->ili_fields & XFS_ILOG_ADATA) &&
		    ip->i_afp->if_bytes > 0) {
			ASSERT(ip->i_afp->if_u1.if_data != NULL);

			vecp->i_addr = ip->i_afp->if_u1.if_data;
			/*
			 * Round if_bytes up to a word boundary.
			 * The underlying memory is guaranteed to
			 * be there by xfs_idata_realloc().
			 */
			data_bytes = roundup(ip->i_afp->if_bytes, 4);
			ASSERT((ip->i_afp->if_real_bytes == 0) ||
			       (ip->i_afp->if_real_bytes == data_bytes));
			vecp->i_len = (int)data_bytes;
			vecp->i_type = XLOG_REG_TYPE_IATTR_LOCAL;
			vecp++;
			nvecs++;
			iip->ili_format.ilf_asize = (unsigned)data_bytes;
		} else {
			iip->ili_fields &= ~XFS_ILOG_ADATA;
		}
		break;

	default:
		ASSERT(0);
		break;
	}

out:
	/*
	 * Now update the log format that goes out to disk from the in-core
	 * values.  We always write the inode core to make the arithmetic
	 * games in recovery easier, which isn't a big deal as just about any
	 * transaction would dirty it anyway.
	 */
	iip->ili_format.ilf_fields = XFS_ILOG_CORE |
		(iip->ili_fields & ~XFS_ILOG_TIMESTAMP);
	iip->ili_format.ilf_size = nvecs;
}

/*
 * This is called to pin the inode associated with the inode log
 * item in memory so it cannot be written out.
 */
STATIC void
xfs_inode_item_pin(
	struct xfs_log_item	*lip)
{
	struct xfs_inode	*ip = INODE_ITEM(lip)->ili_inode;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	trace_xfs_inode_pin(ip, _RET_IP_);
	atomic_inc(&ip->i_pincount);
}

/*
 * This is called to unpin the inode associated with the inode log
 * item which was previously pinned with a call to xfs_inode_item_pin().
 *
 * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
 */
STATIC void
xfs_inode_item_unpin(
	struct xfs_log_item	*lip,
	int			remove)
{
	struct xfs_inode	*ip = INODE_ITEM(lip)->ili_inode;

	trace_xfs_inode_unpin(ip, _RET_IP_);
	ASSERT(atomic_read(&ip->i_pincount) > 0);
	if (atomic_dec_and_test(&ip->i_pincount))
		wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
}

/*
 * This is called to attempt to lock the inode associated with this
 * inode log item, in preparation for the push routine which does the actual
 * iflush.  Don't sleep on the inode lock or the flush lock.
 *
 * If the flush lock is already held, indicating that the inode has
 * been or is in the process of being flushed, then (ideally) we'd like to
 * see if the inode's buffer is still incore, and if so give it a nudge.
 * We delay doing so until the pushbuf routine, though, to avoid holding
 * the AIL lock across a call to the blackhole which is the buffer cache.
 * Also we don't want to sleep in any device strategy routines, which can
 * happen if we do the subsequent bawrite in here.
 */
STATIC uint
xfs_inode_item_trylock(
	struct xfs_log_item	*lip)
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;

	if (xfs_ipincount(ip) > 0)
		return XFS_ITEM_PINNED;

	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
		return XFS_ITEM_LOCKED;

	if (!xfs_iflock_nowait(ip)) {
		/*
		 * inode has already been flushed to the backing buffer,
		 * leave it locked in shared mode, pushbuf routine will
		 * unlock it.
		 */
		return XFS_ITEM_PUSHBUF;
	}

	/* Stale items should force out the iclog */
	if (ip->i_flags & XFS_ISTALE) {
		xfs_ifunlock(ip);
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		return XFS_ITEM_PINNED;
	}

#ifdef DEBUG
	if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		ASSERT(iip->ili_fields != 0);
		ASSERT(iip->ili_logged == 0);
		ASSERT(lip->li_flags & XFS_LI_IN_AIL);
	}
#endif
	return XFS_ITEM_SUCCESS;
}

/*
 * Unlock the inode associated with the inode log item.
 * Clear the fields of the inode and inode log item that
 * are specific to the current transaction.  If the
 * hold flag is set, do not unlock the inode.
 */
STATIC void
xfs_inode_item_unlock(
	struct xfs_log_item	*lip)
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;
	unsigned short		lock_flags;

	ASSERT(ip->i_itemp != NULL);
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	/*
	 * If the inode needed a separate buffer with which to log
	 * its extents, then free it now.
	 */
	if (iip->ili_extents_buf != NULL) {
		ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS);
		ASSERT(ip->i_d.di_nextents > 0);
		ASSERT(iip->ili_fields & XFS_ILOG_DEXT);
		ASSERT(ip->i_df.if_bytes > 0);
		kmem_free(iip->ili_extents_buf);
		iip->ili_extents_buf = NULL;
	}
	if (iip->ili_aextents_buf != NULL) {
		ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS);
		ASSERT(ip->i_d.di_anextents > 0);
		ASSERT(iip->ili_fields & XFS_ILOG_AEXT);
		ASSERT(ip->i_afp->if_bytes > 0);
		kmem_free(iip->ili_aextents_buf);
		iip->ili_aextents_buf = NULL;
	}
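
	/*
	 * Drop whatever inode locks the transaction was asked to release for
	 * us via ili_lock_flags.  If it is zero, the caller keeps the inode
	 * locked and is responsible for unlocking it.
	 */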
	lock_flags = iip->ili_lock_flags;
	iip->ili_lock_flags = 0;
	if (lock_flags)
		xfs_iunlock(ip, lock_flags);
}

/*
 * This is called to find out where the oldest active copy of the inode log
 * item in the on disk log resides now that the last log write of it completed
 * at the given lsn.  Since we always re-log all dirty data in an inode, the
 * latest copy in the on disk log is the only one that matters.  Therefore,
 * simply return the given lsn.
 *
 * If the inode has been marked stale because the cluster is being freed, we
 * don't want to (re-)insert this inode into the AIL. There is a race condition
 * where the cluster buffer may be unpinned before the inode is inserted into
 * the AIL during transaction committed processing. If the buffer is unpinned
 * before the inode item has been committed and inserted, then it is possible
 * for the buffer to be written and the IO to complete before the inode is
 * inserted into the AIL. In that case, we'd be inserting a clean, stale inode
 * into the AIL which will never get removed. It will, however, get reclaimed,
 * which triggers an assert in xfs_inode_free() complaining about freeing an
 * inode still in the AIL.
 *
 * To avoid this, just unpin the inode directly and return an LSN of -1 so the
 * transaction committed code knows that it does not need to do any further
 * processing on the item.
 */
STATIC xfs_lsn_t
xfs_inode_item_committed(
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;

	if (xfs_iflags_test(ip, XFS_ISTALE)) {
		xfs_inode_item_unpin(lip, 0);
		return -1;
	}
	return lsn;
}

/*
 * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK
 * failed to get the inode flush lock but did get the inode locked SHARED.
 * Here we're trying to see if the inode buffer is incore, and if so whether
 * it's marked delayed write.  If that's the case, we'll promote it and that
 * will allow the caller to write the buffer by triggering the xfsbufd to run.
 */
STATIC bool
xfs_inode_item_pushbuf(
	struct xfs_log_item	*lip)
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;
	struct xfs_buf		*bp;
	bool			ret = true;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));

	/*
	 * If a flush is not in progress anymore, chances are that the
	 * inode was taken off the AIL.  So, just get out.
	 */
	if (!xfs_isiflocked(ip) ||
	    !(lip->li_flags & XFS_LI_IN_AIL)) {
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		return true;
	}

	bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno,
			iip->ili_format.ilf_len, XBF_TRYLOCK);

	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	if (!bp)
		return true;
	if (XFS_BUF_ISDELAYWRITE(bp))
		xfs_buf_delwri_promote(bp);
	if (xfs_buf_ispinned(bp))
		ret = false;
	xfs_buf_relse(bp);
	return ret;
}

/*
 * This is called to asynchronously write the inode associated with this
 * inode log item out to disk.  The inode will already have been locked by
 * a successful call to xfs_inode_item_trylock().
 */
STATIC void
xfs_inode_item_push(
	struct xfs_log_item	*lip)
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
	ASSERT(xfs_isiflocked(ip));

	/*
	 * Since we were able to lock the inode's flush lock and
	 * we found it on the AIL, the inode must be dirty.  This
	 * is because the inode is removed from the AIL while still
	 * holding the flush lock in xfs_iflush_done().  Thus, if
	 * we found it in the AIL and were able to obtain the flush
	 * lock without sleeping, then there must not have been
	 * anyone in the process of flushing the inode.
	 */
	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || iip->ili_fields != 0);

	/*
	 * Push the inode to its backing buffer. This will not remove the
	 * inode from the AIL - a further push will be required to trigger a
	 * buffer push. However, this allows all the dirty inodes to be pushed
	 * to the buffer before it is pushed to disk. The buffer IO completion
	 * will pull the inode from the AIL, mark it clean and unlock the flush
	 * lock.
	 */
	(void) xfs_iflush(ip, SYNC_TRYLOCK);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
}

/*
 * XXX rcc - this one really has to do something.  Probably needs
 * to stamp in a new field in the incore inode.
 */
STATIC void
xfs_inode_item_committing(
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{
	INODE_ITEM(lip)->ili_last_lsn = lsn;
}

/*
 * This is the ops vector shared by all inode log items.
 */
static const struct xfs_item_ops xfs_inode_item_ops = {
	.iop_size	= xfs_inode_item_size,
	.iop_format	= xfs_inode_item_format,
	.iop_pin	= xfs_inode_item_pin,
	.iop_unpin	= xfs_inode_item_unpin,
	.iop_trylock	= xfs_inode_item_trylock,
	.iop_unlock	= xfs_inode_item_unlock,
	.iop_committed	= xfs_inode_item_committed,
	.iop_push	= xfs_inode_item_push,
	.iop_pushbuf	= xfs_inode_item_pushbuf,
	.iop_committing	= xfs_inode_item_committing
};

/*
 * Initialize the inode log item for a newly allocated (in-core) inode.
 */
void
xfs_inode_item_init(
	struct xfs_inode	*ip,
	struct xfs_mount	*mp)
{
	struct xfs_inode_log_item *iip;

	ASSERT(ip->i_itemp == NULL);
	iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);

	iip->ili_inode = ip;
	xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE,
						&xfs_inode_item_ops);
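
	/*
	 * Record the inode number and its location within the inode cluster
	 * buffer in the log format structure up front; log recovery relies
	 * on these fields to find the on-disk inode to replay into.
	 */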
	iip->ili_format.ilf_type = XFS_LI_INODE;
	iip->ili_format.ilf_ino = ip->i_ino;
	iip->ili_format.ilf_blkno = ip->i_imap.im_blkno;
	iip->ili_format.ilf_len = ip->i_imap.im_len;
	iip->ili_format.ilf_boffset = ip->i_imap.im_boffset;
}

/*
 * Free the inode log item and any memory hanging off of it.
 */
void
xfs_inode_item_destroy(
	xfs_inode_t	*ip)
{
#ifdef XFS_TRANS_DEBUG
	if (ip->i_itemp->ili_root_size != 0) {
		kmem_free(ip->i_itemp->ili_orig_root);
	}
#endif
	kmem_zone_free(xfs_ili_zone, ip->i_itemp);
}

/*
 * This is the inode flushing I/O completion routine.  It is called
 * from interrupt level when the buffer containing the inode is
 * flushed to disk.  It is responsible for removing the inode item
 * from the AIL if it has not been re-logged, and unlocking the inode's
 * flush lock.
 *
 * To reduce AIL lock traffic as much as possible, we scan the buffer log item
 * list for other inodes that will run this function. We remove them from the
 * buffer list so we can process all the inode IO completions in one AIL lock
 * traversal.
 */
void
xfs_iflush_done(
	struct xfs_buf		*bp,
	struct xfs_log_item	*lip)
{
	struct xfs_inode_log_item *iip;
	struct xfs_log_item	*blip;
	struct xfs_log_item	*next;
	struct xfs_log_item	*prev;
	struct xfs_ail		*ailp = lip->li_ailp;
	int			need_ail = 0;

	/*
	 * Scan the buffer IO completions for other inodes being completed and
	 * attach them to the current inode log item.
	 */
	blip = bp->b_fspriv;
	prev = NULL;
	while (blip != NULL) {
		if (blip->li_cb != xfs_iflush_done) {
			prev = blip;
			blip = blip->li_bio_list;
			continue;
		}

		/* remove from list */
		next = blip->li_bio_list;
		if (!prev) {
			bp->b_fspriv = next;
		} else {
			prev->li_bio_list = next;
		}

		/* add to current list */
		blip->li_bio_list = lip->li_bio_list;
		lip->li_bio_list = blip;

		/*
		 * while we have the item, do the unlocked check for needing
		 * the AIL lock.
		 */
		iip = INODE_ITEM(blip);
		if (iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn)
			need_ail++;

		blip = next;
	}

	/* make sure we capture the state of the initial inode. */
	iip = INODE_ITEM(lip);
	if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn)
		need_ail++;

	/*
	 * We only want to pull the item from the AIL if it is
	 * actually there and its location in the log has not
	 * changed since we started the flush.  Thus, we only bother
	 * if the ili_logged flag is set and the inode's lsn has not
	 * changed.  First we check the lsn outside
	 * the lock since it's cheaper, and then we recheck while
	 * holding the lock before removing the inode from the AIL.
	 */
	if (need_ail) {
		struct xfs_log_item *log_items[need_ail];
		int i = 0;

		spin_lock(&ailp->xa_lock);
		for (blip = lip; blip; blip = blip->li_bio_list) {
			iip = INODE_ITEM(blip);
			if (iip->ili_logged &&
			    blip->li_lsn == iip->ili_flush_lsn) {
				log_items[i++] = blip;
			}
			ASSERT(i <= need_ail);
		}
		/* xfs_trans_ail_delete_bulk() drops the AIL lock. */
		xfs_trans_ail_delete_bulk(ailp, log_items, i);
	}

	/*
	 * clean up and unlock the flush lock now we are done. We can clear the
	 * ili_last_fields bits now that we know that the data corresponding to
	 * them is safely on disk.
	 */
	for (blip = lip; blip; blip = next) {
		next = blip->li_bio_list;
		blip->li_bio_list = NULL;

		iip = INODE_ITEM(blip);
		iip->ili_logged = 0;
		iip->ili_last_fields = 0;
		xfs_ifunlock(iip->ili_inode);
	}
}

/*
 * This is the inode flushing abort routine.  It is called from xfs_iflush
 * when the filesystem is shutting down to clean up the inode state.  It is
 * responsible for removing the inode item from the AIL if it has not been
 * re-logged, and unlocking the inode's flush lock.
 */
void
xfs_iflush_abort(
	xfs_inode_t		*ip)
{
	xfs_inode_log_item_t	*iip = ip->i_itemp;

	if (iip) {
		struct xfs_ail	*ailp = iip->ili_item.li_ailp;
		if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
			spin_lock(&ailp->xa_lock);
			if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
				/* xfs_trans_ail_delete() drops the AIL lock. */
				xfs_trans_ail_delete(ailp, (xfs_log_item_t *)iip);
			} else
				spin_unlock(&ailp->xa_lock);
		}
		iip->ili_logged = 0;
		/*
		 * Clear the ili_last_fields bits now that we know that the
		 * data corresponding to them is safely on disk.
		 */
		iip->ili_last_fields = 0;
		/*
		 * Clear the inode logging fields so no more flushes are
		 * attempted.
		 */
		iip->ili_fields = 0;
	}
	/*
	 * Release the inode's flush lock since we're done with it.
	 */
	xfs_ifunlock(ip);
}
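
/*
 * This is the IO completion callback attached to stale inodes sitting on an
 * inode cluster buffer that is being invalidated.  All it needs to do is
 * abort the flush so the log item is cleaned up and the flush lock released.
 */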
void
xfs_istale_done(
	struct xfs_buf		*bp,
	struct xfs_log_item	*lip)
{
	xfs_iflush_abort(INODE_ITEM(lip)->ili_inode);
}

/*
 * convert an xfs_inode_log_format struct from either 32 or 64 bit versions
 * (which can have different field alignments) to the native version
 */
int
xfs_inode_item_format_convert(
	xfs_log_iovec_t		*buf,
	xfs_inode_log_format_t	*in_f)
{
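	/*
	 * The 32 bit and 64 bit on-disk variants are distinguished purely by
	 * their size, so copy each field individually into the native
	 * in-core format.
	 */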
	if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) {
		xfs_inode_log_format_32_t *in_f32 = buf->i_addr;

		in_f->ilf_type = in_f32->ilf_type;
		in_f->ilf_size = in_f32->ilf_size;
		in_f->ilf_fields = in_f32->ilf_fields;
		in_f->ilf_asize = in_f32->ilf_asize;
		in_f->ilf_dsize = in_f32->ilf_dsize;
		in_f->ilf_ino = in_f32->ilf_ino;
		/* copy biggest field of ilf_u */
		memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
		       in_f32->ilf_u.ilfu_uuid.__u_bits,
		       sizeof(uuid_t));
		in_f->ilf_blkno = in_f32->ilf_blkno;
		in_f->ilf_len = in_f32->ilf_len;
		in_f->ilf_boffset = in_f32->ilf_boffset;
		return 0;
	} else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)) {
		xfs_inode_log_format_64_t *in_f64 = buf->i_addr;

		in_f->ilf_type = in_f64->ilf_type;
		in_f->ilf_size = in_f64->ilf_size;
		in_f->ilf_fields = in_f64->ilf_fields;
		in_f->ilf_asize = in_f64->ilf_asize;
		in_f->ilf_dsize = in_f64->ilf_dsize;
		in_f->ilf_ino = in_f64->ilf_ino;
		/* copy biggest field of ilf_u */
		memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
		       in_f64->ilf_u.ilfu_uuid.__u_bits,
		       sizeof(uuid_t));
		in_f->ilf_blkno = in_f64->ilf_blkno;
		in_f->ilf_len = in_f64->ilf_len;
		in_f->ilf_boffset = in_f64->ilf_boffset;
		return 0;
	}
	return EFSCORRUPTED;
}