fix_node.c 77 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594
  1. /*
  2. * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
  3. */
  4. /**
  5. ** old_item_num
  6. ** old_entry_num
  7. ** set_entry_sizes
  8. ** create_virtual_node
  9. ** check_left
  10. ** check_right
  11. ** directory_part_size
  12. ** get_num_ver
  13. ** set_parameters
  14. ** is_leaf_removable
  15. ** are_leaves_removable
  16. ** get_empty_nodes
  17. ** get_lfree
  18. ** get_rfree
  19. ** is_left_neighbor_in_cache
  20. ** decrement_key
  21. ** get_far_parent
  22. ** get_parents
  23. ** can_node_be_removed
  24. ** ip_check_balance
  25. ** dc_check_balance_internal
  26. ** dc_check_balance_leaf
  27. ** dc_check_balance
  28. ** check_balance
  29. ** get_direct_parent
  30. ** get_neighbors
  31. ** fix_nodes
  32. **
  33. **
  34. **/
  35. #include <linux/time.h>
  36. #include <linux/slab.h>
  37. #include <linux/string.h>
  38. #include "reiserfs.h"
  39. #include <linux/buffer_head.h>
/* To make any changes in the tree we find a node that contains the item
   to be changed/deleted, or the position in the node where a new item is
   to be inserted. We call this node S. To do balancing we need to decide
   what we will shift to the left/right neighbor, or to a new node, where
   the new item will be, etc. To make this analysis simpler we build a
   virtual node. A virtual node is an array of items that will replace the
   items of node S. (For instance, if we are going to delete an item, the
   virtual node does not contain it.) The virtual node keeps information
   about item sizes and types, mergeability of the first and last items,
   and sizes of all entries in a directory item. We use this array of
   items when calculating what we can shift to the neighbors and how many
   nodes we have to have if we do not do any shifting, if we shift to the
   left/right neighbor, or to both. */
  53. /* taking item number in virtual node, returns number of item, that it has in source buffer */
  54. static inline int old_item_num(int new_num, int affected_item_num, int mode)
  55. {
  56. if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num)
  57. return new_num;
  58. if (mode == M_INSERT) {
  59. RFALSE(new_num == 0,
  60. "vs-8005: for INSERT mode and item number of inserted item");
  61. return new_num - 1;
  62. }
  63. RFALSE(mode != M_DELETE,
  64. "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'",
  65. mode);
  66. /* delete mode */
  67. return new_num + 1;
  68. }
  69. static void create_virtual_node(struct tree_balance *tb, int h)
  70. {
  71. struct item_head *ih;
  72. struct virtual_node *vn = tb->tb_vn;
  73. int new_num;
  74. struct buffer_head *Sh; /* this comes from tb->S[h] */
  75. Sh = PATH_H_PBUFFER(tb->tb_path, h);
  76. /* size of changed node */
  77. vn->vn_size =
  78. MAX_CHILD_SIZE(Sh) - B_FREE_SPACE(Sh) + tb->insert_size[h];
  79. /* for internal nodes array if virtual items is not created */
  80. if (h) {
  81. vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE);
  82. return;
  83. }
  84. /* number of items in virtual node */
  85. vn->vn_nr_item =
  86. B_NR_ITEMS(Sh) + ((vn->vn_mode == M_INSERT) ? 1 : 0) -
  87. ((vn->vn_mode == M_DELETE) ? 1 : 0);
  88. /* first virtual item */
  89. vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1);
  90. memset(vn->vn_vi, 0, vn->vn_nr_item * sizeof(struct virtual_item));
  91. vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item);
  92. /* first item in the node */
  93. ih = B_N_PITEM_HEAD(Sh, 0);
  94. /* define the mergeability for 0-th item (if it is not being deleted) */
  95. if (op_is_left_mergeable(&(ih->ih_key), Sh->b_size)
  96. && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num))
  97. vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE;
  98. /* go through all items those remain in the virtual node (except for the new (inserted) one) */
  99. for (new_num = 0; new_num < vn->vn_nr_item; new_num++) {
  100. int j;
  101. struct virtual_item *vi = vn->vn_vi + new_num;
  102. int is_affected =
  103. ((new_num != vn->vn_affected_item_num) ? 0 : 1);
  104. if (is_affected && vn->vn_mode == M_INSERT)
  105. continue;
  106. /* get item number in source node */
  107. j = old_item_num(new_num, vn->vn_affected_item_num,
  108. vn->vn_mode);
  109. vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE;
  110. vi->vi_ih = ih + j;
  111. vi->vi_item = B_I_PITEM(Sh, ih + j);
  112. vi->vi_uarea = vn->vn_free_ptr;
  113. // FIXME: there is no check, that item operation did not
  114. // consume too much memory
  115. vn->vn_free_ptr +=
  116. op_create_vi(vn, vi, is_affected, tb->insert_size[0]);
  117. if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
  118. reiserfs_panic(tb->tb_sb, "vs-8030",
  119. "virtual node space consumed");
  120. if (!is_affected)
  121. /* this is not being changed */
  122. continue;
  123. if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) {
  124. vn->vn_vi[new_num].vi_item_len += tb->insert_size[0];
  125. vi->vi_new_data = vn->vn_data; // pointer to data which is going to be pasted
  126. }
  127. }
  128. /* virtual inserted item is not defined yet */
  129. if (vn->vn_mode == M_INSERT) {
  130. struct virtual_item *vi = vn->vn_vi + vn->vn_affected_item_num;
  131. RFALSE(vn->vn_ins_ih == NULL,
  132. "vs-8040: item header of inserted item is not specified");
  133. vi->vi_item_len = tb->insert_size[0];
  134. vi->vi_ih = vn->vn_ins_ih;
  135. vi->vi_item = vn->vn_data;
  136. vi->vi_uarea = vn->vn_free_ptr;
  137. op_create_vi(vn, vi, 0 /*not pasted or cut */ ,
  138. tb->insert_size[0]);
  139. }
  140. /* set right merge flag we take right delimiting key and check whether it is a mergeable item */
  141. if (tb->CFR[0]) {
  142. struct reiserfs_key *key;
  143. key = B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]);
  144. if (op_is_left_mergeable(key, Sh->b_size)
  145. && (vn->vn_mode != M_DELETE
  146. || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1))
  147. vn->vn_vi[vn->vn_nr_item - 1].vi_type |=
  148. VI_TYPE_RIGHT_MERGEABLE;
  149. #ifdef CONFIG_REISERFS_CHECK
  150. if (op_is_left_mergeable(key, Sh->b_size) &&
  151. !(vn->vn_mode != M_DELETE
  152. || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) {
  153. /* we delete last item and it could be merged with right neighbor's first item */
  154. if (!
  155. (B_NR_ITEMS(Sh) == 1
  156. && is_direntry_le_ih(B_N_PITEM_HEAD(Sh, 0))
  157. && I_ENTRY_COUNT(B_N_PITEM_HEAD(Sh, 0)) == 1)) {
  158. /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */
  159. print_block(Sh, 0, -1, -1);
  160. reiserfs_panic(tb->tb_sb, "vs-8045",
  161. "rdkey %k, affected item==%d "
  162. "(mode==%c) Must be %c",
  163. key, vn->vn_affected_item_num,
  164. vn->vn_mode, M_DELETE);
  165. }
  166. }
  167. #endif
  168. }
  169. }
  170. /* using virtual node check, how many items can be shifted to left
  171. neighbor */
  172. static void check_left(struct tree_balance *tb, int h, int cur_free)
  173. {
  174. int i;
  175. struct virtual_node *vn = tb->tb_vn;
  176. struct virtual_item *vi;
  177. int d_size, ih_size;
  178. RFALSE(cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free);
  179. /* internal level */
  180. if (h > 0) {
  181. tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
  182. return;
  183. }
  184. /* leaf level */
  185. if (!cur_free || !vn->vn_nr_item) {
  186. /* no free space or nothing to move */
  187. tb->lnum[h] = 0;
  188. tb->lbytes = -1;
  189. return;
  190. }
  191. RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
  192. "vs-8055: parent does not exist or invalid");
  193. vi = vn->vn_vi;
  194. if ((unsigned int)cur_free >=
  195. (vn->vn_size -
  196. ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? IH_SIZE : 0))) {
  197. /* all contents of S[0] fits into L[0] */
  198. RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
  199. "vs-8055: invalid mode or balance condition failed");
  200. tb->lnum[0] = vn->vn_nr_item;
  201. tb->lbytes = -1;
  202. return;
  203. }
  204. d_size = 0, ih_size = IH_SIZE;
  205. /* first item may be merge with last item in left neighbor */
  206. if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE)
  207. d_size = -((int)IH_SIZE), ih_size = 0;
  208. tb->lnum[0] = 0;
  209. for (i = 0; i < vn->vn_nr_item;
  210. i++, ih_size = IH_SIZE, d_size = 0, vi++) {
  211. d_size += vi->vi_item_len;
  212. if (cur_free >= d_size) {
  213. /* the item can be shifted entirely */
  214. cur_free -= d_size;
  215. tb->lnum[0]++;
  216. continue;
  217. }
  218. /* the item cannot be shifted entirely, try to split it */
  219. /* check whether L[0] can hold ih and at least one byte of the item body */
  220. if (cur_free <= ih_size) {
  221. /* cannot shift even a part of the current item */
  222. tb->lbytes = -1;
  223. return;
  224. }
  225. cur_free -= ih_size;
  226. tb->lbytes = op_check_left(vi, cur_free, 0, 0);
  227. if (tb->lbytes != -1)
  228. /* count partially shifted item */
  229. tb->lnum[0]++;
  230. break;
  231. }
  232. return;
  233. }
  234. /* using virtual node check, how many items can be shifted to right
  235. neighbor */
  236. static void check_right(struct tree_balance *tb, int h, int cur_free)
  237. {
  238. int i;
  239. struct virtual_node *vn = tb->tb_vn;
  240. struct virtual_item *vi;
  241. int d_size, ih_size;
  242. RFALSE(cur_free < 0, "vs-8070: cur_free < 0");
  243. /* internal level */
  244. if (h > 0) {
  245. tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
  246. return;
  247. }
  248. /* leaf level */
  249. if (!cur_free || !vn->vn_nr_item) {
  250. /* no free space */
  251. tb->rnum[h] = 0;
  252. tb->rbytes = -1;
  253. return;
  254. }
  255. RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
  256. "vs-8075: parent does not exist or invalid");
  257. vi = vn->vn_vi + vn->vn_nr_item - 1;
  258. if ((unsigned int)cur_free >=
  259. (vn->vn_size -
  260. ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? IH_SIZE : 0))) {
  261. /* all contents of S[0] fits into R[0] */
  262. RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
  263. "vs-8080: invalid mode or balance condition failed");
  264. tb->rnum[h] = vn->vn_nr_item;
  265. tb->rbytes = -1;
  266. return;
  267. }
  268. d_size = 0, ih_size = IH_SIZE;
  269. /* last item may be merge with first item in right neighbor */
  270. if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE)
  271. d_size = -(int)IH_SIZE, ih_size = 0;
  272. tb->rnum[0] = 0;
  273. for (i = vn->vn_nr_item - 1; i >= 0;
  274. i--, d_size = 0, ih_size = IH_SIZE, vi--) {
  275. d_size += vi->vi_item_len;
  276. if (cur_free >= d_size) {
  277. /* the item can be shifted entirely */
  278. cur_free -= d_size;
  279. tb->rnum[0]++;
  280. continue;
  281. }
  282. /* check whether R[0] can hold ih and at least one byte of the item body */
  283. if (cur_free <= ih_size) { /* cannot shift even a part of the current item */
  284. tb->rbytes = -1;
  285. return;
  286. }
  287. /* R[0] can hold the header of the item and at least one byte of its body */
  288. cur_free -= ih_size; /* cur_free is still > 0 */
  289. tb->rbytes = op_check_right(vi, cur_free);
  290. if (tb->rbytes != -1)
  291. /* count partially shifted item */
  292. tb->rnum[0]++;
  293. break;
  294. }
  295. return;
  296. }
  297. /*
  298. * from - number of items, which are shifted to left neighbor entirely
  299. * to - number of item, which are shifted to right neighbor entirely
  300. * from_bytes - number of bytes of boundary item (or directory entries) which are shifted to left neighbor
  301. * to_bytes - number of bytes of boundary item (or directory entries) which are shifted to right neighbor */
  302. static int get_num_ver(int mode, struct tree_balance *tb, int h,
  303. int from, int from_bytes,
  304. int to, int to_bytes, short *snum012, int flow)
  305. {
  306. int i;
  307. int cur_free;
  308. // int bytes;
  309. int units;
  310. struct virtual_node *vn = tb->tb_vn;
  311. // struct virtual_item * vi;
  312. int total_node_size, max_node_size, current_item_size;
  313. int needed_nodes;
  314. int start_item, /* position of item we start filling node from */
  315. end_item, /* position of item we finish filling node by */
  316. start_bytes, /* number of first bytes (entries for directory) of start_item-th item
  317. we do not include into node that is being filled */
  318. end_bytes; /* number of last bytes (entries for directory) of end_item-th item
  319. we do node include into node that is being filled */
  320. int split_item_positions[2]; /* these are positions in virtual item of
  321. items, that are split between S[0] and
  322. S1new and S1new and S2new */
  323. split_item_positions[0] = -1;
  324. split_item_positions[1] = -1;
  325. /* We only create additional nodes if we are in insert or paste mode
  326. or we are in replace mode at the internal level. If h is 0 and
  327. the mode is M_REPLACE then in fix_nodes we change the mode to
  328. paste or insert before we get here in the code. */
  329. RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE),
  330. "vs-8100: insert_size < 0 in overflow");
  331. max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h));
  332. /* snum012 [0-2] - number of items, that lay
  333. to S[0], first new node and second new node */
  334. snum012[3] = -1; /* s1bytes */
  335. snum012[4] = -1; /* s2bytes */
  336. /* internal level */
  337. if (h > 0) {
  338. i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE);
  339. if (i == max_node_size)
  340. return 1;
  341. return (i / max_node_size + 1);
  342. }
  343. /* leaf level */
  344. needed_nodes = 1;
  345. total_node_size = 0;
  346. cur_free = max_node_size;
  347. // start from 'from'-th item
  348. start_item = from;
  349. // skip its first 'start_bytes' units
  350. start_bytes = ((from_bytes != -1) ? from_bytes : 0);
  351. // last included item is the 'end_item'-th one
  352. end_item = vn->vn_nr_item - to - 1;
  353. // do not count last 'end_bytes' units of 'end_item'-th item
  354. end_bytes = (to_bytes != -1) ? to_bytes : 0;
  355. /* go through all item beginning from the start_item-th item and ending by
  356. the end_item-th item. Do not count first 'start_bytes' units of
  357. 'start_item'-th item and last 'end_bytes' of 'end_item'-th item */
  358. for (i = start_item; i <= end_item; i++) {
  359. struct virtual_item *vi = vn->vn_vi + i;
  360. int skip_from_end = ((i == end_item) ? end_bytes : 0);
  361. RFALSE(needed_nodes > 3, "vs-8105: too many nodes are needed");
  362. /* get size of current item */
  363. current_item_size = vi->vi_item_len;
  364. /* do not take in calculation head part (from_bytes) of from-th item */
  365. current_item_size -=
  366. op_part_size(vi, 0 /*from start */ , start_bytes);
  367. /* do not take in calculation tail part of last item */
  368. current_item_size -=
  369. op_part_size(vi, 1 /*from end */ , skip_from_end);
  370. /* if item fits into current node entierly */
  371. if (total_node_size + current_item_size <= max_node_size) {
  372. snum012[needed_nodes - 1]++;
  373. total_node_size += current_item_size;
  374. start_bytes = 0;
  375. continue;
  376. }
  377. if (current_item_size > max_node_size) {
  378. /* virtual item length is longer, than max size of item in
  379. a node. It is impossible for direct item */
  380. RFALSE(is_direct_le_ih(vi->vi_ih),
  381. "vs-8110: "
  382. "direct item length is %d. It can not be longer than %d",
  383. current_item_size, max_node_size);
  384. /* we will try to split it */
  385. flow = 1;
  386. }
  387. if (!flow) {
  388. /* as we do not split items, take new node and continue */
  389. needed_nodes++;
  390. i--;
  391. total_node_size = 0;
  392. continue;
  393. }
  394. // calculate number of item units which fit into node being
  395. // filled
  396. {
  397. int free_space;
  398. free_space = max_node_size - total_node_size - IH_SIZE;
  399. units =
  400. op_check_left(vi, free_space, start_bytes,
  401. skip_from_end);
  402. if (units == -1) {
  403. /* nothing fits into current node, take new node and continue */
  404. needed_nodes++, i--, total_node_size = 0;
  405. continue;
  406. }
  407. }
  408. /* something fits into the current node */
  409. //if (snum012[3] != -1 || needed_nodes != 1)
  410. // reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required");
  411. //snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units;
  412. start_bytes += units;
  413. snum012[needed_nodes - 1 + 3] = units;
  414. if (needed_nodes > 2)
  415. reiserfs_warning(tb->tb_sb, "vs-8111",
  416. "split_item_position is out of range");
  417. snum012[needed_nodes - 1]++;
  418. split_item_positions[needed_nodes - 1] = i;
  419. needed_nodes++;
  420. /* continue from the same item with start_bytes != -1 */
  421. start_item = i;
  422. i--;
  423. total_node_size = 0;
  424. }
  425. // sum012[4] (if it is not -1) contains number of units of which
  426. // are to be in S1new, snum012[3] - to be in S0. They are supposed
  427. // to be S1bytes and S2bytes correspondingly, so recalculate
  428. if (snum012[4] > 0) {
  429. int split_item_num;
  430. int bytes_to_r, bytes_to_l;
  431. int bytes_to_S1new;
  432. split_item_num = split_item_positions[1];
  433. bytes_to_l =
  434. ((from == split_item_num
  435. && from_bytes != -1) ? from_bytes : 0);
  436. bytes_to_r =
  437. ((end_item == split_item_num
  438. && end_bytes != -1) ? end_bytes : 0);
  439. bytes_to_S1new =
  440. ((split_item_positions[0] ==
  441. split_item_positions[1]) ? snum012[3] : 0);
  442. // s2bytes
  443. snum012[4] =
  444. op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] -
  445. bytes_to_r - bytes_to_l - bytes_to_S1new;
  446. if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY &&
  447. vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT)
  448. reiserfs_warning(tb->tb_sb, "vs-8115",
  449. "not directory or indirect item");
  450. }
  451. /* now we know S2bytes, calculate S1bytes */
  452. if (snum012[3] > 0) {
  453. int split_item_num;
  454. int bytes_to_r, bytes_to_l;
  455. int bytes_to_S2new;
  456. split_item_num = split_item_positions[0];
  457. bytes_to_l =
  458. ((from == split_item_num
  459. && from_bytes != -1) ? from_bytes : 0);
  460. bytes_to_r =
  461. ((end_item == split_item_num
  462. && end_bytes != -1) ? end_bytes : 0);
  463. bytes_to_S2new =
  464. ((split_item_positions[0] == split_item_positions[1]
  465. && snum012[4] != -1) ? snum012[4] : 0);
  466. // s1bytes
  467. snum012[3] =
  468. op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] -
  469. bytes_to_r - bytes_to_l - bytes_to_S2new;
  470. }
  471. return needed_nodes;
  472. }
  473. /* Set parameters for balancing.
  474. * Performs write of results of analysis of balancing into structure tb,
  475. * where it will later be used by the functions that actually do the balancing.
  476. * Parameters:
  477. * tb tree_balance structure;
  478. * h current level of the node;
  479. * lnum number of items from S[h] that must be shifted to L[h];
  480. * rnum number of items from S[h] that must be shifted to R[h];
  481. * blk_num number of blocks that S[h] will be splitted into;
  482. * s012 number of items that fall into splitted nodes.
  483. * lbytes number of bytes which flow to the left neighbor from the item that is not
  484. * not shifted entirely
  485. * rbytes number of bytes which flow to the right neighbor from the item that is not
  486. * not shifted entirely
  487. * s1bytes number of bytes which flow to the first new node when S[0] splits (this number is contained in s012 array)
  488. */
  489. static void set_parameters(struct tree_balance *tb, int h, int lnum,
  490. int rnum, int blk_num, short *s012, int lb, int rb)
  491. {
  492. tb->lnum[h] = lnum;
  493. tb->rnum[h] = rnum;
  494. tb->blknum[h] = blk_num;
  495. if (h == 0) { /* only for leaf level */
  496. if (s012 != NULL) {
  497. tb->s0num = *s012++,
  498. tb->s1num = *s012++, tb->s2num = *s012++;
  499. tb->s1bytes = *s012++;
  500. tb->s2bytes = *s012;
  501. }
  502. tb->lbytes = lb;
  503. tb->rbytes = rb;
  504. }
  505. PROC_INFO_ADD(tb->tb_sb, lnum[h], lnum);
  506. PROC_INFO_ADD(tb->tb_sb, rnum[h], rnum);
  507. PROC_INFO_ADD(tb->tb_sb, lbytes[h], lb);
  508. PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb);
  509. }
  510. /* check, does node disappear if we shift tb->lnum[0] items to left
  511. neighbor and tb->rnum[0] to the right one. */
  512. static int is_leaf_removable(struct tree_balance *tb)
  513. {
  514. struct virtual_node *vn = tb->tb_vn;
  515. int to_left, to_right;
  516. int size;
  517. int remain_items;
  518. /* number of items, that will be shifted to left (right) neighbor
  519. entirely */
  520. to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0);
  521. to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0);
  522. remain_items = vn->vn_nr_item;
  523. /* how many items remain in S[0] after shiftings to neighbors */
  524. remain_items -= (to_left + to_right);
  525. if (remain_items < 1) {
  526. /* all content of node can be shifted to neighbors */
  527. set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0,
  528. NULL, -1, -1);
  529. return 1;
  530. }
  531. if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1)
  532. /* S[0] is not removable */
  533. return 0;
  534. /* check, whether we can divide 1 remaining item between neighbors */
  535. /* get size of remaining item (in item units) */
  536. size = op_unit_num(&(vn->vn_vi[to_left]));
  537. if (tb->lbytes + tb->rbytes >= size) {
  538. set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL,
  539. tb->lbytes, -1);
  540. return 1;
  541. }
  542. return 0;
  543. }
  544. /* check whether L, S, R can be joined in one node */
  545. static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree)
  546. {
  547. struct virtual_node *vn = tb->tb_vn;
  548. int ih_size;
  549. struct buffer_head *S0;
  550. S0 = PATH_H_PBUFFER(tb->tb_path, 0);
  551. ih_size = 0;
  552. if (vn->vn_nr_item) {
  553. if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE)
  554. ih_size += IH_SIZE;
  555. if (vn->vn_vi[vn->vn_nr_item - 1].
  556. vi_type & VI_TYPE_RIGHT_MERGEABLE)
  557. ih_size += IH_SIZE;
  558. } else {
  559. /* there was only one item and it will be deleted */
  560. struct item_head *ih;
  561. RFALSE(B_NR_ITEMS(S0) != 1,
  562. "vs-8125: item number must be 1: it is %d",
  563. B_NR_ITEMS(S0));
  564. ih = B_N_PITEM_HEAD(S0, 0);
  565. if (tb->CFR[0]
  566. && !comp_short_le_keys(&(ih->ih_key),
  567. B_N_PDELIM_KEY(tb->CFR[0],
  568. tb->rkey[0])))
  569. if (is_direntry_le_ih(ih)) {
  570. /* Directory must be in correct state here: that is
  571. somewhere at the left side should exist first directory
  572. item. But the item being deleted can not be that first
  573. one because its right neighbor is item of the same
  574. directory. (But first item always gets deleted in last
  575. turn). So, neighbors of deleted item can be merged, so
  576. we can save ih_size */
  577. ih_size = IH_SIZE;
  578. /* we might check that left neighbor exists and is of the
  579. same directory */
  580. RFALSE(le_ih_k_offset(ih) == DOT_OFFSET,
  581. "vs-8130: first directory item can not be removed until directory is not empty");
  582. }
  583. }
  584. if (MAX_CHILD_SIZE(S0) + vn->vn_size <= rfree + lfree + ih_size) {
  585. set_parameters(tb, 0, -1, -1, -1, NULL, -1, -1);
  586. PROC_INFO_INC(tb->tb_sb, leaves_removable);
  587. return 1;
  588. }
  589. return 0;
  590. }
  591. /* when we do not split item, lnum and rnum are numbers of entire items */
  592. #define SET_PAR_SHIFT_LEFT \
  593. if (h)\
  594. {\
  595. int to_l;\
  596. \
  597. to_l = (MAX_NR_KEY(Sh)+1 - lpar + vn->vn_nr_item + 1) / 2 -\
  598. (MAX_NR_KEY(Sh) + 1 - lpar);\
  599. \
  600. set_parameters (tb, h, to_l, 0, lnver, NULL, -1, -1);\
  601. }\
  602. else \
  603. {\
  604. if (lset==LEFT_SHIFT_FLOW)\
  605. set_parameters (tb, h, lpar, 0, lnver, snum012+lset,\
  606. tb->lbytes, -1);\
  607. else\
  608. set_parameters (tb, h, lpar - (tb->lbytes!=-1), 0, lnver, snum012+lset,\
  609. -1, -1);\
  610. }
  611. #define SET_PAR_SHIFT_RIGHT \
  612. if (h)\
  613. {\
  614. int to_r;\
  615. \
  616. to_r = (MAX_NR_KEY(Sh)+1 - rpar + vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - rpar);\
  617. \
  618. set_parameters (tb, h, 0, to_r, rnver, NULL, -1, -1);\
  619. }\
  620. else \
  621. {\
  622. if (rset==RIGHT_SHIFT_FLOW)\
  623. set_parameters (tb, h, 0, rpar, rnver, snum012+rset,\
  624. -1, tb->rbytes);\
  625. else\
  626. set_parameters (tb, h, 0, rpar - (tb->rbytes!=-1), rnver, snum012+rset,\
  627. -1, -1);\
  628. }
  629. static void free_buffers_in_tb(struct tree_balance *tb)
  630. {
  631. int i;
  632. pathrelse(tb->tb_path);
  633. for (i = 0; i < MAX_HEIGHT; i++) {
  634. brelse(tb->L[i]);
  635. brelse(tb->R[i]);
  636. brelse(tb->FL[i]);
  637. brelse(tb->FR[i]);
  638. brelse(tb->CFL[i]);
  639. brelse(tb->CFR[i]);
  640. tb->L[i] = NULL;
  641. tb->R[i] = NULL;
  642. tb->FL[i] = NULL;
  643. tb->FR[i] = NULL;
  644. tb->CFL[i] = NULL;
  645. tb->CFR[i] = NULL;
  646. }
  647. }
  648. /* Get new buffers for storing new nodes that are created while balancing.
  649. * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
  650. * CARRY_ON - schedule didn't occur while the function worked;
  651. * NO_DISK_SPACE - no disk space.
  652. */
  653. /* The function is NOT SCHEDULE-SAFE! */
  654. static int get_empty_nodes(struct tree_balance *tb, int h)
  655. {
  656. struct buffer_head *new_bh,
  657. *Sh = PATH_H_PBUFFER(tb->tb_path, h);
  658. b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, };
  659. int counter, number_of_freeblk, amount_needed, /* number of needed empty blocks */
  660. retval = CARRY_ON;
  661. struct super_block *sb = tb->tb_sb;
  662. /* number_of_freeblk is the number of empty blocks which have been
  663. acquired for use by the balancing algorithm minus the number of
  664. empty blocks used in the previous levels of the analysis,
  665. number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs
  666. after empty blocks are acquired, and the balancing analysis is
  667. then restarted, amount_needed is the number needed by this level
  668. (h) of the balancing analysis.
  669. Note that for systems with many processes writing, it would be
  670. more layout optimal to calculate the total number needed by all
  671. levels and then to run reiserfs_new_blocks to get all of them at once. */
  672. /* Initiate number_of_freeblk to the amount acquired prior to the restart of
  673. the analysis or 0 if not restarted, then subtract the amount needed
  674. by all of the levels of the tree below h. */
  675. /* blknum includes S[h], so we subtract 1 in this calculation */
  676. for (counter = 0, number_of_freeblk = tb->cur_blknum;
  677. counter < h; counter++)
  678. number_of_freeblk -=
  679. (tb->blknum[counter]) ? (tb->blknum[counter] -
  680. 1) : 0;
  681. /* Allocate missing empty blocks. */
  682. /* if Sh == 0 then we are getting a new root */
  683. amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1;
  684. /* Amount_needed = the amount that we need more than the amount that we have. */
  685. if (amount_needed > number_of_freeblk)
  686. amount_needed -= number_of_freeblk;
  687. else /* If we have enough already then there is nothing to do. */
  688. return CARRY_ON;
  689. /* No need to check quota - is not allocated for blocks used for formatted nodes */
  690. if (reiserfs_new_form_blocknrs(tb, blocknrs,
  691. amount_needed) == NO_DISK_SPACE)
  692. return NO_DISK_SPACE;
  693. /* for each blocknumber we just got, get a buffer and stick it on FEB */
  694. for (blocknr = blocknrs, counter = 0;
  695. counter < amount_needed; blocknr++, counter++) {
  696. RFALSE(!*blocknr,
  697. "PAP-8135: reiserfs_new_blocknrs failed when got new blocks");
  698. new_bh = sb_getblk(sb, *blocknr);
  699. RFALSE(buffer_dirty(new_bh) ||
  700. buffer_journaled(new_bh) ||
  701. buffer_journal_dirty(new_bh),
  702. "PAP-8140: journaled or dirty buffer %b for the new block",
  703. new_bh);
  704. /* Put empty buffers into the array. */
  705. RFALSE(tb->FEB[tb->cur_blknum],
  706. "PAP-8141: busy slot for new buffer");
  707. set_buffer_journal_new(new_bh);
  708. tb->FEB[tb->cur_blknum++] = new_bh;
  709. }
  710. if (retval == CARRY_ON && FILESYSTEM_CHANGED_TB(tb))
  711. retval = REPEAT_SEARCH;
  712. return retval;
  713. }
  714. /* Get free space of the left neighbor, which is stored in the parent
  715. * node of the left neighbor. */
  716. static int get_lfree(struct tree_balance *tb, int h)
  717. {
  718. struct buffer_head *l, *f;
  719. int order;
  720. if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL ||
  721. (l = tb->FL[h]) == NULL)
  722. return 0;
  723. if (f == l)
  724. order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) - 1;
  725. else {
  726. order = B_NR_ITEMS(l);
  727. f = l;
  728. }
  729. return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
  730. }
  731. /* Get free space of the right neighbor,
  732. * which is stored in the parent node of the right neighbor.
  733. */
  734. static int get_rfree(struct tree_balance *tb, int h)
  735. {
  736. struct buffer_head *r, *f;
  737. int order;
  738. if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL ||
  739. (r = tb->FR[h]) == NULL)
  740. return 0;
  741. if (f == r)
  742. order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) + 1;
  743. else {
  744. order = 0;
  745. f = r;
  746. }
  747. return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
  748. }
  749. /* Check whether left neighbor is in memory. */
  750. static int is_left_neighbor_in_cache(struct tree_balance *tb, int h)
  751. {
  752. struct buffer_head *father, *left;
  753. struct super_block *sb = tb->tb_sb;
  754. b_blocknr_t left_neighbor_blocknr;
  755. int left_neighbor_position;
  756. /* Father of the left neighbor does not exist. */
  757. if (!tb->FL[h])
  758. return 0;
  759. /* Calculate father of the node to be balanced. */
  760. father = PATH_H_PBUFFER(tb->tb_path, h + 1);
  761. RFALSE(!father ||
  762. !B_IS_IN_TREE(father) ||
  763. !B_IS_IN_TREE(tb->FL[h]) ||
  764. !buffer_uptodate(father) ||
  765. !buffer_uptodate(tb->FL[h]),
  766. "vs-8165: F[h] (%b) or FL[h] (%b) is invalid",
  767. father, tb->FL[h]);
  768. /* Get position of the pointer to the left neighbor into the left father. */
  769. left_neighbor_position = (father == tb->FL[h]) ?
  770. tb->lkey[h] : B_NR_ITEMS(tb->FL[h]);
  771. /* Get left neighbor block number. */
  772. left_neighbor_blocknr =
  773. B_N_CHILD_NUM(tb->FL[h], left_neighbor_position);
  774. /* Look for the left neighbor in the cache. */
  775. if ((left = sb_find_get_block(sb, left_neighbor_blocknr))) {
  776. RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left),
  777. "vs-8170: left neighbor (%b %z) is not in the tree",
  778. left, left);
  779. put_bh(left);
  780. return 1;
  781. }
  782. return 0;
  783. }
  784. #define LEFT_PARENTS 'l'
  785. #define RIGHT_PARENTS 'r'
  786. static void decrement_key(struct cpu_key *key)
  787. {
  788. // call item specific function for this key
  789. item_ops[cpu_key_k_type(key)]->decrement_key(key);
  790. }
  791. /* Calculate far left/right parent of the left/right neighbor of the current node, that
  792. * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h].
  793. * Calculate left/right common parent of the current node and L[h]/R[h].
  794. * Calculate left/right delimiting key position.
  795. * Returns: PATH_INCORRECT - path in the tree is not correct;
  796. SCHEDULE_OCCURRED - schedule occurred while the function worked;
  797. * CARRY_ON - schedule didn't occur while the function worked;
  798. */
  799. static int get_far_parent(struct tree_balance *tb,
  800. int h,
  801. struct buffer_head **pfather,
  802. struct buffer_head **pcom_father, char c_lr_par)
  803. {
  804. struct buffer_head *parent;
  805. INITIALIZE_PATH(s_path_to_neighbor_father);
  806. struct treepath *path = tb->tb_path;
  807. struct cpu_key s_lr_father_key;
  808. int counter,
  809. position = INT_MAX,
  810. first_last_position = 0,
  811. path_offset = PATH_H_PATH_OFFSET(path, h);
  812. /* Starting from F[h] go upwards in the tree, and look for the common
  813. ancestor of F[h], and its neighbor l/r, that should be obtained. */
  814. counter = path_offset;
  815. RFALSE(counter < FIRST_PATH_ELEMENT_OFFSET,
  816. "PAP-8180: invalid path length");
  817. for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) {
  818. /* Check whether parent of the current buffer in the path is really parent in the tree. */
  819. if (!B_IS_IN_TREE
  820. (parent = PATH_OFFSET_PBUFFER(path, counter - 1)))
  821. return REPEAT_SEARCH;
  822. /* Check whether position in the parent is correct. */
  823. if ((position =
  824. PATH_OFFSET_POSITION(path,
  825. counter - 1)) >
  826. B_NR_ITEMS(parent))
  827. return REPEAT_SEARCH;
  828. /* Check whether parent at the path really points to the child. */
  829. if (B_N_CHILD_NUM(parent, position) !=
  830. PATH_OFFSET_PBUFFER(path, counter)->b_blocknr)
  831. return REPEAT_SEARCH;
  832. /* Return delimiting key if position in the parent is not equal to first/last one. */
  833. if (c_lr_par == RIGHT_PARENTS)
  834. first_last_position = B_NR_ITEMS(parent);
  835. if (position != first_last_position) {
  836. *pcom_father = parent;
  837. get_bh(*pcom_father);
  838. /*(*pcom_father = parent)->b_count++; */
  839. break;
  840. }
  841. }
  842. /* if we are in the root of the tree, then there is no common father */
  843. if (counter == FIRST_PATH_ELEMENT_OFFSET) {
  844. /* Check whether first buffer in the path is the root of the tree. */
  845. if (PATH_OFFSET_PBUFFER
  846. (tb->tb_path,
  847. FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
  848. SB_ROOT_BLOCK(tb->tb_sb)) {
  849. *pfather = *pcom_father = NULL;
  850. return CARRY_ON;
  851. }
  852. return REPEAT_SEARCH;
  853. }
  854. RFALSE(B_LEVEL(*pcom_father) <= DISK_LEAF_NODE_LEVEL,
  855. "PAP-8185: (%b %z) level too small",
  856. *pcom_father, *pcom_father);
  857. /* Check whether the common parent is locked. */
  858. if (buffer_locked(*pcom_father)) {
  859. /* Release the write lock while the buffer is busy */
  860. reiserfs_write_unlock(tb->tb_sb);
  861. __wait_on_buffer(*pcom_father);
  862. reiserfs_write_lock(tb->tb_sb);
  863. if (FILESYSTEM_CHANGED_TB(tb)) {
  864. brelse(*pcom_father);
  865. return REPEAT_SEARCH;
  866. }
  867. }
  868. /* So, we got common parent of the current node and its left/right neighbor.
  869. Now we are geting the parent of the left/right neighbor. */
  870. /* Form key to get parent of the left/right neighbor. */
  871. le_key2cpu_key(&s_lr_father_key,
  872. B_N_PDELIM_KEY(*pcom_father,
  873. (c_lr_par ==
  874. LEFT_PARENTS) ? (tb->lkey[h - 1] =
  875. position -
  876. 1) : (tb->rkey[h -
  877. 1] =
  878. position)));
  879. if (c_lr_par == LEFT_PARENTS)
  880. decrement_key(&s_lr_father_key);
  881. if (search_by_key
  882. (tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father,
  883. h + 1) == IO_ERROR)
  884. // path is released
  885. return IO_ERROR;
  886. if (FILESYSTEM_CHANGED_TB(tb)) {
  887. pathrelse(&s_path_to_neighbor_father);
  888. brelse(*pcom_father);
  889. return REPEAT_SEARCH;
  890. }
  891. *pfather = PATH_PLAST_BUFFER(&s_path_to_neighbor_father);
  892. RFALSE(B_LEVEL(*pfather) != h + 1,
  893. "PAP-8190: (%b %z) level too small", *pfather, *pfather);
  894. RFALSE(s_path_to_neighbor_father.path_length <
  895. FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small");
  896. s_path_to_neighbor_father.path_length--;
  897. pathrelse(&s_path_to_neighbor_father);
  898. return CARRY_ON;
  899. }
  900. /* Get parents of neighbors of node in the path(S[path_offset]) and common parents of
  901. * S[path_offset] and L[path_offset]/R[path_offset]: F[path_offset], FL[path_offset],
  902. * FR[path_offset], CFL[path_offset], CFR[path_offset].
  903. * Calculate numbers of left and right delimiting keys position: lkey[path_offset], rkey[path_offset].
  904. * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
  905. * CARRY_ON - schedule didn't occur while the function worked;
  906. */
  907. static int get_parents(struct tree_balance *tb, int h)
  908. {
  909. struct treepath *path = tb->tb_path;
  910. int position,
  911. ret,
  912. path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h);
  913. struct buffer_head *curf, *curcf;
  914. /* Current node is the root of the tree or will be root of the tree */
  915. if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
  916. /* The root can not have parents.
  917. Release nodes which previously were obtained as parents of the current node neighbors. */
  918. brelse(tb->FL[h]);
  919. brelse(tb->CFL[h]);
  920. brelse(tb->FR[h]);
  921. brelse(tb->CFR[h]);
  922. tb->FL[h] = NULL;
  923. tb->CFL[h] = NULL;
  924. tb->FR[h] = NULL;
  925. tb->CFR[h] = NULL;
  926. return CARRY_ON;
  927. }
  928. /* Get parent FL[path_offset] of L[path_offset]. */
  929. position = PATH_OFFSET_POSITION(path, path_offset - 1);
  930. if (position) {
  931. /* Current node is not the first child of its parent. */
  932. curf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
  933. curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
  934. get_bh(curf);
  935. get_bh(curf);
  936. tb->lkey[h] = position - 1;
  937. } else {
  938. /* Calculate current parent of L[path_offset], which is the left neighbor of the current node.
  939. Calculate current common parent of L[path_offset] and the current node. Note that
  940. CFL[path_offset] not equal FL[path_offset] and CFL[path_offset] not equal F[path_offset].
  941. Calculate lkey[path_offset]. */
  942. if ((ret = get_far_parent(tb, h + 1, &curf,
  943. &curcf,
  944. LEFT_PARENTS)) != CARRY_ON)
  945. return ret;
  946. }
  947. brelse(tb->FL[h]);
  948. tb->FL[h] = curf; /* New initialization of FL[h]. */
  949. brelse(tb->CFL[h]);
  950. tb->CFL[h] = curcf; /* New initialization of CFL[h]. */
  951. RFALSE((curf && !B_IS_IN_TREE(curf)) ||
  952. (curcf && !B_IS_IN_TREE(curcf)),
  953. "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf);
  954. /* Get parent FR[h] of R[h]. */
  955. /* Current node is the last child of F[h]. FR[h] != F[h]. */
  956. if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) {
  957. /* Calculate current parent of R[h], which is the right neighbor of F[h].
  958. Calculate current common parent of R[h] and current node. Note that CFR[h]
  959. not equal FR[path_offset] and CFR[h] not equal F[h]. */
  960. if ((ret =
  961. get_far_parent(tb, h + 1, &curf, &curcf,
  962. RIGHT_PARENTS)) != CARRY_ON)
  963. return ret;
  964. } else {
  965. /* Current node is not the last child of its parent F[h]. */
  966. curf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
  967. curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
  968. get_bh(curf);
  969. get_bh(curf);
  970. tb->rkey[h] = position;
  971. }
  972. brelse(tb->FR[h]);
  973. /* New initialization of FR[path_offset]. */
  974. tb->FR[h] = curf;
  975. brelse(tb->CFR[h]);
  976. /* New initialization of CFR[path_offset]. */
  977. tb->CFR[h] = curcf;
  978. RFALSE((curf && !B_IS_IN_TREE(curf)) ||
  979. (curcf && !B_IS_IN_TREE(curcf)),
  980. "PAP-8205: FR (%b) or CFR (%b) is invalid", curf, curcf);
  981. return CARRY_ON;
  982. }
  983. /* it is possible to remove node as result of shiftings to
  984. neighbors even when we insert or paste item. */
  985. static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
  986. struct tree_balance *tb, int h)
  987. {
  988. struct buffer_head *Sh = PATH_H_PBUFFER(tb->tb_path, h);
  989. int levbytes = tb->insert_size[h];
  990. struct item_head *ih;
  991. struct reiserfs_key *r_key = NULL;
  992. ih = B_N_PITEM_HEAD(Sh, 0);
  993. if (tb->CFR[h])
  994. r_key = B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]);
  995. if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes
  996. /* shifting may merge items which might save space */
  997. -
  998. ((!h
  999. && op_is_left_mergeable(&(ih->ih_key), Sh->b_size)) ? IH_SIZE : 0)
  1000. -
  1001. ((!h && r_key
  1002. && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0)
  1003. + ((h) ? KEY_SIZE : 0)) {
  1004. /* node can not be removed */
  1005. if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */
  1006. if (!h)
  1007. tb->s0num =
  1008. B_NR_ITEMS(Sh) +
  1009. ((mode == M_INSERT) ? 1 : 0);
  1010. set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
  1011. return NO_BALANCING_NEEDED;
  1012. }
  1013. }
  1014. PROC_INFO_INC(tb->tb_sb, can_node_be_removed[h]);
  1015. return !NO_BALANCING_NEEDED;
  1016. }
  1017. /* Check whether current node S[h] is balanced when increasing its size by
  1018. * Inserting or Pasting.
  1019. * Calculate parameters for balancing for current level h.
  1020. * Parameters:
  1021. * tb tree_balance structure;
  1022. * h current level of the node;
  1023. * inum item number in S[h];
  1024. * mode i - insert, p - paste;
  1025. * Returns: 1 - schedule occurred;
  1026. * 0 - balancing for higher levels needed;
  1027. * -1 - no balancing for higher levels needed;
  1028. * -2 - no disk space.
  1029. */
  1030. /* ip means Inserting or Pasting */
  1031. static int ip_check_balance(struct tree_balance *tb, int h)
  1032. {
  1033. struct virtual_node *vn = tb->tb_vn;
  1034. int levbytes, /* Number of bytes that must be inserted into (value
  1035. is negative if bytes are deleted) buffer which
  1036. contains node being balanced. The mnemonic is
  1037. that the attempted change in node space used level
  1038. is levbytes bytes. */
  1039. ret;
  1040. int lfree, sfree, rfree /* free space in L, S and R */ ;
  1041. /* nver is short for number of vertixes, and lnver is the number if
  1042. we shift to the left, rnver is the number if we shift to the
  1043. right, and lrnver is the number if we shift in both directions.
  1044. The goal is to minimize first the number of vertixes, and second,
  1045. the number of vertixes whose contents are changed by shifting,
  1046. and third the number of uncached vertixes whose contents are
  1047. changed by shifting and must be read from disk. */
  1048. int nver, lnver, rnver, lrnver;
  1049. /* used at leaf level only, S0 = S[0] is the node being balanced,
  1050. sInum [ I = 0,1,2 ] is the number of items that will
  1051. remain in node SI after balancing. S1 and S2 are new
  1052. nodes that might be created. */
  1053. /* we perform 8 calls to get_num_ver(). For each call we calculate five parameters.
  1054. where 4th parameter is s1bytes and 5th - s2bytes
  1055. */
  1056. short snum012[40] = { 0, }; /* s0num, s1num, s2num for 8 cases
  1057. 0,1 - do not shift and do not shift but bottle
  1058. 2 - shift only whole item to left
  1059. 3 - shift to left and bottle as much as possible
  1060. 4,5 - shift to right (whole items and as much as possible
  1061. 6,7 - shift to both directions (whole items and as much as possible)
  1062. */
  1063. /* Sh is the node whose balance is currently being checked */
  1064. struct buffer_head *Sh;
  1065. Sh = PATH_H_PBUFFER(tb->tb_path, h);
  1066. levbytes = tb->insert_size[h];
  1067. /* Calculate balance parameters for creating new root. */
  1068. if (!Sh) {
  1069. if (!h)
  1070. reiserfs_panic(tb->tb_sb, "vs-8210",
  1071. "S[0] can not be 0");
  1072. switch (ret = get_empty_nodes(tb, h)) {
  1073. case CARRY_ON:
  1074. set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
  1075. return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */
  1076. case NO_DISK_SPACE:
  1077. case REPEAT_SEARCH:
  1078. return ret;
  1079. default:
  1080. reiserfs_panic(tb->tb_sb, "vs-8215", "incorrect "
  1081. "return value of get_empty_nodes");
  1082. }
  1083. }
  1084. if ((ret = get_parents(tb, h)) != CARRY_ON) /* get parents of S[h] neighbors. */
  1085. return ret;
  1086. sfree = B_FREE_SPACE(Sh);
  1087. /* get free space of neighbors */
  1088. rfree = get_rfree(tb, h);
  1089. lfree = get_lfree(tb, h);
  1090. if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) ==
  1091. NO_BALANCING_NEEDED)
  1092. /* and new item fits into node S[h] without any shifting */
  1093. return NO_BALANCING_NEEDED;
  1094. create_virtual_node(tb, h);
  1095. /*
  1096. determine maximal number of items we can shift to the left neighbor (in tb structure)
  1097. and the maximal number of bytes that can flow to the left neighbor
  1098. from the left most liquid item that cannot be shifted from S[0] entirely (returned value)
  1099. */
  1100. check_left(tb, h, lfree);
  1101. /*
  1102. determine maximal number of items we can shift to the right neighbor (in tb structure)
  1103. and the maximal number of bytes that can flow to the right neighbor
  1104. from the right most liquid item that cannot be shifted from S[0] entirely (returned value)
  1105. */
  1106. check_right(tb, h, rfree);
  1107. /* all contents of internal node S[h] can be moved into its
  1108. neighbors, S[h] will be removed after balancing */
  1109. if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) {
  1110. int to_r;
  1111. /* Since we are working on internal nodes, and our internal
  1112. nodes have fixed size entries, then we can balance by the
  1113. number of items rather than the space they consume. In this
  1114. routine we set the left node equal to the right node,
  1115. allowing a difference of less than or equal to 1 child
  1116. pointer. */
  1117. to_r =
  1118. ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
  1119. vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
  1120. tb->rnum[h]);
  1121. set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
  1122. -1, -1);
  1123. return CARRY_ON;
  1124. }
  1125. /* this checks balance condition, that any two neighboring nodes can not fit in one node */
  1126. RFALSE(h &&
  1127. (tb->lnum[h] >= vn->vn_nr_item + 1 ||
  1128. tb->rnum[h] >= vn->vn_nr_item + 1),
  1129. "vs-8220: tree is not balanced on internal level");
  1130. RFALSE(!h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) ||
  1131. (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))),
  1132. "vs-8225: tree is not balanced on leaf level");
  1133. /* all contents of S[0] can be moved into its neighbors
  1134. S[0] will be removed after balancing. */
  1135. if (!h && is_leaf_removable(tb))
  1136. return CARRY_ON;
  1137. /* why do we perform this check here rather than earlier??
  1138. Answer: we can win 1 node in some cases above. Moreover we
  1139. checked it above, when we checked, that S[0] is not removable
  1140. in principle */
  1141. if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */
  1142. if (!h)
  1143. tb->s0num = vn->vn_nr_item;
  1144. set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
  1145. return NO_BALANCING_NEEDED;
  1146. }
  1147. {
  1148. int lpar, rpar, nset, lset, rset, lrset;
  1149. /*
  1150. * regular overflowing of the node
  1151. */
  1152. /* get_num_ver works in 2 modes (FLOW & NO_FLOW)
  1153. lpar, rpar - number of items we can shift to left/right neighbor (including splitting item)
  1154. nset, lset, rset, lrset - shows, whether flowing items give better packing
  1155. */
  1156. #define FLOW 1
  1157. #define NO_FLOW 0 /* do not any splitting */
  1158. /* we choose one the following */
  1159. #define NOTHING_SHIFT_NO_FLOW 0
  1160. #define NOTHING_SHIFT_FLOW 5
  1161. #define LEFT_SHIFT_NO_FLOW 10
  1162. #define LEFT_SHIFT_FLOW 15
  1163. #define RIGHT_SHIFT_NO_FLOW 20
  1164. #define RIGHT_SHIFT_FLOW 25
  1165. #define LR_SHIFT_NO_FLOW 30
  1166. #define LR_SHIFT_FLOW 35
  1167. lpar = tb->lnum[h];
  1168. rpar = tb->rnum[h];
  1169. /* calculate number of blocks S[h] must be split into when
  1170. nothing is shifted to the neighbors,
  1171. as well as number of items in each part of the split node (s012 numbers),
  1172. and number of bytes (s1bytes) of the shared drop which flow to S1 if any */
  1173. nset = NOTHING_SHIFT_NO_FLOW;
  1174. nver = get_num_ver(vn->vn_mode, tb, h,
  1175. 0, -1, h ? vn->vn_nr_item : 0, -1,
  1176. snum012, NO_FLOW);
  1177. if (!h) {
  1178. int nver1;
  1179. /* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */
  1180. nver1 = get_num_ver(vn->vn_mode, tb, h,
  1181. 0, -1, 0, -1,
  1182. snum012 + NOTHING_SHIFT_FLOW, FLOW);
  1183. if (nver > nver1)
  1184. nset = NOTHING_SHIFT_FLOW, nver = nver1;
  1185. }
  1186. /* calculate number of blocks S[h] must be split into when
  1187. l_shift_num first items and l_shift_bytes of the right most
  1188. liquid item to be shifted are shifted to the left neighbor,
  1189. as well as number of items in each part of the splitted node (s012 numbers),
  1190. and number of bytes (s1bytes) of the shared drop which flow to S1 if any
  1191. */
  1192. lset = LEFT_SHIFT_NO_FLOW;
  1193. lnver = get_num_ver(vn->vn_mode, tb, h,
  1194. lpar - ((h || tb->lbytes == -1) ? 0 : 1),
  1195. -1, h ? vn->vn_nr_item : 0, -1,
  1196. snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW);
  1197. if (!h) {
  1198. int lnver1;
  1199. lnver1 = get_num_ver(vn->vn_mode, tb, h,
  1200. lpar -
  1201. ((tb->lbytes != -1) ? 1 : 0),
  1202. tb->lbytes, 0, -1,
  1203. snum012 + LEFT_SHIFT_FLOW, FLOW);
  1204. if (lnver > lnver1)
  1205. lset = LEFT_SHIFT_FLOW, lnver = lnver1;
  1206. }
  1207. /* calculate number of blocks S[h] must be split into when
  1208. r_shift_num first items and r_shift_bytes of the left most
  1209. liquid item to be shifted are shifted to the right neighbor,
  1210. as well as number of items in each part of the splitted node (s012 numbers),
  1211. and number of bytes (s1bytes) of the shared drop which flow to S1 if any
  1212. */
  1213. rset = RIGHT_SHIFT_NO_FLOW;
  1214. rnver = get_num_ver(vn->vn_mode, tb, h,
  1215. 0, -1,
  1216. h ? (vn->vn_nr_item - rpar) : (rpar -
  1217. ((tb->
  1218. rbytes !=
  1219. -1) ? 1 :
  1220. 0)), -1,
  1221. snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW);
  1222. if (!h) {
  1223. int rnver1;
  1224. rnver1 = get_num_ver(vn->vn_mode, tb, h,
  1225. 0, -1,
  1226. (rpar -
  1227. ((tb->rbytes != -1) ? 1 : 0)),
  1228. tb->rbytes,
  1229. snum012 + RIGHT_SHIFT_FLOW, FLOW);
  1230. if (rnver > rnver1)
  1231. rset = RIGHT_SHIFT_FLOW, rnver = rnver1;
  1232. }
  1233. /* calculate number of blocks S[h] must be split into when
  1234. items are shifted in both directions,
  1235. as well as number of items in each part of the splitted node (s012 numbers),
  1236. and number of bytes (s1bytes) of the shared drop which flow to S1 if any
  1237. */
  1238. lrset = LR_SHIFT_NO_FLOW;
  1239. lrnver = get_num_ver(vn->vn_mode, tb, h,
  1240. lpar - ((h || tb->lbytes == -1) ? 0 : 1),
  1241. -1,
  1242. h ? (vn->vn_nr_item - rpar) : (rpar -
  1243. ((tb->
  1244. rbytes !=
  1245. -1) ? 1 :
  1246. 0)), -1,
  1247. snum012 + LR_SHIFT_NO_FLOW, NO_FLOW);
  1248. if (!h) {
  1249. int lrnver1;
  1250. lrnver1 = get_num_ver(vn->vn_mode, tb, h,
  1251. lpar -
  1252. ((tb->lbytes != -1) ? 1 : 0),
  1253. tb->lbytes,
  1254. (rpar -
  1255. ((tb->rbytes != -1) ? 1 : 0)),
  1256. tb->rbytes,
  1257. snum012 + LR_SHIFT_FLOW, FLOW);
  1258. if (lrnver > lrnver1)
  1259. lrset = LR_SHIFT_FLOW, lrnver = lrnver1;
  1260. }
  1261. /* Our general shifting strategy is:
  1262. 1) to minimized number of new nodes;
  1263. 2) to minimized number of neighbors involved in shifting;
  1264. 3) to minimized number of disk reads; */
  1265. /* we can win TWO or ONE nodes by shifting in both directions */
  1266. if (lrnver < lnver && lrnver < rnver) {
  1267. RFALSE(h &&
  1268. (tb->lnum[h] != 1 ||
  1269. tb->rnum[h] != 1 ||
  1270. lrnver != 1 || rnver != 2 || lnver != 2
  1271. || h != 1), "vs-8230: bad h");
  1272. if (lrset == LR_SHIFT_FLOW)
  1273. set_parameters(tb, h, tb->lnum[h], tb->rnum[h],
  1274. lrnver, snum012 + lrset,
  1275. tb->lbytes, tb->rbytes);
  1276. else
  1277. set_parameters(tb, h,
  1278. tb->lnum[h] -
  1279. ((tb->lbytes == -1) ? 0 : 1),
  1280. tb->rnum[h] -
  1281. ((tb->rbytes == -1) ? 0 : 1),
  1282. lrnver, snum012 + lrset, -1, -1);
  1283. return CARRY_ON;
  1284. }
  1285. /* if shifting doesn't lead to better packing then don't shift */
  1286. if (nver == lrnver) {
  1287. set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1,
  1288. -1);
  1289. return CARRY_ON;
  1290. }
  1291. /* now we know that for better packing shifting in only one
  1292. direction either to the left or to the right is required */
  1293. /* if shifting to the left is better than shifting to the right */
  1294. if (lnver < rnver) {
  1295. SET_PAR_SHIFT_LEFT;
  1296. return CARRY_ON;
  1297. }
  1298. /* if shifting to the right is better than shifting to the left */
  1299. if (lnver > rnver) {
  1300. SET_PAR_SHIFT_RIGHT;
  1301. return CARRY_ON;
  1302. }
  1303. /* now shifting in either direction gives the same number
  1304. of nodes and we can make use of the cached neighbors */
  1305. if (is_left_neighbor_in_cache(tb, h)) {
  1306. SET_PAR_SHIFT_LEFT;
  1307. return CARRY_ON;
  1308. }
  1309. /* shift to the right independently on whether the right neighbor in cache or not */
  1310. SET_PAR_SHIFT_RIGHT;
  1311. return CARRY_ON;
  1312. }
  1313. }
  1314. /* Check whether current node S[h] is balanced when Decreasing its size by
  1315. * Deleting or Cutting for INTERNAL node of S+tree.
  1316. * Calculate parameters for balancing for current level h.
  1317. * Parameters:
  1318. * tb tree_balance structure;
  1319. * h current level of the node;
  1320. * inum item number in S[h];
  1321. * mode i - insert, p - paste;
  1322. * Returns: 1 - schedule occurred;
  1323. * 0 - balancing for higher levels needed;
  1324. * -1 - no balancing for higher levels needed;
  1325. * -2 - no disk space.
  1326. *
  1327. * Note: Items of internal nodes have fixed size, so the balance condition for
  1328. * the internal part of S+tree is as for the B-trees.
  1329. */
  1330. static int dc_check_balance_internal(struct tree_balance *tb, int h)
  1331. {
  1332. struct virtual_node *vn = tb->tb_vn;
  1333. /* Sh is the node whose balance is currently being checked,
  1334. and Fh is its father. */
  1335. struct buffer_head *Sh, *Fh;
  1336. int maxsize, ret;
  1337. int lfree, rfree /* free space in L and R */ ;
  1338. Sh = PATH_H_PBUFFER(tb->tb_path, h);
  1339. Fh = PATH_H_PPARENT(tb->tb_path, h);
  1340. maxsize = MAX_CHILD_SIZE(Sh);
  1341. /* using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */
  1342. /* new_nr_item = number of items node would have if operation is */
  1343. /* performed without balancing (new_nr_item); */
  1344. create_virtual_node(tb, h);
  1345. if (!Fh) { /* S[h] is the root. */
  1346. if (vn->vn_nr_item > 0) {
  1347. set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
  1348. return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */
  1349. }
  1350. /* new_nr_item == 0.
  1351. * Current root will be deleted resulting in
  1352. * decrementing the tree height. */
  1353. set_parameters(tb, h, 0, 0, 0, NULL, -1, -1);
  1354. return CARRY_ON;
  1355. }
  1356. if ((ret = get_parents(tb, h)) != CARRY_ON)
  1357. return ret;
  1358. /* get free space of neighbors */
  1359. rfree = get_rfree(tb, h);
  1360. lfree = get_lfree(tb, h);
  1361. /* determine maximal number of items we can fit into neighbors */
  1362. check_left(tb, h, lfree);
  1363. check_right(tb, h, rfree);
  1364. if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) { /* Balance condition for the internal node is valid.
  1365. * In this case we balance only if it leads to better packing. */
  1366. if (vn->vn_nr_item == MIN_NR_KEY(Sh)) { /* Here we join S[h] with one of its neighbors,
  1367. * which is impossible with greater values of new_nr_item. */
  1368. if (tb->lnum[h] >= vn->vn_nr_item + 1) {
  1369. /* All contents of S[h] can be moved to L[h]. */
  1370. int n;
  1371. int order_L;
  1372. order_L =
  1373. ((n =
  1374. PATH_H_B_ITEM_ORDER(tb->tb_path,
  1375. h)) ==
  1376. 0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
  1377. n = dc_size(B_N_CHILD(tb->FL[h], order_L)) /
  1378. (DC_SIZE + KEY_SIZE);
  1379. set_parameters(tb, h, -n - 1, 0, 0, NULL, -1,
  1380. -1);
  1381. return CARRY_ON;
  1382. }
  1383. if (tb->rnum[h] >= vn->vn_nr_item + 1) {
  1384. /* All contents of S[h] can be moved to R[h]. */
  1385. int n;
  1386. int order_R;
  1387. order_R =
  1388. ((n =
  1389. PATH_H_B_ITEM_ORDER(tb->tb_path,
  1390. h)) ==
  1391. B_NR_ITEMS(Fh)) ? 0 : n + 1;
  1392. n = dc_size(B_N_CHILD(tb->FR[h], order_R)) /
  1393. (DC_SIZE + KEY_SIZE);
  1394. set_parameters(tb, h, 0, -n - 1, 0, NULL, -1,
  1395. -1);
  1396. return CARRY_ON;
  1397. }
  1398. }
  1399. if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
  1400. /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
  1401. int to_r;
  1402. to_r =
  1403. ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] -
  1404. tb->rnum[h] + vn->vn_nr_item + 1) / 2 -
  1405. (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
  1406. set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r,
  1407. 0, NULL, -1, -1);
  1408. return CARRY_ON;
  1409. }
  1410. /* Balancing does not lead to better packing. */
  1411. set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
  1412. return NO_BALANCING_NEEDED;
  1413. }
  1414. /* Current node contain insufficient number of items. Balancing is required. */
  1415. /* Check whether we can merge S[h] with left neighbor. */
  1416. if (tb->lnum[h] >= vn->vn_nr_item + 1)
  1417. if (is_left_neighbor_in_cache(tb, h)
  1418. || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h]) {
  1419. int n;
  1420. int order_L;
  1421. order_L =
  1422. ((n =
  1423. PATH_H_B_ITEM_ORDER(tb->tb_path,
  1424. h)) ==
  1425. 0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
  1426. n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / (DC_SIZE +
  1427. KEY_SIZE);
  1428. set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, -1);
  1429. return CARRY_ON;
  1430. }
  1431. /* Check whether we can merge S[h] with right neighbor. */
  1432. if (tb->rnum[h] >= vn->vn_nr_item + 1) {
  1433. int n;
  1434. int order_R;
  1435. order_R =
  1436. ((n =
  1437. PATH_H_B_ITEM_ORDER(tb->tb_path,
  1438. h)) == B_NR_ITEMS(Fh)) ? 0 : (n + 1);
  1439. n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / (DC_SIZE +
  1440. KEY_SIZE);
  1441. set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, -1);
  1442. return CARRY_ON;
  1443. }
  1444. /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
  1445. if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
  1446. int to_r;
  1447. to_r =
  1448. ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
  1449. vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
  1450. tb->rnum[h]);
  1451. set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
  1452. -1, -1);
  1453. return CARRY_ON;
  1454. }
  1455. /* For internal nodes try to borrow item from a neighbor */
  1456. RFALSE(!tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root");
  1457. /* Borrow one or two items from caching neighbor */
  1458. if (is_left_neighbor_in_cache(tb, h) || !tb->FR[h]) {
  1459. int from_l;
  1460. from_l =
  1461. (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item +
  1462. 1) / 2 - (vn->vn_nr_item + 1);
  1463. set_parameters(tb, h, -from_l, 0, 1, NULL, -1, -1);
  1464. return CARRY_ON;
  1465. }
  1466. set_parameters(tb, h, 0,
  1467. -((MAX_NR_KEY(Sh) + 1 - tb->rnum[h] + vn->vn_nr_item +
  1468. 1) / 2 - (vn->vn_nr_item + 1)), 1, NULL, -1, -1);
  1469. return CARRY_ON;
  1470. }
  1471. /* Check whether current node S[h] is balanced when Decreasing its size by
  1472. * Deleting or Truncating for LEAF node of S+tree.
  1473. * Calculate parameters for balancing for current level h.
  1474. * Parameters:
  1475. * tb tree_balance structure;
  1476. * h current level of the node;
  1477. * inum item number in S[h];
  1478. * mode i - insert, p - paste;
  1479. * Returns: 1 - schedule occurred;
  1480. * 0 - balancing for higher levels needed;
  1481. * -1 - no balancing for higher levels needed;
  1482. * -2 - no disk space.
  1483. */
  1484. static int dc_check_balance_leaf(struct tree_balance *tb, int h)
  1485. {
  1486. struct virtual_node *vn = tb->tb_vn;
  1487. /* Number of bytes that must be deleted from
  1488. (value is negative if bytes are deleted) buffer which
  1489. contains node being balanced. The mnemonic is that the
  1490. attempted change in node space used level is levbytes bytes. */
  1491. int levbytes;
  1492. /* the maximal item size */
  1493. int maxsize, ret;
  1494. /* S0 is the node whose balance is currently being checked,
  1495. and F0 is its father. */
  1496. struct buffer_head *S0, *F0;
  1497. int lfree, rfree /* free space in L and R */ ;
  1498. S0 = PATH_H_PBUFFER(tb->tb_path, 0);
  1499. F0 = PATH_H_PPARENT(tb->tb_path, 0);
  1500. levbytes = tb->insert_size[h];
  1501. maxsize = MAX_CHILD_SIZE(S0); /* maximal possible size of an item */
  1502. if (!F0) { /* S[0] is the root now. */
  1503. RFALSE(-levbytes >= maxsize - B_FREE_SPACE(S0),
  1504. "vs-8240: attempt to create empty buffer tree");
  1505. set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
  1506. return NO_BALANCING_NEEDED;
  1507. }
  1508. if ((ret = get_parents(tb, h)) != CARRY_ON)
  1509. return ret;
  1510. /* get free space of neighbors */
  1511. rfree = get_rfree(tb, h);
  1512. lfree = get_lfree(tb, h);
  1513. create_virtual_node(tb, h);
  1514. /* if 3 leaves can be merge to one, set parameters and return */
  1515. if (are_leaves_removable(tb, lfree, rfree))
  1516. return CARRY_ON;
  1517. /* determine maximal number of items we can shift to the left/right neighbor
  1518. and the maximal number of bytes that can flow to the left/right neighbor
  1519. from the left/right most liquid item that cannot be shifted from S[0] entirely
  1520. */
  1521. check_left(tb, h, lfree);
  1522. check_right(tb, h, rfree);
  1523. /* check whether we can merge S with left neighbor. */
  1524. if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1)
  1525. if (is_left_neighbor_in_cache(tb, h) || ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) || /* S can not be merged with R */
  1526. !tb->FR[h]) {
  1527. RFALSE(!tb->FL[h],
  1528. "vs-8245: dc_check_balance_leaf: FL[h] must exist");
  1529. /* set parameter to merge S[0] with its left neighbor */
  1530. set_parameters(tb, h, -1, 0, 0, NULL, -1, -1);
  1531. return CARRY_ON;
  1532. }
  1533. /* check whether we can merge S[0] with right neighbor. */
  1534. if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) {
  1535. set_parameters(tb, h, 0, -1, 0, NULL, -1, -1);
  1536. return CARRY_ON;
  1537. }
  1538. /* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */
  1539. if (is_leaf_removable(tb))
  1540. return CARRY_ON;
  1541. /* Balancing is not required. */
  1542. tb->s0num = vn->vn_nr_item;
  1543. set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
  1544. return NO_BALANCING_NEEDED;
  1545. }
  1546. /* Check whether current node S[h] is balanced when Decreasing its size by
  1547. * Deleting or Cutting.
  1548. * Calculate parameters for balancing for current level h.
  1549. * Parameters:
  1550. * tb tree_balance structure;
  1551. * h current level of the node;
  1552. * inum item number in S[h];
  1553. * mode d - delete, c - cut.
  1554. * Returns: 1 - schedule occurred;
  1555. * 0 - balancing for higher levels needed;
  1556. * -1 - no balancing for higher levels needed;
  1557. * -2 - no disk space.
  1558. */
  1559. static int dc_check_balance(struct tree_balance *tb, int h)
  1560. {
  1561. RFALSE(!(PATH_H_PBUFFER(tb->tb_path, h)),
  1562. "vs-8250: S is not initialized");
  1563. if (h)
  1564. return dc_check_balance_internal(tb, h);
  1565. else
  1566. return dc_check_balance_leaf(tb, h);
  1567. }
  1568. /* Check whether current node S[h] is balanced.
  1569. * Calculate parameters for balancing for current level h.
  1570. * Parameters:
  1571. *
  1572. * tb tree_balance structure:
  1573. *
  1574. * tb is a large structure that must be read about in the header file
  1575. * at the same time as this procedure if the reader is to successfully
  1576. * understand this procedure
  1577. *
  1578. * h current level of the node;
  1579. * inum item number in S[h];
  1580. * mode i - insert, p - paste, d - delete, c - cut.
  1581. * Returns: 1 - schedule occurred;
  1582. * 0 - balancing for higher levels needed;
  1583. * -1 - no balancing for higher levels needed;
  1584. * -2 - no disk space.
  1585. */
  1586. static int check_balance(int mode,
  1587. struct tree_balance *tb,
  1588. int h,
  1589. int inum,
  1590. int pos_in_item,
  1591. struct item_head *ins_ih, const void *data)
  1592. {
  1593. struct virtual_node *vn;
  1594. vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf);
  1595. vn->vn_free_ptr = (char *)(tb->tb_vn + 1);
  1596. vn->vn_mode = mode;
  1597. vn->vn_affected_item_num = inum;
  1598. vn->vn_pos_in_item = pos_in_item;
  1599. vn->vn_ins_ih = ins_ih;
  1600. vn->vn_data = data;
  1601. RFALSE(mode == M_INSERT && !vn->vn_ins_ih,
  1602. "vs-8255: ins_ih can not be 0 in insert mode");
  1603. if (tb->insert_size[h] > 0)
  1604. /* Calculate balance parameters when size of node is increasing. */
  1605. return ip_check_balance(tb, h);
  1606. /* Calculate balance parameters when size of node is decreasing. */
  1607. return dc_check_balance(tb, h);
  1608. }
  1609. /* Check whether parent at the path is the really parent of the current node.*/
  1610. static int get_direct_parent(struct tree_balance *tb, int h)
  1611. {
  1612. struct buffer_head *bh;
  1613. struct treepath *path = tb->tb_path;
  1614. int position,
  1615. path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h);
  1616. /* We are in the root or in the new root. */
  1617. if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
  1618. RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET - 1,
  1619. "PAP-8260: invalid offset in the path");
  1620. if (PATH_OFFSET_PBUFFER(path, FIRST_PATH_ELEMENT_OFFSET)->
  1621. b_blocknr == SB_ROOT_BLOCK(tb->tb_sb)) {
  1622. /* Root is not changed. */
  1623. PATH_OFFSET_PBUFFER(path, path_offset - 1) = NULL;
  1624. PATH_OFFSET_POSITION(path, path_offset - 1) = 0;
  1625. return CARRY_ON;
  1626. }
  1627. return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */
  1628. }
  1629. if (!B_IS_IN_TREE
  1630. (bh = PATH_OFFSET_PBUFFER(path, path_offset - 1)))
  1631. return REPEAT_SEARCH; /* Parent in the path is not in the tree. */
  1632. if ((position =
  1633. PATH_OFFSET_POSITION(path,
  1634. path_offset - 1)) > B_NR_ITEMS(bh))
  1635. return REPEAT_SEARCH;
  1636. if (B_N_CHILD_NUM(bh, position) !=
  1637. PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr)
  1638. /* Parent in the path is not parent of the current node in the tree. */
  1639. return REPEAT_SEARCH;
  1640. if (buffer_locked(bh)) {
  1641. reiserfs_write_unlock(tb->tb_sb);
  1642. __wait_on_buffer(bh);
  1643. reiserfs_write_lock(tb->tb_sb);
  1644. if (FILESYSTEM_CHANGED_TB(tb))
  1645. return REPEAT_SEARCH;
  1646. }
  1647. return CARRY_ON; /* Parent in the path is unlocked and really parent of the current node. */
  1648. }
  1649. /* Using lnum[h] and rnum[h] we should determine what neighbors
  1650. * of S[h] we
  1651. * need in order to balance S[h], and get them if necessary.
  1652. * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
  1653. * CARRY_ON - schedule didn't occur while the function worked;
  1654. */
  1655. static int get_neighbors(struct tree_balance *tb, int h)
  1656. {
  1657. int child_position,
  1658. path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h + 1);
  1659. unsigned long son_number;
  1660. struct super_block *sb = tb->tb_sb;
  1661. struct buffer_head *bh;
  1662. PROC_INFO_INC(sb, get_neighbors[h]);
  1663. if (tb->lnum[h]) {
  1664. /* We need left neighbor to balance S[h]. */
  1665. PROC_INFO_INC(sb, need_l_neighbor[h]);
  1666. bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset);
  1667. RFALSE(bh == tb->FL[h] &&
  1668. !PATH_OFFSET_POSITION(tb->tb_path, path_offset),
  1669. "PAP-8270: invalid position in the parent");
  1670. child_position =
  1671. (bh ==
  1672. tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb->
  1673. FL[h]);
  1674. son_number = B_N_CHILD_NUM(tb->FL[h], child_position);
  1675. reiserfs_write_unlock(sb);
  1676. bh = sb_bread(sb, son_number);
  1677. reiserfs_write_lock(sb);
  1678. if (!bh)
  1679. return IO_ERROR;
  1680. if (FILESYSTEM_CHANGED_TB(tb)) {
  1681. brelse(bh);
  1682. PROC_INFO_INC(sb, get_neighbors_restart[h]);
  1683. return REPEAT_SEARCH;
  1684. }
  1685. RFALSE(!B_IS_IN_TREE(tb->FL[h]) ||
  1686. child_position > B_NR_ITEMS(tb->FL[h]) ||
  1687. B_N_CHILD_NUM(tb->FL[h], child_position) !=
  1688. bh->b_blocknr, "PAP-8275: invalid parent");
  1689. RFALSE(!B_IS_IN_TREE(bh), "PAP-8280: invalid child");
  1690. RFALSE(!h &&
  1691. B_FREE_SPACE(bh) !=
  1692. MAX_CHILD_SIZE(bh) -
  1693. dc_size(B_N_CHILD(tb->FL[0], child_position)),
  1694. "PAP-8290: invalid child size of left neighbor");
  1695. brelse(tb->L[h]);
  1696. tb->L[h] = bh;
  1697. }
  1698. /* We need right neighbor to balance S[path_offset]. */
  1699. if (tb->rnum[h]) { /* We need right neighbor to balance S[path_offset]. */
  1700. PROC_INFO_INC(sb, need_r_neighbor[h]);
  1701. bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset);
  1702. RFALSE(bh == tb->FR[h] &&
  1703. PATH_OFFSET_POSITION(tb->tb_path,
  1704. path_offset) >=
  1705. B_NR_ITEMS(bh),
  1706. "PAP-8295: invalid position in the parent");
  1707. child_position =
  1708. (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0;
  1709. son_number = B_N_CHILD_NUM(tb->FR[h], child_position);
  1710. reiserfs_write_unlock(sb);
  1711. bh = sb_bread(sb, son_number);
  1712. reiserfs_write_lock(sb);
  1713. if (!bh)
  1714. return IO_ERROR;
  1715. if (FILESYSTEM_CHANGED_TB(tb)) {
  1716. brelse(bh);
  1717. PROC_INFO_INC(sb, get_neighbors_restart[h]);
  1718. return REPEAT_SEARCH;
  1719. }
  1720. brelse(tb->R[h]);
  1721. tb->R[h] = bh;
  1722. RFALSE(!h
  1723. && B_FREE_SPACE(bh) !=
  1724. MAX_CHILD_SIZE(bh) -
  1725. dc_size(B_N_CHILD(tb->FR[0], child_position)),
  1726. "PAP-8300: invalid child size of right neighbor (%d != %d - %d)",
  1727. B_FREE_SPACE(bh), MAX_CHILD_SIZE(bh),
  1728. dc_size(B_N_CHILD(tb->FR[0], child_position)));
  1729. }
  1730. return CARRY_ON;
  1731. }
  1732. static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh)
  1733. {
  1734. int max_num_of_items;
  1735. int max_num_of_entries;
  1736. unsigned long blocksize = sb->s_blocksize;
  1737. #define MIN_NAME_LEN 1
  1738. max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN);
  1739. max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) /
  1740. (DEH_SIZE + MIN_NAME_LEN);
  1741. return sizeof(struct virtual_node) +
  1742. max(max_num_of_items * sizeof(struct virtual_item),
  1743. sizeof(struct virtual_item) + sizeof(struct direntry_uarea) +
  1744. (max_num_of_entries - 1) * sizeof(__u16));
  1745. }
  1746. /* maybe we should fail balancing we are going to perform when kmalloc
  1747. fails several times. But now it will loop until kmalloc gets
  1748. required memory */
  1749. static int get_mem_for_virtual_node(struct tree_balance *tb)
  1750. {
  1751. int check_fs = 0;
  1752. int size;
  1753. char *buf;
  1754. size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path));
  1755. if (size > tb->vn_buf_size) {
  1756. /* we have to allocate more memory for virtual node */
  1757. if (tb->vn_buf) {
  1758. /* free memory allocated before */
  1759. kfree(tb->vn_buf);
  1760. /* this is not needed if kfree is atomic */
  1761. check_fs = 1;
  1762. }
  1763. /* virtual node requires now more memory */
  1764. tb->vn_buf_size = size;
  1765. /* get memory for virtual item */
  1766. buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN);
  1767. if (!buf) {
  1768. /* getting memory with GFP_KERNEL priority may involve
  1769. balancing now (due to indirect_to_direct conversion on
  1770. dcache shrinking). So, release path and collected
  1771. resources here */
  1772. free_buffers_in_tb(tb);
  1773. buf = kmalloc(size, GFP_NOFS);
  1774. if (!buf) {
  1775. tb->vn_buf_size = 0;
  1776. }
  1777. tb->vn_buf = buf;
  1778. schedule();
  1779. return REPEAT_SEARCH;
  1780. }
  1781. tb->vn_buf = buf;
  1782. }
  1783. if (check_fs && FILESYSTEM_CHANGED_TB(tb))
  1784. return REPEAT_SEARCH;
  1785. return CARRY_ON;
  1786. }
  #ifdef CONFIG_REISERFS_CHECK
  /*
   * Debug-only consistency check of a buffer that takes part in balancing.
   * A NULL bh is accepted silently; any failed check ends in reiserfs_panic().
   *
   *   descr, level  name and level of the buffer, used only in the messages.
   */
  static void tb_buffer_sanity_check(struct super_block *sb,
                                     struct buffer_head *bh,
                                     const char *descr, int level)
  {
      if (!bh)
          return;

      if (atomic_read(&(bh->b_count)) <= 0)
          reiserfs_panic(sb, "jmacd-1", "negative or zero "
                         "reference counter for buffer %s[%d] "
                         "(%b)", descr, level, bh);

      if (!buffer_uptodate(bh))
          reiserfs_panic(sb, "jmacd-2", "buffer is not up "
                         "to date %s[%d] (%b)",
                         descr, level, bh);

      if (!B_IS_IN_TREE(bh))
          reiserfs_panic(sb, "jmacd-3", "buffer is not "
                         "in tree %s[%d] (%b)",
                         descr, level, bh);

      if (bh->b_bdev != sb->s_bdev)
          reiserfs_panic(sb, "jmacd-4", "buffer has wrong "
                         "device %s[%d] (%b)",
                         descr, level, bh);

      if (bh->b_size != sb->s_blocksize)
          reiserfs_panic(sb, "jmacd-5", "buffer has wrong "
                         "blocksize %s[%d] (%b)",
                         descr, level, bh);

      /*
       * NOTE(review): '>' lets b_blocknr == SB_BLOCK_COUNT(sb) pass;
       * confirm whether '>=' was intended.
       */
      if (bh->b_blocknr > SB_BLOCK_COUNT(sb))
          reiserfs_panic(sb, "jmacd-6", "buffer block "
                         "number too high %s[%d] (%b)",
                         descr, level, bh);
  }
  #else
  /* Without CONFIG_REISERFS_CHECK the sanity check does nothing. */
  static void tb_buffer_sanity_check(struct super_block *sb,
                                     struct buffer_head *bh,
                                     const char *descr, int level)
  {
  }
  #endif
  /*
   * Prepare bh for the journal, passing 0 as the last argument of
   * reiserfs_prepare_for_journal(), and hand back its result.  The caller in
   * this file treats a zero result as "buffer is locked".
   */
  static int clear_all_dirty_bits(struct super_block *s, struct buffer_head *bh)
  {
      int prepared = reiserfs_prepare_for_journal(s, bh, 0);

      return prepared;
  }
  1830. static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
  1831. {
  1832. struct buffer_head *locked;
  1833. #ifdef CONFIG_REISERFS_CHECK
  1834. int repeat_counter = 0;
  1835. #endif
  1836. int i;
  1837. do {
  1838. locked = NULL;
  1839. for (i = tb->tb_path->path_length;
  1840. !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) {
  1841. if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) {
  1842. /* if I understand correctly, we can only be sure the last buffer
  1843. ** in the path is in the tree --clm
  1844. */
  1845. #ifdef CONFIG_REISERFS_CHECK
  1846. if (PATH_PLAST_BUFFER(tb->tb_path) ==
  1847. PATH_OFFSET_PBUFFER(tb->tb_path, i))
  1848. tb_buffer_sanity_check(tb->tb_sb,
  1849. PATH_OFFSET_PBUFFER
  1850. (tb->tb_path,
  1851. i), "S",
  1852. tb->tb_path->
  1853. path_length - i);
  1854. #endif
  1855. if (!clear_all_dirty_bits(tb->tb_sb,
  1856. PATH_OFFSET_PBUFFER
  1857. (tb->tb_path,
  1858. i))) {
  1859. locked =
  1860. PATH_OFFSET_PBUFFER(tb->tb_path,
  1861. i);
  1862. }
  1863. }
  1864. }
  1865. for (i = 0; !locked && i < MAX_HEIGHT && tb->insert_size[i];
  1866. i++) {
  1867. if (tb->lnum[i]) {
  1868. if (tb->L[i]) {
  1869. tb_buffer_sanity_check(tb->tb_sb,
  1870. tb->L[i],
  1871. "L", i);
  1872. if (!clear_all_dirty_bits
  1873. (tb->tb_sb, tb->L[i]))
  1874. locked = tb->L[i];
  1875. }
  1876. if (!locked && tb->FL[i]) {
  1877. tb_buffer_sanity_check(tb->tb_sb,
  1878. tb->FL[i],
  1879. "FL", i);
  1880. if (!clear_all_dirty_bits
  1881. (tb->tb_sb, tb->FL[i]))
  1882. locked = tb->FL[i];
  1883. }
  1884. if (!locked && tb->CFL[i]) {
  1885. tb_buffer_sanity_check(tb->tb_sb,
  1886. tb->CFL[i],
  1887. "CFL", i);
  1888. if (!clear_all_dirty_bits
  1889. (tb->tb_sb, tb->CFL[i]))
  1890. locked = tb->CFL[i];
  1891. }
  1892. }
  1893. if (!locked && (tb->rnum[i])) {
  1894. if (tb->R[i]) {
  1895. tb_buffer_sanity_check(tb->tb_sb,
  1896. tb->R[i],
  1897. "R", i);
  1898. if (!clear_all_dirty_bits
  1899. (tb->tb_sb, tb->R[i]))
  1900. locked = tb->R[i];
  1901. }
  1902. if (!locked && tb->FR[i]) {
  1903. tb_buffer_sanity_check(tb->tb_sb,
  1904. tb->FR[i],
  1905. "FR", i);
  1906. if (!clear_all_dirty_bits
  1907. (tb->tb_sb, tb->FR[i]))
  1908. locked = tb->FR[i];
  1909. }
  1910. if (!locked && tb->CFR[i]) {
  1911. tb_buffer_sanity_check(tb->tb_sb,
  1912. tb->CFR[i],
  1913. "CFR", i);
  1914. if (!clear_all_dirty_bits
  1915. (tb->tb_sb, tb->CFR[i]))
  1916. locked = tb->CFR[i];
  1917. }
  1918. }
  1919. }
  1920. /* as far as I can tell, this is not required. The FEB list seems
  1921. ** to be full of newly allocated nodes, which will never be locked,
  1922. ** dirty, or anything else.
  1923. ** To be safe, I'm putting in the checks and waits in. For the moment,
  1924. ** they are needed to keep the code in journal.c from complaining
  1925. ** about the buffer. That code is inside CONFIG_REISERFS_CHECK as well.
  1926. ** --clm
  1927. */
  1928. for (i = 0; !locked && i < MAX_FEB_SIZE; i++) {
  1929. if (tb->FEB[i]) {
  1930. if (!clear_all_dirty_bits
  1931. (tb->tb_sb, tb->FEB[i]))
  1932. locked = tb->FEB[i];
  1933. }
  1934. }
  1935. if (locked) {
  1936. #ifdef CONFIG_REISERFS_CHECK
  1937. repeat_counter++;
  1938. if ((repeat_counter % 10000) == 0) {
  1939. reiserfs_warning(tb->tb_sb, "reiserfs-8200",
  1940. "too many iterations waiting "
  1941. "for buffer to unlock "
  1942. "(%b)", locked);
  1943. /* Don't loop forever. Try to recover from possible error. */
  1944. return (FILESYSTEM_CHANGED_TB(tb)) ?
  1945. REPEAT_SEARCH : CARRY_ON;
  1946. }
  1947. #endif
  1948. reiserfs_write_unlock(tb->tb_sb);
  1949. __wait_on_buffer(locked);
  1950. reiserfs_write_lock(tb->tb_sb);
  1951. if (FILESYSTEM_CHANGED_TB(tb))
  1952. return REPEAT_SEARCH;
  1953. }
  1954. } while (locked);
  1955. return CARRY_ON;
  1956. }
  1957. /* Prepare for balancing, that is
  1958. * get all necessary parents, and neighbors;
  1959. * analyze what and where should be moved;
  1960. * get sufficient number of new nodes;
  1961. * Balancing will start only after all resources will be collected at a time.
  1962. *
  1963. * When ported to SMP kernels, only at the last moment after all needed nodes
  1964. * are collected in cache, will the resources be locked using the usual
  1965. * textbook ordered lock acquisition algorithms. Note that ensuring that
  1966. * this code neither write locks what it does not need to write lock nor locks out of order
  1967. * will be a pain in the butt that could have been avoided. Grumble grumble. -Hans
  1968. *
  1969. * fix is meant in the sense of render unchanging
  1970. *
  1971. * Latency might be improved by first gathering a list of what buffers are needed
  1972. * and then getting as many of them in parallel as possible? -Hans
  1973. *
  1974. * Parameters:
  1975. * op_mode i - insert, d - delete, c - cut (truncate), p - paste (append)
  1976. * tb tree_balance structure;
  1977. * inum item number in S[h];
  1978. * pos_in_item - comment this if you can
  1979. * ins_ih item head of item being inserted
  1980. * data inserted item or data to be pasted
  1981. * Returns: 1 - schedule occurred while the function worked;
  1982. * 0 - schedule didn't occur while the function worked;
  1983. * -1 - if no_disk_space
  1984. */
  1985. int fix_nodes(int op_mode, struct tree_balance *tb,
  1986. struct item_head *ins_ih, const void *data)
  1987. {
  1988. int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path);
  1989. int pos_in_item;
  1990. /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared
  1991. ** during wait_tb_buffers_run
  1992. */
  1993. int wait_tb_buffers_run = 0;
  1994. struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
  1995. ++REISERFS_SB(tb->tb_sb)->s_fix_nodes;
  1996. pos_in_item = tb->tb_path->pos_in_item;
  1997. tb->fs_gen = get_generation(tb->tb_sb);
  1998. /* we prepare and log the super here so it will already be in the
  1999. ** transaction when do_balance needs to change it.
  2000. ** This way do_balance won't have to schedule when trying to prepare
  2001. ** the super for logging
  2002. */
  2003. reiserfs_prepare_for_journal(tb->tb_sb,
  2004. SB_BUFFER_WITH_SB(tb->tb_sb), 1);
  2005. journal_mark_dirty(tb->transaction_handle, tb->tb_sb,
  2006. SB_BUFFER_WITH_SB(tb->tb_sb));
  2007. if (FILESYSTEM_CHANGED_TB(tb))
  2008. return REPEAT_SEARCH;
  2009. /* if it possible in indirect_to_direct conversion */
  2010. if (buffer_locked(tbS0)) {
  2011. reiserfs_write_unlock(tb->tb_sb);
  2012. __wait_on_buffer(tbS0);
  2013. reiserfs_write_lock(tb->tb_sb);
  2014. if (FILESYSTEM_CHANGED_TB(tb))
  2015. return REPEAT_SEARCH;
  2016. }
  2017. #ifdef CONFIG_REISERFS_CHECK
  2018. if (REISERFS_SB(tb->tb_sb)->cur_tb) {
  2019. print_cur_tb("fix_nodes");
  2020. reiserfs_panic(tb->tb_sb, "PAP-8305",
  2021. "there is pending do_balance");
  2022. }
  2023. if (!buffer_uptodate(tbS0) || !B_IS_IN_TREE(tbS0))
  2024. reiserfs_panic(tb->tb_sb, "PAP-8320", "S[0] (%b %z) is "
  2025. "not uptodate at the beginning of fix_nodes "
  2026. "or not in tree (mode %c)",
  2027. tbS0, tbS0, op_mode);
  2028. /* Check parameters. */
  2029. switch (op_mode) {
  2030. case M_INSERT:
  2031. if (item_num <= 0 || item_num > B_NR_ITEMS(tbS0))
  2032. reiserfs_panic(tb->tb_sb, "PAP-8330", "Incorrect "
  2033. "item number %d (in S0 - %d) in case "
  2034. "of insert", item_num,
  2035. B_NR_ITEMS(tbS0));
  2036. break;
  2037. case M_PASTE:
  2038. case M_DELETE:
  2039. case M_CUT:
  2040. if (item_num < 0 || item_num >= B_NR_ITEMS(tbS0)) {
  2041. print_block(tbS0, 0, -1, -1);
  2042. reiserfs_panic(tb->tb_sb, "PAP-8335", "Incorrect "
  2043. "item number(%d); mode = %c "
  2044. "insert_size = %d",
  2045. item_num, op_mode,
  2046. tb->insert_size[0]);
  2047. }
  2048. break;
  2049. default:
  2050. reiserfs_panic(tb->tb_sb, "PAP-8340", "Incorrect mode "
  2051. "of operation");
  2052. }
  2053. #endif
  2054. if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH)
  2055. // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat
  2056. return REPEAT_SEARCH;
  2057. /* Starting from the leaf level; for all levels h of the tree. */
  2058. for (h = 0; h < MAX_HEIGHT && tb->insert_size[h]; h++) {
  2059. ret = get_direct_parent(tb, h);
  2060. if (ret != CARRY_ON)
  2061. goto repeat;
  2062. ret = check_balance(op_mode, tb, h, item_num,
  2063. pos_in_item, ins_ih, data);
  2064. if (ret != CARRY_ON) {
  2065. if (ret == NO_BALANCING_NEEDED) {
  2066. /* No balancing for higher levels needed. */
  2067. ret = get_neighbors(tb, h);
  2068. if (ret != CARRY_ON)
  2069. goto repeat;
  2070. if (h != MAX_HEIGHT - 1)
  2071. tb->insert_size[h + 1] = 0;
  2072. /* ok, analysis and resource gathering are complete */
  2073. break;
  2074. }
  2075. goto repeat;
  2076. }
  2077. ret = get_neighbors(tb, h);
  2078. if (ret != CARRY_ON)
  2079. goto repeat;
  2080. /* No disk space, or schedule occurred and analysis may be
  2081. * invalid and needs to be redone. */
  2082. ret = get_empty_nodes(tb, h);
  2083. if (ret != CARRY_ON)
  2084. goto repeat;
  2085. if (!PATH_H_PBUFFER(tb->tb_path, h)) {
  2086. /* We have a positive insert size but no nodes exist on this
  2087. level, this means that we are creating a new root. */
  2088. RFALSE(tb->blknum[h] != 1,
  2089. "PAP-8350: creating new empty root");
  2090. if (h < MAX_HEIGHT - 1)
  2091. tb->insert_size[h + 1] = 0;
  2092. } else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) {
  2093. if (tb->blknum[h] > 1) {
  2094. /* The tree needs to be grown, so this node S[h]
  2095. which is the root node is split into two nodes,
  2096. and a new node (S[h+1]) will be created to
  2097. become the root node. */
  2098. RFALSE(h == MAX_HEIGHT - 1,
  2099. "PAP-8355: attempt to create too high of a tree");
  2100. tb->insert_size[h + 1] =
  2101. (DC_SIZE +
  2102. KEY_SIZE) * (tb->blknum[h] - 1) +
  2103. DC_SIZE;
  2104. } else if (h < MAX_HEIGHT - 1)
  2105. tb->insert_size[h + 1] = 0;
  2106. } else
  2107. tb->insert_size[h + 1] =
  2108. (DC_SIZE + KEY_SIZE) * (tb->blknum[h] - 1);
  2109. }
  2110. ret = wait_tb_buffers_until_unlocked(tb);
  2111. if (ret == CARRY_ON) {
  2112. if (FILESYSTEM_CHANGED_TB(tb)) {
  2113. wait_tb_buffers_run = 1;
  2114. ret = REPEAT_SEARCH;
  2115. goto repeat;
  2116. } else {
  2117. return CARRY_ON;
  2118. }
  2119. } else {
  2120. wait_tb_buffers_run = 1;
  2121. goto repeat;
  2122. }
  2123. repeat:
  2124. // fix_nodes was unable to perform its calculation due to
  2125. // filesystem got changed under us, lack of free disk space or i/o
  2126. // failure. If the first is the case - the search will be
  2127. // repeated. For now - free all resources acquired so far except
  2128. // for the new allocated nodes
  2129. {
  2130. int i;
  2131. /* Release path buffers. */
  2132. if (wait_tb_buffers_run) {
  2133. pathrelse_and_restore(tb->tb_sb, tb->tb_path);
  2134. } else {
  2135. pathrelse(tb->tb_path);
  2136. }
  2137. /* brelse all resources collected for balancing */
  2138. for (i = 0; i < MAX_HEIGHT; i++) {
  2139. if (wait_tb_buffers_run) {
  2140. reiserfs_restore_prepared_buffer(tb->tb_sb,
  2141. tb->L[i]);
  2142. reiserfs_restore_prepared_buffer(tb->tb_sb,
  2143. tb->R[i]);
  2144. reiserfs_restore_prepared_buffer(tb->tb_sb,
  2145. tb->FL[i]);
  2146. reiserfs_restore_prepared_buffer(tb->tb_sb,
  2147. tb->FR[i]);
  2148. reiserfs_restore_prepared_buffer(tb->tb_sb,
  2149. tb->
  2150. CFL[i]);
  2151. reiserfs_restore_prepared_buffer(tb->tb_sb,
  2152. tb->
  2153. CFR[i]);
  2154. }
  2155. brelse(tb->L[i]);
  2156. brelse(tb->R[i]);
  2157. brelse(tb->FL[i]);
  2158. brelse(tb->FR[i]);
  2159. brelse(tb->CFL[i]);
  2160. brelse(tb->CFR[i]);
  2161. tb->L[i] = NULL;
  2162. tb->R[i] = NULL;
  2163. tb->FL[i] = NULL;
  2164. tb->FR[i] = NULL;
  2165. tb->CFL[i] = NULL;
  2166. tb->CFR[i] = NULL;
  2167. }
  2168. if (wait_tb_buffers_run) {
  2169. for (i = 0; i < MAX_FEB_SIZE; i++) {
  2170. if (tb->FEB[i])
  2171. reiserfs_restore_prepared_buffer
  2172. (tb->tb_sb, tb->FEB[i]);
  2173. }
  2174. }
  2175. return ret;
  2176. }
  2177. }
  2178. /* Anatoly will probably forgive me renaming tb to tb. I just
  2179. wanted to make lines shorter */
  2180. void unfix_nodes(struct tree_balance *tb)
  2181. {
  2182. int i;
  2183. /* Release path buffers. */
  2184. pathrelse_and_restore(tb->tb_sb, tb->tb_path);
  2185. /* brelse all resources collected for balancing */
  2186. for (i = 0; i < MAX_HEIGHT; i++) {
  2187. reiserfs_restore_prepared_buffer(tb->tb_sb, tb->L[i]);
  2188. reiserfs_restore_prepared_buffer(tb->tb_sb, tb->R[i]);
  2189. reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FL[i]);
  2190. reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FR[i]);
  2191. reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFL[i]);
  2192. reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFR[i]);
  2193. brelse(tb->L[i]);
  2194. brelse(tb->R[i]);
  2195. brelse(tb->FL[i]);
  2196. brelse(tb->FR[i]);
  2197. brelse(tb->CFL[i]);
  2198. brelse(tb->CFR[i]);
  2199. }
  2200. /* deal with list of allocated (used and unused) nodes */
  2201. for (i = 0; i < MAX_FEB_SIZE; i++) {
  2202. if (tb->FEB[i]) {
  2203. b_blocknr_t blocknr = tb->FEB[i]->b_blocknr;
  2204. /* de-allocated block which was not used by balancing and
  2205. bforget about buffer for it */
  2206. brelse(tb->FEB[i]);
  2207. reiserfs_free_block(tb->transaction_handle, NULL,
  2208. blocknr, 0);
  2209. }
  2210. if (tb->used[i]) {
  2211. /* release used as new nodes including a new root */
  2212. brelse(tb->used[i]);
  2213. }
  2214. }
  2215. kfree(tb->vn_buf);
  2216. }