blob: 2f87f5b14630e2323c9e91c784adc8affb0b1831 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
4
5/* Now we have all buffers that must be used in balancing of the tree */
6/* Further calculations can not cause schedule(), and thus the buffer */
7/* tree will be stable until the balancing will be finished */
8/* balance the tree according to the analysis made before, */
9/* and using buffers obtained after all above. */
10
Linus Torvalds1da177e2005-04-16 15:20:36 -070011/**
12 ** balance_leaf_when_delete
13 ** balance_leaf
14 ** do_balance
15 **
16 **/
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <asm/uaccess.h>
19#include <linux/time.h>
20#include <linux/reiserfs_fs.h>
21#include <linux/buffer_head.h>
Ahmed S. Darwish79a81ae2007-02-12 00:52:09 -080022#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023
24#ifdef CONFIG_REISERFS_CHECK
25
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070026struct tree_balance *cur_tb = NULL; /* detects whether more than one
27 copy of tb exists as a means
28 of checking whether schedule
29 is interrupting do_balance */
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#endif
31
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070032inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
33 struct buffer_head *bh, int flag)
Linus Torvalds1da177e2005-04-16 15:20:36 -070034{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070035 journal_mark_dirty(tb->transaction_handle,
36 tb->transaction_handle->t_super, bh);
Linus Torvalds1da177e2005-04-16 15:20:36 -070037}
38
39#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
40#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
41
Linus Torvalds1da177e2005-04-16 15:20:36 -070042/* summary:
43 if deleting something ( tb->insert_size[0] < 0 )
44 return(balance_leaf_when_delete()); (flag d handled here)
45 else
46 if lnum is larger than 0 we put items into the left node
47 if rnum is larger than 0 we put items into the right node
48 if snum1 is larger than 0 we put items into the new node s1
49 if snum2 is larger than 0 we put items into the new node s2
50Note that all *num* count new items being created.
51
52It would be easier to read balance_leaf() if each of these summary
53lines was a separate procedure rather than being inlined. I think
54that there are many passages here and in balance_leaf_when_delete() in
55which two calls to one procedure can replace two passages, and it
56might save cache space and improve software maintenance costs to do so.
57
58Vladimir made the perceptive comment that we should offload most of
59the decision making in this function into fix_nodes/check_balance, and
60then create some sort of structure in tb that says what actions should
61be performed by do_balance.
62
63-Hans */
64
Linus Torvalds1da177e2005-04-16 15:20:36 -070065/* Balance leaf node in case of delete or cut: insert_size[0] < 0
66 *
67 * lnum, rnum can have values >= -1
68 * -1 means that the neighbor must be joined with S
69 * 0 means that nothing should be done with the neighbor
70 * >0 means to shift entirely or partly the specified number of items to the neighbor
71 */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070072static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
Linus Torvalds1da177e2005-04-16 15:20:36 -070073{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070074 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
75 int item_pos = PATH_LAST_POSITION(tb->tb_path);
76 int pos_in_item = tb->tb_path->pos_in_item;
77 struct buffer_info bi;
78 int n;
79 struct item_head *ih;
Linus Torvalds1da177e2005-04-16 15:20:36 -070080
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070081 RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
82 "vs- 12000: level: wrong FR %z", tb->FR[0]);
83 RFALSE(tb->blknum[0] > 1,
84 "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
85 RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
86 "PAP-12010: tree can not be empty");
Linus Torvalds1da177e2005-04-16 15:20:36 -070087
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070088 ih = B_N_PITEM_HEAD(tbS0, item_pos);
Linus Torvalds1da177e2005-04-16 15:20:36 -070089
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070090 /* Delete or truncate the item */
Linus Torvalds1da177e2005-04-16 15:20:36 -070091
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070092 switch (flag) {
93 case M_DELETE: /* delete item in S[0] */
Linus Torvalds1da177e2005-04-16 15:20:36 -070094
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070095 RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
96 "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
97 -tb->insert_size[0], ih);
Linus Torvalds1da177e2005-04-16 15:20:36 -070098
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070099 bi.tb = tb;
100 bi.bi_bh = tbS0;
101 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
102 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
103 leaf_delete_items(&bi, 0, item_pos, 1, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700105 if (!item_pos && tb->CFL[0]) {
106 if (B_NR_ITEMS(tbS0)) {
107 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
108 0);
109 } else {
110 if (!PATH_H_POSITION(tb->tb_path, 1))
111 replace_key(tb, tb->CFL[0], tb->lkey[0],
112 PATH_H_PPARENT(tb->tb_path,
113 0), 0);
114 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700117 RFALSE(!item_pos && !tb->CFL[0],
118 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
119 tb->L[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700121 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700123 case M_CUT:{ /* cut item in S[0] */
124 bi.tb = tb;
125 bi.bi_bh = tbS0;
126 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
127 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
128 if (is_direntry_le_ih(ih)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700130 /* UFS unlink semantics are such that you can only delete one directory entry at a time. */
131 /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
132 tb->insert_size[0] = -1;
133 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
134 -tb->insert_size[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700136 RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
137 "PAP-12030: can not change delimiting key. CFL[0]=%p",
138 tb->CFL[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700140 if (!item_pos && !pos_in_item && tb->CFL[0]) {
141 replace_key(tb, tb->CFL[0], tb->lkey[0],
142 tbS0, 0);
143 }
144 } else {
145 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
146 -tb->insert_size[0]);
147
148 RFALSE(!ih_item_len(ih),
149 "PAP-12035: cut must leave non-zero dynamic length of item");
150 }
151 break;
152 }
153
154 default:
155 print_cur_tb("12040");
156 reiserfs_panic(tb->tb_sb,
157 "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)",
158 (flag ==
159 M_PASTE) ? "PASTE" : ((flag ==
160 M_INSERT) ? "INSERT" :
161 "UNKNOWN"), flag);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700164 /* the rule is that no shifting occurs unless by shifting a node can be freed */
165 n = B_NR_ITEMS(tbS0);
166 if (tb->lnum[0]) { /* L[0] takes part in balancing */
167 if (tb->lnum[0] == -1) { /* L[0] must be joined with S[0] */
168 if (tb->rnum[0] == -1) { /* R[0] must be also joined with S[0] */
169 if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
170 /* all contents of all the 3 buffers will be in L[0] */
171 if (PATH_H_POSITION(tb->tb_path, 1) == 0
172 && 1 < B_NR_ITEMS(tb->FR[0]))
173 replace_key(tb, tb->CFL[0],
174 tb->lkey[0],
175 tb->FR[0], 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700177 leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
178 -1, NULL);
179 leaf_move_items(LEAF_FROM_R_TO_L, tb,
180 B_NR_ITEMS(tb->R[0]),
181 -1, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700183 reiserfs_invalidate_buffer(tb, tbS0);
184 reiserfs_invalidate_buffer(tb,
185 tb->R[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700187 return 0;
188 }
189 /* all contents of all the 3 buffers will be in R[0] */
190 leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
191 NULL);
192 leaf_move_items(LEAF_FROM_L_TO_R, tb,
193 B_NR_ITEMS(tb->L[0]), -1, NULL);
194
195 /* right_delimiting_key is correct in R[0] */
196 replace_key(tb, tb->CFR[0], tb->rkey[0],
197 tb->R[0], 0);
198
199 reiserfs_invalidate_buffer(tb, tbS0);
200 reiserfs_invalidate_buffer(tb, tb->L[0]);
201
202 return -1;
203 }
204
205 RFALSE(tb->rnum[0] != 0,
206 "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
207 /* all contents of L[0] and S[0] will be in L[0] */
208 leaf_shift_left(tb, n, -1);
209
210 reiserfs_invalidate_buffer(tb, tbS0);
211
212 return 0;
213 }
214 /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
215
216 RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
217 (tb->lnum[0] + tb->rnum[0] > n + 1),
218 "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
219 tb->rnum[0], tb->lnum[0], n);
220 RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
221 (tb->lbytes != -1 || tb->rbytes != -1),
222 "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
223 tb->rbytes, tb->lbytes);
224 RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
225 (tb->lbytes < 1 || tb->rbytes != -1),
226 "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
227 tb->rbytes, tb->lbytes);
228
229 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
230 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
231
232 reiserfs_invalidate_buffer(tb, tbS0);
233
234 return 0;
235 }
236
237 if (tb->rnum[0] == -1) {
238 /* all contents of R[0] and S[0] will be in R[0] */
239 leaf_shift_right(tb, n, -1);
240 reiserfs_invalidate_buffer(tb, tbS0);
241 return 0;
242 }
243
244 RFALSE(tb->rnum[0],
245 "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247}
248
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700249static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item header of inserted item (this is on little endian) */
250 const char *body, /* body of inserted item or bytes to paste */
251 int flag, /* i - insert, d - delete, c - cut, p - paste
252 (see comment to do_balance) */
253 struct item_head *insert_key, /* in our processing of one level we sometimes determine what
254 must be inserted into the next higher level. This insertion
255 consists of a key or two keys and their corresponding
256 pointers */
257 struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 )
259{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700260 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
261 int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0]
262 of the affected item */
263 struct buffer_info bi;
264 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */
265 int snum[2]; /* number of items that will be placed
266 into S_new (includes partially shifted
267 items) */
268 int sbytes[2]; /* if an item is partially shifted into S_new then
269 if it is a directory item
270 it is the number of entries from the item that are shifted into S_new
271 else
272 it is the number of bytes from the item that are shifted into S_new
273 */
274 int n, i;
275 int ret_val;
276 int pos_in_item;
277 int zeros_num;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700279 PROC_INFO_INC(tb->tb_sb, balance_at[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700281 /* Make balance in case insert_size[0] < 0 */
282 if (tb->insert_size[0] < 0)
283 return balance_leaf_when_delete(tb, flag);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700285 zeros_num = 0;
Al Viro9dce07f2008-03-29 03:07:28 +0000286 if (flag == M_INSERT && !body)
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700287 zeros_num = ih_item_len(ih);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700288
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700289 pos_in_item = tb->tb_path->pos_in_item;
290 /* for indirect item pos_in_item is measured in unformatted node
291 pointers. Recalculate to bytes */
292 if (flag != M_INSERT
293 && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos)))
294 pos_in_item *= UNFM_P_SIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700295
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700296 if (tb->lnum[0] > 0) {
297 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
298 if (item_pos < tb->lnum[0]) {
299 /* new item or it part falls to L[0], shift it too */
300 n = B_NR_ITEMS(tb->L[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700301
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700302 switch (flag) {
303 case M_INSERT: /* insert item into L[0] */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700304
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700305 if (item_pos == tb->lnum[0] - 1
306 && tb->lbytes != -1) {
307 /* part of new item falls into L[0] */
308 int new_item_len;
309 int version;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700310
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700311 ret_val =
312 leaf_shift_left(tb, tb->lnum[0] - 1,
313 -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700315 /* Calculate item length to insert to S[0] */
316 new_item_len =
317 ih_item_len(ih) - tb->lbytes;
318 /* Calculate and check item length to insert to L[0] */
319 put_ih_item_len(ih,
320 ih_item_len(ih) -
321 new_item_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700323 RFALSE(ih_item_len(ih) <= 0,
324 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
325 ih_item_len(ih));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700327 /* Insert new item into L[0] */
328 bi.tb = tb;
329 bi.bi_bh = tb->L[0];
330 bi.bi_parent = tb->FL[0];
331 bi.bi_position =
332 get_left_neighbor_position(tb, 0);
333 leaf_insert_into_buf(&bi,
334 n + item_pos -
335 ret_val, ih, body,
336 zeros_num >
337 ih_item_len(ih) ?
338 ih_item_len(ih) :
339 zeros_num);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700341 version = ih_version(ih);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700343 /* Calculate key component, item length and body to insert into S[0] */
344 set_le_ih_k_offset(ih,
345 le_ih_k_offset(ih) +
346 (tb->
347 lbytes <<
348 (is_indirect_le_ih
349 (ih) ? tb->tb_sb->
350 s_blocksize_bits -
351 UNFM_P_SHIFT :
352 0)));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700354 put_ih_item_len(ih, new_item_len);
355 if (tb->lbytes > zeros_num) {
356 body +=
357 (tb->lbytes - zeros_num);
358 zeros_num = 0;
359 } else
360 zeros_num -= tb->lbytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700362 RFALSE(ih_item_len(ih) <= 0,
363 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
364 ih_item_len(ih));
365 } else {
366 /* new item in whole falls into L[0] */
367 /* Shift lnum[0]-1 items to L[0] */
368 ret_val =
369 leaf_shift_left(tb, tb->lnum[0] - 1,
370 tb->lbytes);
371 /* Insert new item into L[0] */
372 bi.tb = tb;
373 bi.bi_bh = tb->L[0];
374 bi.bi_parent = tb->FL[0];
375 bi.bi_position =
376 get_left_neighbor_position(tb, 0);
377 leaf_insert_into_buf(&bi,
378 n + item_pos -
379 ret_val, ih, body,
380 zeros_num);
381 tb->insert_size[0] = 0;
382 zeros_num = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700384 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700386 case M_PASTE: /* append item in L[0] */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700388 if (item_pos == tb->lnum[0] - 1
389 && tb->lbytes != -1) {
390 /* we must shift the part of the appended item */
391 if (is_direntry_le_ih
392 (B_N_PITEM_HEAD(tbS0, item_pos))) {
393
394 RFALSE(zeros_num,
395 "PAP-12090: invalid parameter in case of a directory");
396 /* directory item */
397 if (tb->lbytes > pos_in_item) {
398 /* new directory entry falls into L[0] */
399 struct item_head
400 *pasted;
401 int l_pos_in_item =
402 pos_in_item;
403
404 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
405 ret_val =
406 leaf_shift_left(tb,
407 tb->
408 lnum
409 [0],
410 tb->
411 lbytes
412 -
413 1);
414 if (ret_val
415 && !item_pos) {
416 pasted =
417 B_N_PITEM_HEAD
418 (tb->L[0],
419 B_NR_ITEMS
420 (tb->
421 L[0]) -
422 1);
423 l_pos_in_item +=
424 I_ENTRY_COUNT
425 (pasted) -
426 (tb->
427 lbytes -
428 1);
429 }
430
431 /* Append given directory entry to directory item */
432 bi.tb = tb;
433 bi.bi_bh = tb->L[0];
434 bi.bi_parent =
435 tb->FL[0];
436 bi.bi_position =
437 get_left_neighbor_position
438 (tb, 0);
439 leaf_paste_in_buffer
440 (&bi,
441 n + item_pos -
442 ret_val,
443 l_pos_in_item,
444 tb->insert_size[0],
445 body, zeros_num);
446
447 /* previous string prepared space for pasting new entry, following string pastes this entry */
448
449 /* when we have merge directory item, pos_in_item has been changed too */
450
451 /* paste new directory entry. 1 is entry number */
452 leaf_paste_entries(bi.
453 bi_bh,
454 n +
455 item_pos
456 -
457 ret_val,
458 l_pos_in_item,
459 1,
460 (struct
461 reiserfs_de_head
462 *)
463 body,
464 body
465 +
466 DEH_SIZE,
467 tb->
468 insert_size
469 [0]
470 );
471 tb->insert_size[0] = 0;
472 } else {
473 /* new directory item doesn't fall into L[0] */
474 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
475 leaf_shift_left(tb,
476 tb->
477 lnum[0],
478 tb->
479 lbytes);
480 }
481 /* Calculate new position to append in item body */
482 pos_in_item -= tb->lbytes;
483 } else {
484 /* regular object */
485 RFALSE(tb->lbytes <= 0,
486 "PAP-12095: there is nothing to shift to L[0]. lbytes=%d",
487 tb->lbytes);
488 RFALSE(pos_in_item !=
489 ih_item_len
490 (B_N_PITEM_HEAD
491 (tbS0, item_pos)),
492 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
493 ih_item_len
494 (B_N_PITEM_HEAD
495 (tbS0, item_pos)),
496 pos_in_item);
497
498 if (tb->lbytes >= pos_in_item) {
499 /* appended item will be in L[0] in whole */
500 int l_n;
501
502 /* this bytes number must be appended to the last item of L[h] */
503 l_n =
504 tb->lbytes -
505 pos_in_item;
506
507 /* Calculate new insert_size[0] */
508 tb->insert_size[0] -=
509 l_n;
510
511 RFALSE(tb->
512 insert_size[0] <=
513 0,
514 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
515 tb->
516 insert_size[0]);
517 ret_val =
518 leaf_shift_left(tb,
519 tb->
520 lnum
521 [0],
522 ih_item_len
523 (B_N_PITEM_HEAD
524 (tbS0,
525 item_pos)));
526 /* Append to body of item in L[0] */
527 bi.tb = tb;
528 bi.bi_bh = tb->L[0];
529 bi.bi_parent =
530 tb->FL[0];
531 bi.bi_position =
532 get_left_neighbor_position
533 (tb, 0);
534 leaf_paste_in_buffer
535 (&bi,
536 n + item_pos -
537 ret_val,
538 ih_item_len
539 (B_N_PITEM_HEAD
540 (tb->L[0],
541 n + item_pos -
542 ret_val)), l_n,
543 body,
544 zeros_num >
545 l_n ? l_n :
546 zeros_num);
547 /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
548 {
549 int version;
550 int temp_l =
551 l_n;
552
553 RFALSE
554 (ih_item_len
555 (B_N_PITEM_HEAD
556 (tbS0,
557 0)),
558 "PAP-12106: item length must be 0");
559 RFALSE
560 (comp_short_le_keys
561 (B_N_PKEY
562 (tbS0, 0),
563 B_N_PKEY
564 (tb->L[0],
565 n +
566 item_pos
567 -
568 ret_val)),
569 "PAP-12107: items must be of the same file");
570 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) {
571 temp_l =
572 l_n
573 <<
574 (tb->
575 tb_sb->
576 s_blocksize_bits
577 -
578 UNFM_P_SHIFT);
579 }
580 /* update key of first item in S0 */
581 version =
582 ih_version
583 (B_N_PITEM_HEAD
584 (tbS0, 0));
585 set_le_key_k_offset
586 (version,
587 B_N_PKEY
588 (tbS0, 0),
589 le_key_k_offset
590 (version,
591 B_N_PKEY
592 (tbS0,
593 0)) +
594 temp_l);
595 /* update left delimiting key */
596 set_le_key_k_offset
597 (version,
598 B_N_PDELIM_KEY
599 (tb->
600 CFL[0],
601 tb->
602 lkey[0]),
603 le_key_k_offset
604 (version,
605 B_N_PDELIM_KEY
606 (tb->
607 CFL[0],
608 tb->
609 lkey[0]))
610 + temp_l);
611 }
612
613 /* Calculate new body, position in item and insert_size[0] */
614 if (l_n > zeros_num) {
615 body +=
616 (l_n -
617 zeros_num);
618 zeros_num = 0;
619 } else
620 zeros_num -=
621 l_n;
622 pos_in_item = 0;
623
624 RFALSE
625 (comp_short_le_keys
626 (B_N_PKEY(tbS0, 0),
627 B_N_PKEY(tb->L[0],
628 B_NR_ITEMS
629 (tb->
630 L[0]) -
631 1))
632 ||
633 !op_is_left_mergeable
634 (B_N_PKEY(tbS0, 0),
635 tbS0->b_size)
636 ||
637 !op_is_left_mergeable
638 (B_N_PDELIM_KEY
639 (tb->CFL[0],
640 tb->lkey[0]),
641 tbS0->b_size),
642 "PAP-12120: item must be merge-able with left neighboring item");
643 } else { /* only part of the appended item will be in L[0] */
644
645 /* Calculate position in item for append in S[0] */
646 pos_in_item -=
647 tb->lbytes;
648
649 RFALSE(pos_in_item <= 0,
650 "PAP-12125: no place for paste. pos_in_item=%d",
651 pos_in_item);
652
653 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
654 leaf_shift_left(tb,
655 tb->
656 lnum[0],
657 tb->
658 lbytes);
659 }
660 }
661 } else { /* appended item will be in L[0] in whole */
662
663 struct item_head *pasted;
664
665 if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergable */
666 /* then increment pos_in_item by the size of the last item in L[0] */
667 pasted =
668 B_N_PITEM_HEAD(tb->L[0],
669 n - 1);
670 if (is_direntry_le_ih(pasted))
671 pos_in_item +=
672 ih_entry_count
673 (pasted);
674 else
675 pos_in_item +=
676 ih_item_len(pasted);
677 }
678
679 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
680 ret_val =
681 leaf_shift_left(tb, tb->lnum[0],
682 tb->lbytes);
683 /* Append to body of item in L[0] */
684 bi.tb = tb;
685 bi.bi_bh = tb->L[0];
686 bi.bi_parent = tb->FL[0];
687 bi.bi_position =
688 get_left_neighbor_position(tb, 0);
689 leaf_paste_in_buffer(&bi,
690 n + item_pos -
691 ret_val,
692 pos_in_item,
693 tb->insert_size[0],
694 body, zeros_num);
695
696 /* if appended item is directory, paste entry */
697 pasted =
698 B_N_PITEM_HEAD(tb->L[0],
699 n + item_pos -
700 ret_val);
701 if (is_direntry_le_ih(pasted))
702 leaf_paste_entries(bi.bi_bh,
703 n +
704 item_pos -
705 ret_val,
706 pos_in_item,
707 1,
708 (struct
709 reiserfs_de_head
710 *)body,
711 body +
712 DEH_SIZE,
713 tb->
714 insert_size
715 [0]
716 );
717 /* if appended item is indirect item, put unformatted node into un list */
718 if (is_indirect_le_ih(pasted))
719 set_ih_free_space(pasted, 0);
720 tb->insert_size[0] = 0;
721 zeros_num = 0;
722 }
723 break;
724 default: /* cases d and t */
725 reiserfs_panic(tb->tb_sb,
726 "PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)",
727 (flag ==
728 M_DELETE) ? "DELETE" : ((flag ==
729 M_CUT)
730 ? "CUT"
731 :
732 "UNKNOWN"),
733 flag);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700735 } else {
736 /* new item doesn't fall into L[0] */
737 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700738 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700741 /* tb->lnum[0] > 0 */
742 /* Calculate new item position */
743 item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700745 if (tb->rnum[0] > 0) {
746 /* shift rnum[0] items from S[0] to the right neighbor R[0] */
747 n = B_NR_ITEMS(tbS0);
748 switch (flag) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700749
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700750 case M_INSERT: /* insert item */
751 if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */
752 if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */
753 loff_t old_key_comp, old_len,
754 r_zeros_number;
755 const char *r_body;
756 int version;
757 loff_t offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700759 leaf_shift_right(tb, tb->rnum[0] - 1,
760 -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700762 version = ih_version(ih);
763 /* Remember key component and item length */
764 old_key_comp = le_ih_k_offset(ih);
765 old_len = ih_item_len(ih);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700767 /* Calculate key component and item length to insert into R[0] */
768 offset =
769 le_ih_k_offset(ih) +
770 ((old_len -
771 tb->
772 rbytes) << (is_indirect_le_ih(ih)
773 ? tb->tb_sb->
774 s_blocksize_bits -
775 UNFM_P_SHIFT : 0));
776 set_le_ih_k_offset(ih, offset);
777 put_ih_item_len(ih, tb->rbytes);
778 /* Insert part of the item into R[0] */
779 bi.tb = tb;
780 bi.bi_bh = tb->R[0];
781 bi.bi_parent = tb->FR[0];
782 bi.bi_position =
783 get_right_neighbor_position(tb, 0);
784 if ((old_len - tb->rbytes) > zeros_num) {
785 r_zeros_number = 0;
786 r_body =
787 body + (old_len -
788 tb->rbytes) -
789 zeros_num;
790 } else {
791 r_body = body;
792 r_zeros_number =
793 zeros_num - (old_len -
794 tb->rbytes);
795 zeros_num -= r_zeros_number;
796 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700798 leaf_insert_into_buf(&bi, 0, ih, r_body,
799 r_zeros_number);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700801 /* Replace right delimiting key by first key in R[0] */
802 replace_key(tb, tb->CFR[0], tb->rkey[0],
803 tb->R[0], 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700805 /* Calculate key component and item length to insert into S[0] */
806 set_le_ih_k_offset(ih, old_key_comp);
807 put_ih_item_len(ih,
808 old_len - tb->rbytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700810 tb->insert_size[0] -= tb->rbytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700811
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700812 } else { /* whole new item falls into R[0] */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700814 /* Shift rnum[0]-1 items to R[0] */
815 ret_val =
816 leaf_shift_right(tb,
817 tb->rnum[0] - 1,
818 tb->rbytes);
819 /* Insert new item into R[0] */
820 bi.tb = tb;
821 bi.bi_bh = tb->R[0];
822 bi.bi_parent = tb->FR[0];
823 bi.bi_position =
824 get_right_neighbor_position(tb, 0);
825 leaf_insert_into_buf(&bi,
826 item_pos - n +
827 tb->rnum[0] - 1,
828 ih, body,
829 zeros_num);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700830
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700831 if (item_pos - n + tb->rnum[0] - 1 == 0) {
832 replace_key(tb, tb->CFR[0],
833 tb->rkey[0],
834 tb->R[0], 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700835
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700836 }
837 zeros_num = tb->insert_size[0] = 0;
838 }
839 } else { /* new item or part of it doesn't fall into R[0] */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700841 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700842 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700843 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700844
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700845 case M_PASTE: /* append item */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700846
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700847 if (n - tb->rnum[0] <= item_pos) { /* pasted item or part of it falls to R[0] */
848 if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) { /* we must shift the part of the appended item */
849 if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) { /* we append to directory item */
850 int entry_count;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700852 RFALSE(zeros_num,
853 "PAP-12145: invalid parameter in case of a directory");
854 entry_count =
855 I_ENTRY_COUNT(B_N_PITEM_HEAD
856 (tbS0,
857 item_pos));
858 if (entry_count - tb->rbytes <
859 pos_in_item)
860 /* new directory entry falls into R[0] */
861 {
862 int paste_entry_position;
863
864 RFALSE(tb->rbytes - 1 >=
865 entry_count
866 || !tb->
867 insert_size[0],
868 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
869 tb->rbytes,
870 entry_count);
871 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
872 leaf_shift_right(tb,
873 tb->
874 rnum
875 [0],
876 tb->
877 rbytes
878 - 1);
879 /* Paste given directory entry to directory item */
880 paste_entry_position =
881 pos_in_item -
882 entry_count +
883 tb->rbytes - 1;
884 bi.tb = tb;
885 bi.bi_bh = tb->R[0];
886 bi.bi_parent =
887 tb->FR[0];
888 bi.bi_position =
889 get_right_neighbor_position
890 (tb, 0);
891 leaf_paste_in_buffer
892 (&bi, 0,
893 paste_entry_position,
894 tb->insert_size[0],
895 body, zeros_num);
896 /* paste entry */
897 leaf_paste_entries(bi.
898 bi_bh,
899 0,
900 paste_entry_position,
901 1,
902 (struct
903 reiserfs_de_head
904 *)
905 body,
906 body
907 +
908 DEH_SIZE,
909 tb->
910 insert_size
911 [0]
912 );
913
914 if (paste_entry_position
915 == 0) {
916 /* change delimiting keys */
917 replace_key(tb,
918 tb->
919 CFR
920 [0],
921 tb->
922 rkey
923 [0],
924 tb->
925 R
926 [0],
927 0);
928 }
929
930 tb->insert_size[0] = 0;
931 pos_in_item++;
932 } else { /* new directory entry doesn't fall into R[0] */
933
934 leaf_shift_right(tb,
935 tb->
936 rnum
937 [0],
938 tb->
939 rbytes);
940 }
941 } else { /* regular object */
942
943 int n_shift, n_rem,
944 r_zeros_number;
945 const char *r_body;
946
947 /* Calculate number of bytes which must be shifted from appended item */
948 if ((n_shift =
949 tb->rbytes -
950 tb->insert_size[0]) < 0)
951 n_shift = 0;
952
953 RFALSE(pos_in_item !=
954 ih_item_len
955 (B_N_PITEM_HEAD
956 (tbS0, item_pos)),
957 "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
958 pos_in_item,
959 ih_item_len
960 (B_N_PITEM_HEAD
961 (tbS0, item_pos)));
962
963 leaf_shift_right(tb,
964 tb->rnum[0],
965 n_shift);
966 /* Calculate number of bytes which must remain in body after appending to R[0] */
967 if ((n_rem =
968 tb->insert_size[0] -
969 tb->rbytes) < 0)
970 n_rem = 0;
971
972 {
973 int version;
974 unsigned long temp_rem =
975 n_rem;
976
977 version =
978 ih_version
979 (B_N_PITEM_HEAD
980 (tb->R[0], 0));
981 if (is_indirect_le_key
982 (version,
983 B_N_PKEY(tb->R[0],
984 0))) {
985 temp_rem =
986 n_rem <<
987 (tb->tb_sb->
988 s_blocksize_bits
989 -
990 UNFM_P_SHIFT);
991 }
992 set_le_key_k_offset
993 (version,
994 B_N_PKEY(tb->R[0],
995 0),
996 le_key_k_offset
997 (version,
998 B_N_PKEY(tb->R[0],
999 0)) +
1000 temp_rem);
1001 set_le_key_k_offset
1002 (version,
1003 B_N_PDELIM_KEY(tb->
1004 CFR
1005 [0],
1006 tb->
1007 rkey
1008 [0]),
1009 le_key_k_offset
1010 (version,
1011 B_N_PDELIM_KEY
1012 (tb->CFR[0],
1013 tb->rkey[0])) +
1014 temp_rem);
1015 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016/* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
1017 k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001018 do_balance_mark_internal_dirty
1019 (tb, tb->CFR[0], 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001021 /* Append part of body into R[0] */
1022 bi.tb = tb;
1023 bi.bi_bh = tb->R[0];
1024 bi.bi_parent = tb->FR[0];
1025 bi.bi_position =
1026 get_right_neighbor_position
1027 (tb, 0);
1028 if (n_rem > zeros_num) {
1029 r_zeros_number = 0;
1030 r_body =
1031 body + n_rem -
1032 zeros_num;
1033 } else {
1034 r_body = body;
1035 r_zeros_number =
1036 zeros_num - n_rem;
1037 zeros_num -=
1038 r_zeros_number;
1039 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001040
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001041 leaf_paste_in_buffer(&bi, 0,
1042 n_shift,
1043 tb->
1044 insert_size
1045 [0] -
1046 n_rem,
1047 r_body,
1048 r_zeros_number);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001049
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001050 if (is_indirect_le_ih
1051 (B_N_PITEM_HEAD
1052 (tb->R[0], 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001053#if 0
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001054 RFALSE(n_rem,
1055 "PAP-12160: paste more than one unformatted node pointer");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056#endif
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001057 set_ih_free_space
1058 (B_N_PITEM_HEAD
1059 (tb->R[0], 0), 0);
1060 }
1061 tb->insert_size[0] = n_rem;
1062 if (!n_rem)
1063 pos_in_item++;
1064 }
1065 } else { /* pasted item in whole falls into R[0] */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001066
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001067 struct item_head *pasted;
1068
1069 ret_val =
1070 leaf_shift_right(tb, tb->rnum[0],
1071 tb->rbytes);
1072 /* append item in R[0] */
1073 if (pos_in_item >= 0) {
1074 bi.tb = tb;
1075 bi.bi_bh = tb->R[0];
1076 bi.bi_parent = tb->FR[0];
1077 bi.bi_position =
1078 get_right_neighbor_position
1079 (tb, 0);
1080 leaf_paste_in_buffer(&bi,
1081 item_pos -
1082 n +
1083 tb->
1084 rnum[0],
1085 pos_in_item,
1086 tb->
1087 insert_size
1088 [0], body,
1089 zeros_num);
1090 }
1091
1092 /* paste new entry, if item is directory item */
1093 pasted =
1094 B_N_PITEM_HEAD(tb->R[0],
1095 item_pos - n +
1096 tb->rnum[0]);
1097 if (is_direntry_le_ih(pasted)
1098 && pos_in_item >= 0) {
1099 leaf_paste_entries(bi.bi_bh,
1100 item_pos -
1101 n +
1102 tb->rnum[0],
1103 pos_in_item,
1104 1,
1105 (struct
1106 reiserfs_de_head
1107 *)body,
1108 body +
1109 DEH_SIZE,
1110 tb->
1111 insert_size
1112 [0]
1113 );
1114 if (!pos_in_item) {
1115
1116 RFALSE(item_pos - n +
1117 tb->rnum[0],
1118 "PAP-12165: directory item must be first item of node when pasting is in 0th position");
1119
1120 /* update delimiting keys */
1121 replace_key(tb,
1122 tb->CFR[0],
1123 tb->rkey[0],
1124 tb->R[0],
1125 0);
1126 }
1127 }
1128
1129 if (is_indirect_le_ih(pasted))
1130 set_ih_free_space(pasted, 0);
1131 zeros_num = tb->insert_size[0] = 0;
1132 }
1133 } else { /* new item doesn't fall into R[0] */
1134
1135 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
1136 }
1137 break;
1138 default: /* cases d and t */
1139 reiserfs_panic(tb->tb_sb,
1140 "PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)",
1141 (flag ==
1142 M_DELETE) ? "DELETE" : ((flag ==
1143 M_CUT) ? "CUT"
1144 : "UNKNOWN"),
1145 flag);
1146 }
1147
1148 }
1149
1150 /* tb->rnum[0] > 0 */
1151 RFALSE(tb->blknum[0] > 3,
1152 "PAP-12180: blknum can not be %d. It must be <= 3",
1153 tb->blknum[0]);
1154 RFALSE(tb->blknum[0] < 0,
1155 "PAP-12185: blknum can not be %d. It must be >= 0",
1156 tb->blknum[0]);
1157
1158 /* if while adding to a node we discover that it is possible to split
1159 it in two, and merge the left part into the left neighbor and the
1160 right part into the right neighbor, eliminating the node */
1161 if (tb->blknum[0] == 0) { /* node S[0] is empty now */
1162
1163 RFALSE(!tb->lnum[0] || !tb->rnum[0],
1164 "PAP-12190: lnum and rnum must not be zero");
1165 /* if insertion was done before 0-th position in R[0], right
1166 delimiting key of the tb->L[0]'s and left delimiting key are
1167 not set correctly */
1168 if (tb->CFL[0]) {
1169 if (!tb->CFR[0])
1170 reiserfs_panic(tb->tb_sb,
1171 "vs-12195: balance_leaf: CFR not initialized");
1172 copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
1173 B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]));
1174 do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
1175 }
1176
1177 reiserfs_invalidate_buffer(tb, tbS0);
1178 return 0;
1179 }
1180
1181 /* Fill new nodes that appear in place of S[0] */
1182
1183 /* I am told that this copying is because we need an array to enable
1184 the looping code. -Hans */
1185 snum[0] = tb->s1num, snum[1] = tb->s2num;
1186 sbytes[0] = tb->s1bytes;
1187 sbytes[1] = tb->s2bytes;
1188 for (i = tb->blknum[0] - 2; i >= 0; i--) {
1189
1190 RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i,
1191 snum[i]);
1192
1193 /* here we shift from S to S_new nodes */
1194
1195 S_new[i] = get_FEB(tb);
1196
1197 /* initialized block type and tree level */
1198 set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL);
1199
1200 n = B_NR_ITEMS(tbS0);
1201
1202 switch (flag) {
1203 case M_INSERT: /* insert item */
1204
1205 if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */
1206 if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */
1207 int old_key_comp, old_len,
1208 r_zeros_number;
1209 const char *r_body;
1210 int version;
1211
1212 /* Move snum[i]-1 items from S[0] to S_new[i] */
1213 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1214 snum[i] - 1, -1,
1215 S_new[i]);
1216 /* Remember key component and item length */
1217 version = ih_version(ih);
1218 old_key_comp = le_ih_k_offset(ih);
1219 old_len = ih_item_len(ih);
1220
1221 /* Calculate key component and item length to insert into S_new[i] */
1222 set_le_ih_k_offset(ih,
1223 le_ih_k_offset(ih) +
1224 ((old_len -
1225 sbytes[i]) <<
1226 (is_indirect_le_ih
1227 (ih) ? tb->tb_sb->
1228 s_blocksize_bits -
1229 UNFM_P_SHIFT :
1230 0)));
1231
1232 put_ih_item_len(ih, sbytes[i]);
1233
1234 /* Insert part of the item into S_new[i] before 0-th item */
1235 bi.tb = tb;
1236 bi.bi_bh = S_new[i];
1237 bi.bi_parent = NULL;
1238 bi.bi_position = 0;
1239
1240 if ((old_len - sbytes[i]) > zeros_num) {
1241 r_zeros_number = 0;
1242 r_body =
1243 body + (old_len -
1244 sbytes[i]) -
1245 zeros_num;
1246 } else {
1247 r_body = body;
1248 r_zeros_number =
1249 zeros_num - (old_len -
1250 sbytes[i]);
1251 zeros_num -= r_zeros_number;
1252 }
1253
1254 leaf_insert_into_buf(&bi, 0, ih, r_body,
1255 r_zeros_number);
1256
1257 /* Calculate key component and item length to insert into S[i] */
1258 set_le_ih_k_offset(ih, old_key_comp);
1259 put_ih_item_len(ih,
1260 old_len - sbytes[i]);
1261 tb->insert_size[0] -= sbytes[i];
1262 } else { /* whole new item falls into S_new[i] */
1263
1264 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
1265 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1266 snum[i] - 1, sbytes[i],
1267 S_new[i]);
1268
1269 /* Insert new item into S_new[i] */
1270 bi.tb = tb;
1271 bi.bi_bh = S_new[i];
1272 bi.bi_parent = NULL;
1273 bi.bi_position = 0;
1274 leaf_insert_into_buf(&bi,
1275 item_pos - n +
1276 snum[i] - 1, ih,
1277 body, zeros_num);
1278
1279 zeros_num = tb->insert_size[0] = 0;
1280 }
1281 }
1282
1283 else { /* new item or it part don't falls into S_new[i] */
1284
1285 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1286 snum[i], sbytes[i], S_new[i]);
1287 }
1288 break;
1289
1290 case M_PASTE: /* append item */
1291
1292 if (n - snum[i] <= item_pos) { /* pasted item or part if it falls to S_new[i] */
1293 if (item_pos == n - snum[i] && sbytes[i] != -1) { /* we must shift part of the appended item */
1294 struct item_head *aux_ih;
1295
1296 RFALSE(ih, "PAP-12210: ih must be 0");
1297
1298 if (is_direntry_le_ih
1299 (aux_ih =
1300 B_N_PITEM_HEAD(tbS0, item_pos))) {
1301 /* we append to directory item */
1302
1303 int entry_count;
1304
1305 entry_count =
1306 ih_entry_count(aux_ih);
1307
1308 if (entry_count - sbytes[i] <
1309 pos_in_item
1310 && pos_in_item <=
1311 entry_count) {
1312 /* new directory entry falls into S_new[i] */
1313
1314 RFALSE(!tb->
1315 insert_size[0],
1316 "PAP-12215: insert_size is already 0");
1317 RFALSE(sbytes[i] - 1 >=
1318 entry_count,
1319 "PAP-12220: there are no so much entries (%d), only %d",
1320 sbytes[i] - 1,
1321 entry_count);
1322
1323 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
1324 leaf_move_items
1325 (LEAF_FROM_S_TO_SNEW,
1326 tb, snum[i],
1327 sbytes[i] - 1,
1328 S_new[i]);
1329 /* Paste given directory entry to directory item */
1330 bi.tb = tb;
1331 bi.bi_bh = S_new[i];
1332 bi.bi_parent = NULL;
1333 bi.bi_position = 0;
1334 leaf_paste_in_buffer
1335 (&bi, 0,
1336 pos_in_item -
1337 entry_count +
1338 sbytes[i] - 1,
1339 tb->insert_size[0],
1340 body, zeros_num);
1341 /* paste new directory entry */
1342 leaf_paste_entries(bi.
1343 bi_bh,
1344 0,
1345 pos_in_item
1346 -
1347 entry_count
1348 +
1349 sbytes
1350 [i] -
1351 1, 1,
1352 (struct
1353 reiserfs_de_head
1354 *)
1355 body,
1356 body
1357 +
1358 DEH_SIZE,
1359 tb->
1360 insert_size
1361 [0]
1362 );
1363 tb->insert_size[0] = 0;
1364 pos_in_item++;
1365 } else { /* new directory entry doesn't fall into S_new[i] */
1366 leaf_move_items
1367 (LEAF_FROM_S_TO_SNEW,
1368 tb, snum[i],
1369 sbytes[i],
1370 S_new[i]);
1371 }
1372 } else { /* regular object */
1373
1374 int n_shift, n_rem,
1375 r_zeros_number;
1376 const char *r_body;
1377
1378 RFALSE(pos_in_item !=
1379 ih_item_len
1380 (B_N_PITEM_HEAD
1381 (tbS0, item_pos))
1382 || tb->insert_size[0] <=
1383 0,
1384 "PAP-12225: item too short or insert_size <= 0");
1385
1386 /* Calculate number of bytes which must be shifted from appended item */
1387 n_shift =
1388 sbytes[i] -
1389 tb->insert_size[0];
1390 if (n_shift < 0)
1391 n_shift = 0;
1392 leaf_move_items
1393 (LEAF_FROM_S_TO_SNEW, tb,
1394 snum[i], n_shift,
1395 S_new[i]);
1396
1397 /* Calculate number of bytes which must remain in body after append to S_new[i] */
1398 n_rem =
1399 tb->insert_size[0] -
1400 sbytes[i];
1401 if (n_rem < 0)
1402 n_rem = 0;
1403 /* Append part of body into S_new[0] */
1404 bi.tb = tb;
1405 bi.bi_bh = S_new[i];
1406 bi.bi_parent = NULL;
1407 bi.bi_position = 0;
1408
1409 if (n_rem > zeros_num) {
1410 r_zeros_number = 0;
1411 r_body =
1412 body + n_rem -
1413 zeros_num;
1414 } else {
1415 r_body = body;
1416 r_zeros_number =
1417 zeros_num - n_rem;
1418 zeros_num -=
1419 r_zeros_number;
1420 }
1421
1422 leaf_paste_in_buffer(&bi, 0,
1423 n_shift,
1424 tb->
1425 insert_size
1426 [0] -
1427 n_rem,
1428 r_body,
1429 r_zeros_number);
1430 {
1431 struct item_head *tmp;
1432
1433 tmp =
1434 B_N_PITEM_HEAD(S_new
1435 [i],
1436 0);
1437 if (is_indirect_le_ih
1438 (tmp)) {
1439 set_ih_free_space
1440 (tmp, 0);
1441 set_le_ih_k_offset
1442 (tmp,
1443 le_ih_k_offset
1444 (tmp) +
1445 (n_rem <<
1446 (tb->
1447 tb_sb->
1448 s_blocksize_bits
1449 -
1450 UNFM_P_SHIFT)));
1451 } else {
1452 set_le_ih_k_offset
1453 (tmp,
1454 le_ih_k_offset
1455 (tmp) +
1456 n_rem);
1457 }
1458 }
1459
1460 tb->insert_size[0] = n_rem;
1461 if (!n_rem)
1462 pos_in_item++;
1463 }
1464 } else
1465 /* item falls wholly into S_new[i] */
1466 {
Harvey Harrison8acc5702008-04-28 02:16:21 -07001467 int leaf_mi;
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001468 struct item_head *pasted;
1469
1470#ifdef CONFIG_REISERFS_CHECK
Harvey Harrison8acc5702008-04-28 02:16:21 -07001471 struct item_head *ih_check =
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001472 B_N_PITEM_HEAD(tbS0, item_pos);
1473
Harvey Harrison8acc5702008-04-28 02:16:21 -07001474 if (!is_direntry_le_ih(ih_check)
1475 && (pos_in_item != ih_item_len(ih_check)
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001476 || tb->insert_size[0] <= 0))
1477 reiserfs_panic(tb->tb_sb,
1478 "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len");
1479#endif /* CONFIG_REISERFS_CHECK */
1480
Harvey Harrison8acc5702008-04-28 02:16:21 -07001481 leaf_mi =
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001482 leaf_move_items(LEAF_FROM_S_TO_SNEW,
1483 tb, snum[i],
1484 sbytes[i],
1485 S_new[i]);
1486
Harvey Harrison8acc5702008-04-28 02:16:21 -07001487 RFALSE(leaf_mi,
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001488 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
Harvey Harrison8acc5702008-04-28 02:16:21 -07001489 leaf_mi);
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001490
1491 /* paste into item */
1492 bi.tb = tb;
1493 bi.bi_bh = S_new[i];
1494 bi.bi_parent = NULL;
1495 bi.bi_position = 0;
1496 leaf_paste_in_buffer(&bi,
1497 item_pos - n +
1498 snum[i],
1499 pos_in_item,
1500 tb->insert_size[0],
1501 body, zeros_num);
1502
1503 pasted =
1504 B_N_PITEM_HEAD(S_new[i],
1505 item_pos - n +
1506 snum[i]);
1507 if (is_direntry_le_ih(pasted)) {
1508 leaf_paste_entries(bi.bi_bh,
1509 item_pos -
1510 n + snum[i],
1511 pos_in_item,
1512 1,
1513 (struct
1514 reiserfs_de_head
1515 *)body,
1516 body +
1517 DEH_SIZE,
1518 tb->
1519 insert_size
1520 [0]
1521 );
1522 }
1523
1524 /* if we paste to indirect item update ih_free_space */
1525 if (is_indirect_le_ih(pasted))
1526 set_ih_free_space(pasted, 0);
1527 zeros_num = tb->insert_size[0] = 0;
1528 }
1529 }
1530
1531 else { /* pasted item doesn't fall into S_new[i] */
1532
1533 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1534 snum[i], sbytes[i], S_new[i]);
1535 }
1536 break;
1537 default: /* cases d and t */
1538 reiserfs_panic(tb->tb_sb,
1539 "PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)",
1540 (flag ==
1541 M_DELETE) ? "DELETE" : ((flag ==
1542 M_CUT) ? "CUT"
1543 : "UNKNOWN"),
1544 flag);
1545 }
1546
1547 memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE);
1548 insert_ptr[i] = S_new[i];
1549
1550 RFALSE(!buffer_journaled(S_new[i])
1551 || buffer_journal_dirty(S_new[i])
1552 || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)",
1553 i, S_new[i]);
1554 }
1555
1556 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1557 affected item which remains in S */
1558 if (0 <= item_pos && item_pos < tb->s0num) { /* if we must insert or append into buffer S[0] */
1559
1560 switch (flag) {
1561 case M_INSERT: /* insert item into S[0] */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001562 bi.tb = tb;
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001563 bi.bi_bh = tbS0;
1564 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
1565 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
1566 leaf_insert_into_buf(&bi, item_pos, ih, body,
1567 zeros_num);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001568
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001569 /* If we insert the first key change the delimiting key */
1570 if (item_pos == 0) {
1571 if (tb->CFL[0]) /* can be 0 in reiserfsck */
1572 replace_key(tb, tb->CFL[0], tb->lkey[0],
1573 tbS0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574
Linus Torvalds1da177e2005-04-16 15:20:36 -07001575 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001576 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001577
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001578 case M_PASTE:{ /* append item in S[0] */
1579 struct item_head *pasted;
1580
1581 pasted = B_N_PITEM_HEAD(tbS0, item_pos);
1582 /* when directory, may be new entry already pasted */
1583 if (is_direntry_le_ih(pasted)) {
1584 if (pos_in_item >= 0 &&
1585 pos_in_item <=
1586 ih_entry_count(pasted)) {
1587
1588 RFALSE(!tb->insert_size[0],
1589 "PAP-12260: insert_size is 0 already");
1590
1591 /* prepare space */
1592 bi.tb = tb;
1593 bi.bi_bh = tbS0;
1594 bi.bi_parent =
1595 PATH_H_PPARENT(tb->tb_path,
1596 0);
1597 bi.bi_position =
1598 PATH_H_POSITION(tb->tb_path,
1599 1);
1600 leaf_paste_in_buffer(&bi,
1601 item_pos,
1602 pos_in_item,
1603 tb->
1604 insert_size
1605 [0], body,
1606 zeros_num);
1607
1608 /* paste entry */
1609 leaf_paste_entries(bi.bi_bh,
1610 item_pos,
1611 pos_in_item,
1612 1,
1613 (struct
1614 reiserfs_de_head
1615 *)body,
1616 body +
1617 DEH_SIZE,
1618 tb->
1619 insert_size
1620 [0]
1621 );
1622 if (!item_pos && !pos_in_item) {
1623 RFALSE(!tb->CFL[0]
1624 || !tb->L[0],
1625 "PAP-12270: CFL[0]/L[0] must be specified");
1626 if (tb->CFL[0]) {
1627 replace_key(tb,
1628 tb->
1629 CFL
1630 [0],
1631 tb->
1632 lkey
1633 [0],
1634 tbS0,
1635 0);
1636
1637 }
1638 }
1639 tb->insert_size[0] = 0;
1640 }
1641 } else { /* regular object */
1642 if (pos_in_item == ih_item_len(pasted)) {
1643
1644 RFALSE(tb->insert_size[0] <= 0,
1645 "PAP-12275: insert size must not be %d",
1646 tb->insert_size[0]);
1647 bi.tb = tb;
1648 bi.bi_bh = tbS0;
1649 bi.bi_parent =
1650 PATH_H_PPARENT(tb->tb_path,
1651 0);
1652 bi.bi_position =
1653 PATH_H_POSITION(tb->tb_path,
1654 1);
1655 leaf_paste_in_buffer(&bi,
1656 item_pos,
1657 pos_in_item,
1658 tb->
1659 insert_size
1660 [0], body,
1661 zeros_num);
1662
1663 if (is_indirect_le_ih(pasted)) {
1664#if 0
1665 RFALSE(tb->
1666 insert_size[0] !=
1667 UNFM_P_SIZE,
1668 "PAP-12280: insert_size for indirect item must be %d, not %d",
1669 UNFM_P_SIZE,
1670 tb->
1671 insert_size[0]);
1672#endif
1673 set_ih_free_space
1674 (pasted, 0);
1675 }
1676 tb->insert_size[0] = 0;
1677 }
1678#ifdef CONFIG_REISERFS_CHECK
1679 else {
1680 if (tb->insert_size[0]) {
1681 print_cur_tb("12285");
1682 reiserfs_panic(tb->
1683 tb_sb,
1684 "PAP-12285: balance_leaf: insert_size must be 0 (%d)",
1685 tb->
1686 insert_size
1687 [0]);
1688 }
1689 }
1690#endif /* CONFIG_REISERFS_CHECK */
1691
1692 }
1693 } /* case M_PASTE: */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001694 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001696#ifdef CONFIG_REISERFS_CHECK
1697 if (flag == M_PASTE && tb->insert_size[0]) {
1698 print_cur_tb("12290");
1699 reiserfs_panic(tb->tb_sb,
1700 "PAP-12290: balance_leaf: insert_size is still not 0 (%d)",
1701 tb->insert_size[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001703#endif /* CONFIG_REISERFS_CHECK */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704
Linus Torvalds1da177e2005-04-16 15:20:36 -07001705 return 0;
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001706} /* Leaf level of the tree is balanced (end of balance_leaf) */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001707
1708/* Make empty node */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001709void make_empty_node(struct buffer_info *bi)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001711 struct block_head *blkh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001713 RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001714
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001715 blkh = B_BLK_HEAD(bi->bi_bh);
1716 set_blkh_nr_item(blkh, 0);
1717 set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001719 if (bi->bi_parent)
1720 B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001721}
1722
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723/* Get first empty buffer */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001724struct buffer_head *get_FEB(struct tree_balance *tb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001726 int i;
1727 struct buffer_head *first_b;
1728 struct buffer_info bi;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001729
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001730 for (i = 0; i < MAX_FEB_SIZE; i++)
Al Viro9dce07f2008-03-29 03:07:28 +00001731 if (tb->FEB[i] != NULL)
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001732 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001733
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001734 if (i == MAX_FEB_SIZE)
1735 reiserfs_panic(tb->tb_sb,
1736 "vs-12300: get_FEB: FEB list is empty");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001737
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001738 bi.tb = tb;
1739 bi.bi_bh = first_b = tb->FEB[i];
1740 bi.bi_parent = NULL;
1741 bi.bi_position = 0;
1742 make_empty_node(&bi);
1743 set_buffer_uptodate(first_b);
1744 tb->FEB[i] = NULL;
1745 tb->used[i] = first_b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001746
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001747 return (first_b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001748}
1749
Linus Torvalds1da177e2005-04-16 15:20:36 -07001750/* This is now used because reiserfs_free_block has to be able to
1751** schedule.
1752*/
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001753static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001754{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001755 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001756
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001757 if (buffer_dirty(bh))
1758 reiserfs_warning(tb->tb_sb,
1759 "store_thrown deals with dirty buffer");
Ahmed S. Darwish79a81ae2007-02-12 00:52:09 -08001760 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001761 if (!tb->thrown[i]) {
1762 tb->thrown[i] = bh;
1763 get_bh(bh); /* free_thrown puts this */
1764 return;
1765 }
1766 reiserfs_warning(tb->tb_sb, "store_thrown: too many thrown buffers");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001767}
1768
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001769static void free_thrown(struct tree_balance *tb)
1770{
1771 int i;
1772 b_blocknr_t blocknr;
Ahmed S. Darwish79a81ae2007-02-12 00:52:09 -08001773 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) {
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001774 if (tb->thrown[i]) {
1775 blocknr = tb->thrown[i]->b_blocknr;
1776 if (buffer_dirty(tb->thrown[i]))
1777 reiserfs_warning(tb->tb_sb,
1778 "free_thrown deals with dirty buffer %d",
1779 blocknr);
1780 brelse(tb->thrown[i]); /* incremented in store_thrown */
1781 reiserfs_free_block(tb->transaction_handle, NULL,
1782 blocknr, 0);
1783 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001784 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001785}
1786
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001787void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001788{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001789 struct block_head *blkh;
1790 blkh = B_BLK_HEAD(bh);
1791 set_blkh_level(blkh, FREE_LEVEL);
1792 set_blkh_nr_item(blkh, 0);
1793
1794 clear_buffer_dirty(bh);
1795 store_thrown(tb, bh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001796}
1797
1798/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001799void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
1800 struct buffer_head *src, int n_src)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001801{
1802
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001803 RFALSE(dest == NULL || src == NULL,
1804 "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1805 src, dest);
1806 RFALSE(!B_IS_KEYS_LEVEL(dest),
1807 "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1808 dest);
1809 RFALSE(n_dest < 0 || n_src < 0,
1810 "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1811 RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1812 "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1813 n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001814
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001815 if (B_IS_ITEMS_LEVEL(src))
1816 /* source buffer contains leaf node */
1817 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src),
1818 KEY_SIZE);
1819 else
1820 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src),
1821 KEY_SIZE);
1822
1823 do_balance_mark_internal_dirty(tb, dest, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824}
1825
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001826int get_left_neighbor_position(struct tree_balance *tb, int h)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001827{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001828 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001829
Al Viro9dce07f2008-03-29 03:07:28 +00001830 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL,
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001831 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1832 h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001833
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001834 if (Sh_position == 0)
1835 return B_NR_ITEMS(tb->FL[h]);
1836 else
1837 return Sh_position - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001838}
1839
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001840int get_right_neighbor_position(struct tree_balance *tb, int h)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001841{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001842 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001843
Al Viro9dce07f2008-03-29 03:07:28 +00001844 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL,
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001845 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1846 h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001847
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001848 if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
1849 return 0;
1850 else
1851 return Sh_position + 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001852}
1853
Linus Torvalds1da177e2005-04-16 15:20:36 -07001854#ifdef CONFIG_REISERFS_CHECK
1855
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001856int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
1857static void check_internal_node(struct super_block *s, struct buffer_head *bh,
1858 char *mes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001860 struct disk_child *dc;
1861 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001862
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001863 RFALSE(!bh, "PAP-12336: bh == 0");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001864
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001865 if (!bh || !B_IS_IN_TREE(bh))
1866 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001867
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001868 RFALSE(!buffer_dirty(bh) &&
1869 !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1870 "PAP-12337: buffer (%b) must be dirty", bh);
1871 dc = B_N_CHILD(bh, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001872
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001873 for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
1874 if (!is_reusable(s, dc_block_number(dc), 1)) {
1875 print_cur_tb(mes);
1876 reiserfs_panic(s,
1877 "PAP-12338: check_internal_node: invalid child pointer %y in %b",
1878 dc, bh);
1879 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001880 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001881}
1882
1883static int locked_or_not_in_tree(struct buffer_head *bh, char *which)
1884{
1885 if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
1886 !B_IS_IN_TREE(bh)) {
1887 reiserfs_warning(NULL,
1888 "vs-12339: locked_or_not_in_tree: %s (%b)",
1889 which, bh);
1890 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001891 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001892 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001893}
1894
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001895static int check_before_balancing(struct tree_balance *tb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001896{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001897 int retval = 0;
1898
1899 if (cur_tb) {
1900 reiserfs_panic(tb->tb_sb, "vs-12335: check_before_balancing: "
1901 "suspect that schedule occurred based on cur_tb not being null at this point in code. "
1902 "do_balance cannot properly handle schedule occurring while it runs.");
1903 }
1904
1905 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
1906 prepped all of these for us). */
1907 if (tb->lnum[0]) {
1908 retval |= locked_or_not_in_tree(tb->L[0], "L[0]");
1909 retval |= locked_or_not_in_tree(tb->FL[0], "FL[0]");
1910 retval |= locked_or_not_in_tree(tb->CFL[0], "CFL[0]");
1911 check_leaf(tb->L[0]);
1912 }
1913 if (tb->rnum[0]) {
1914 retval |= locked_or_not_in_tree(tb->R[0], "R[0]");
1915 retval |= locked_or_not_in_tree(tb->FR[0], "FR[0]");
1916 retval |= locked_or_not_in_tree(tb->CFR[0], "CFR[0]");
1917 check_leaf(tb->R[0]);
1918 }
1919 retval |= locked_or_not_in_tree(PATH_PLAST_BUFFER(tb->tb_path), "S[0]");
1920 check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1921
1922 return retval;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001923}
1924
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001925static void check_after_balance_leaf(struct tree_balance *tb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001926{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001927 if (tb->lnum[0]) {
1928 if (B_FREE_SPACE(tb->L[0]) !=
1929 MAX_CHILD_SIZE(tb->L[0]) -
1930 dc_size(B_N_CHILD
1931 (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
1932 print_cur_tb("12221");
1933 reiserfs_panic(tb->tb_sb,
1934 "PAP-12355: check_after_balance_leaf: shift to left was incorrect");
1935 }
1936 }
1937 if (tb->rnum[0]) {
1938 if (B_FREE_SPACE(tb->R[0]) !=
1939 MAX_CHILD_SIZE(tb->R[0]) -
1940 dc_size(B_N_CHILD
1941 (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
1942 print_cur_tb("12222");
1943 reiserfs_panic(tb->tb_sb,
1944 "PAP-12360: check_after_balance_leaf: shift to right was incorrect");
1945 }
1946 }
1947 if (PATH_H_PBUFFER(tb->tb_path, 1) &&
1948 (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
1949 (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1950 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1951 PATH_H_POSITION(tb->tb_path, 1)))))) {
1952 int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
1953 int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1954 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1955 PATH_H_POSITION(tb->tb_path,
1956 1))));
1957 print_cur_tb("12223");
1958 reiserfs_warning(tb->tb_sb,
1959 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1960 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1961 left,
1962 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
1963 PATH_H_PBUFFER(tb->tb_path, 1),
1964 PATH_H_POSITION(tb->tb_path, 1),
1965 dc_size(B_N_CHILD
1966 (PATH_H_PBUFFER(tb->tb_path, 1),
1967 PATH_H_POSITION(tb->tb_path, 1))),
1968 right);
1969 reiserfs_panic(tb->tb_sb,
1970 "PAP-12365: check_after_balance_leaf: S is incorrect");
1971 }
1972}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001973
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001974static void check_leaf_level(struct tree_balance *tb)
1975{
1976 check_leaf(tb->L[0]);
1977 check_leaf(tb->R[0]);
1978 check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1979}
1980
1981static void check_internal_levels(struct tree_balance *tb)
1982{
1983 int h;
1984
1985 /* check all internal nodes */
1986 for (h = 1; tb->insert_size[h]; h++) {
1987 check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
1988 "BAD BUFFER ON PATH");
1989 if (tb->lnum[h])
1990 check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
1991 if (tb->rnum[h])
1992 check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
1993 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001994
1995}
1996
1997#endif
1998
Linus Torvalds1da177e2005-04-16 15:20:36 -07001999/* Now we have all of the buffers that must be used in balancing of
2000 the tree. We rely on the assumption that schedule() will not occur
2001 while do_balance works. ( Only interrupt handlers are acceptable.)
2002 We balance the tree according to the analysis made before this,
2003 using buffers already obtained. For SMP support it will someday be
2004 necessary to add ordered locking of tb. */
2005
2006/* Some interesting rules of balancing:
2007
2008 we delete a maximum of two nodes per level per balancing: we never
2009 delete R, when we delete two of three nodes L, S, R then we move
2010 them into R.
2011
2012 we only delete L if we are deleting two nodes, if we delete only
2013 one node we delete S
2014
2015 if we shift leaves then we shift as much as we can: this is a
2016 deliberate policy of extremism in node packing which results in
2017 higher average utilization after repeated random balance operations
2018 at the cost of more memory copies and more balancing as a result of
2019 small insertions to full nodes.
2020
2021 if we shift internal nodes we try to evenly balance the node
2022 utilization, with consequent less balancing at the cost of lower
2023 utilization.
2024
2025 one could argue that the policy for directories in leaves should be
2026 that of internal nodes, but we will wait until another day to
2027 evaluate this.... It would be nice to someday measure and prove
2028 these assumptions as to what is optimal....
2029
2030*/
2031
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002032static inline void do_balance_starts(struct tree_balance *tb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002033{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002034 /* use print_cur_tb() to see initial state of struct
2035 tree_balance */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002036
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002037 /* store_print_tb (tb); */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002038
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002039 /* do not delete, just comment it out */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002040/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
2041 "check");*/
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002042 RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002043#ifdef CONFIG_REISERFS_CHECK
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002044 cur_tb = tb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002045#endif
2046}
2047
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002048static inline void do_balance_completed(struct tree_balance *tb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002049{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002050
Linus Torvalds1da177e2005-04-16 15:20:36 -07002051#ifdef CONFIG_REISERFS_CHECK
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002052 check_leaf_level(tb);
2053 check_internal_levels(tb);
2054 cur_tb = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055#endif
2056
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002057 /* reiserfs_free_block is no longer schedule safe. So, we need to
2058 ** put the buffers we want freed on the thrown list during do_balance,
2059 ** and then free them now
2060 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002061
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002062 REISERFS_SB(tb->tb_sb)->s_do_balance++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002063
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002064 /* release all nodes hold to perform the balancing */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002065 unfix_nodes(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002066
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002067 free_thrown(tb);
2068}
2069
2070void do_balance(struct tree_balance *tb, /* tree_balance structure */
2071 struct item_head *ih, /* item header of inserted item */
2072 const char *body, /* body of inserted item or bytes to paste */
2073 int flag)
2074{ /* i - insert, d - delete
2075 c - cut, p - paste
2076
2077 Cut means delete part of an item
2078 (includes removing an entry from a
2079 directory).
2080
2081 Delete means delete whole item.
2082
2083 Insert means add a new item into the
2084 tree.
2085
2086 Paste means to append to the end of an
2087 existing file or to insert a directory
2088 entry. */
2089 int child_pos, /* position of a child node in its parent */
2090 h; /* level of the tree being processed */
2091 struct item_head insert_key[2]; /* in our processing of one level
2092 we sometimes determine what
2093 must be inserted into the next
2094 higher level. This insertion
2095 consists of a key or two keys
2096 and their corresponding
2097 pointers */
2098 struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next
2099 level */
2100
2101 tb->tb_mode = flag;
2102 tb->need_balance_dirty = 0;
2103
2104 if (FILESYSTEM_CHANGED_TB(tb)) {
2105 reiserfs_panic(tb->tb_sb,
2106 "clm-6000: do_balance, fs generation has changed\n");
2107 }
2108 /* if we have no real work to do */
2109 if (!tb->insert_size[0]) {
2110 reiserfs_warning(tb->tb_sb,
2111 "PAP-12350: do_balance: insert_size == 0, mode == %c",
2112 flag);
2113 unfix_nodes(tb);
2114 return;
2115 }
2116
2117 atomic_inc(&(fs_generation(tb->tb_sb)));
2118 do_balance_starts(tb);
2119
Linus Torvalds1da177e2005-04-16 15:20:36 -07002120 /* balance leaf returns 0 except if combining L R and S into
2121 one node. see balance_internal() for explanation of this
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002122 line of code. */
2123 child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
2124 balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002125
2126#ifdef CONFIG_REISERFS_CHECK
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002127 check_after_balance_leaf(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002128#endif
2129
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002130 /* Balance internal level of the tree. */
2131 for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
2132 child_pos =
2133 balance_internal(tb, h, child_pos, insert_key, insert_ptr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002134
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002135 do_balance_completed(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002136
2137}