blob: fba304e64de81d51805d52434b152a6fc8aed50b [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
4
5/* Now we have all buffers that must be used in balancing of the tree */
6/* Further calculations can not cause schedule(), and thus the buffer */
7/* tree will be stable until the balancing will be finished */
8/* balance the tree according to the analysis made before, */
9/* and using buffers obtained after all above. */
10
Linus Torvalds1da177e2005-04-16 15:20:36 -070011/**
12 ** balance_leaf_when_delete
13 ** balance_leaf
14 ** do_balance
15 **
16 **/
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <asm/uaccess.h>
19#include <linux/time.h>
20#include <linux/reiserfs_fs.h>
21#include <linux/buffer_head.h>
22
23#ifdef CONFIG_REISERFS_CHECK
24
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070025struct tree_balance *cur_tb = NULL; /* detects whether more than one
26 copy of tb exists as a means
27 of checking whether schedule
28 is interrupting do_balance */
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#endif
30
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070031inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
32 struct buffer_head *bh, int flag)
Linus Torvalds1da177e2005-04-16 15:20:36 -070033{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070034 journal_mark_dirty(tb->transaction_handle,
35 tb->transaction_handle->t_super, bh);
Linus Torvalds1da177e2005-04-16 15:20:36 -070036}
37
38#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
39#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
40
Linus Torvalds1da177e2005-04-16 15:20:36 -070041/* summary:
42 if deleting something ( tb->insert_size[0] < 0 )
43 return(balance_leaf_when_delete()); (flag d handled here)
44 else
45 if lnum is larger than 0 we put items into the left node
46 if rnum is larger than 0 we put items into the right node
47 if snum1 is larger than 0 we put items into the new node s1
48 if snum2 is larger than 0 we put items into the new node s2
49Note that all *num* count new items being created.
50
51It would be easier to read balance_leaf() if each of these summary
52lines was a separate procedure rather than being inlined. I think
53that there are many passages here and in balance_leaf_when_delete() in
54which two calls to one procedure can replace two passages, and it
55might save cache space and improve software maintenance costs to do so.
56
57Vladimir made the perceptive comment that we should offload most of
58the decision making in this function into fix_nodes/check_balance, and
59then create some sort of structure in tb that says what actions should
60be performed by do_balance.
61
62-Hans */
63
Linus Torvalds1da177e2005-04-16 15:20:36 -070064/* Balance leaf node in case of delete or cut: insert_size[0] < 0
65 *
66 * lnum, rnum can have values >= -1
67 * -1 means that the neighbor must be joined with S
68 * 0 means that nothing should be done with the neighbor
69 * >0 means to shift entirely or partly the specified number of items to the neighbor
70 */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070071static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
Linus Torvalds1da177e2005-04-16 15:20:36 -070072{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070073 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
74 int item_pos = PATH_LAST_POSITION(tb->tb_path);
75 int pos_in_item = tb->tb_path->pos_in_item;
76 struct buffer_info bi;
77 int n;
78 struct item_head *ih;
Linus Torvalds1da177e2005-04-16 15:20:36 -070079
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070080 RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
81 "vs- 12000: level: wrong FR %z", tb->FR[0]);
82 RFALSE(tb->blknum[0] > 1,
83 "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
84 RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
85 "PAP-12010: tree can not be empty");
Linus Torvalds1da177e2005-04-16 15:20:36 -070086
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070087 ih = B_N_PITEM_HEAD(tbS0, item_pos);
Linus Torvalds1da177e2005-04-16 15:20:36 -070088
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070089 /* Delete or truncate the item */
Linus Torvalds1da177e2005-04-16 15:20:36 -070090
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070091 switch (flag) {
92 case M_DELETE: /* delete item in S[0] */
Linus Torvalds1da177e2005-04-16 15:20:36 -070093
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070094 RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
95 "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
96 -tb->insert_size[0], ih);
Linus Torvalds1da177e2005-04-16 15:20:36 -070097
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070098 bi.tb = tb;
99 bi.bi_bh = tbS0;
100 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
101 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
102 leaf_delete_items(&bi, 0, item_pos, 1, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700103
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700104 if (!item_pos && tb->CFL[0]) {
105 if (B_NR_ITEMS(tbS0)) {
106 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
107 0);
108 } else {
109 if (!PATH_H_POSITION(tb->tb_path, 1))
110 replace_key(tb, tb->CFL[0], tb->lkey[0],
111 PATH_H_PPARENT(tb->tb_path,
112 0), 0);
113 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700116 RFALSE(!item_pos && !tb->CFL[0],
117 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
118 tb->L[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700120 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700121
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700122 case M_CUT:{ /* cut item in S[0] */
123 bi.tb = tb;
124 bi.bi_bh = tbS0;
125 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
126 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
127 if (is_direntry_le_ih(ih)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700129 /* UFS unlink semantics are such that you can only delete one directory entry at a time. */
130 /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
131 tb->insert_size[0] = -1;
132 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
133 -tb->insert_size[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700135 RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
136 "PAP-12030: can not change delimiting key. CFL[0]=%p",
137 tb->CFL[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700139 if (!item_pos && !pos_in_item && tb->CFL[0]) {
140 replace_key(tb, tb->CFL[0], tb->lkey[0],
141 tbS0, 0);
142 }
143 } else {
144 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
145 -tb->insert_size[0]);
146
147 RFALSE(!ih_item_len(ih),
148 "PAP-12035: cut must leave non-zero dynamic length of item");
149 }
150 break;
151 }
152
153 default:
154 print_cur_tb("12040");
155 reiserfs_panic(tb->tb_sb,
156 "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)",
157 (flag ==
158 M_PASTE) ? "PASTE" : ((flag ==
159 M_INSERT) ? "INSERT" :
160 "UNKNOWN"), flag);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700161 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700163 /* the rule is that no shifting occurs unless by shifting a node can be freed */
164 n = B_NR_ITEMS(tbS0);
165 if (tb->lnum[0]) { /* L[0] takes part in balancing */
166 if (tb->lnum[0] == -1) { /* L[0] must be joined with S[0] */
167 if (tb->rnum[0] == -1) { /* R[0] must be also joined with S[0] */
168 if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
169 /* all contents of all the 3 buffers will be in L[0] */
170 if (PATH_H_POSITION(tb->tb_path, 1) == 0
171 && 1 < B_NR_ITEMS(tb->FR[0]))
172 replace_key(tb, tb->CFL[0],
173 tb->lkey[0],
174 tb->FR[0], 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700176 leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
177 -1, NULL);
178 leaf_move_items(LEAF_FROM_R_TO_L, tb,
179 B_NR_ITEMS(tb->R[0]),
180 -1, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700182 reiserfs_invalidate_buffer(tb, tbS0);
183 reiserfs_invalidate_buffer(tb,
184 tb->R[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700185
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700186 return 0;
187 }
188 /* all contents of all the 3 buffers will be in R[0] */
189 leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
190 NULL);
191 leaf_move_items(LEAF_FROM_L_TO_R, tb,
192 B_NR_ITEMS(tb->L[0]), -1, NULL);
193
194 /* right_delimiting_key is correct in R[0] */
195 replace_key(tb, tb->CFR[0], tb->rkey[0],
196 tb->R[0], 0);
197
198 reiserfs_invalidate_buffer(tb, tbS0);
199 reiserfs_invalidate_buffer(tb, tb->L[0]);
200
201 return -1;
202 }
203
204 RFALSE(tb->rnum[0] != 0,
205 "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
206 /* all contents of L[0] and S[0] will be in L[0] */
207 leaf_shift_left(tb, n, -1);
208
209 reiserfs_invalidate_buffer(tb, tbS0);
210
211 return 0;
212 }
213 /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
214
215 RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
216 (tb->lnum[0] + tb->rnum[0] > n + 1),
217 "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
218 tb->rnum[0], tb->lnum[0], n);
219 RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
220 (tb->lbytes != -1 || tb->rbytes != -1),
221 "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
222 tb->rbytes, tb->lbytes);
223 RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
224 (tb->lbytes < 1 || tb->rbytes != -1),
225 "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
226 tb->rbytes, tb->lbytes);
227
228 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
229 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
230
231 reiserfs_invalidate_buffer(tb, tbS0);
232
233 return 0;
234 }
235
236 if (tb->rnum[0] == -1) {
237 /* all contents of R[0] and S[0] will be in R[0] */
238 leaf_shift_right(tb, n, -1);
239 reiserfs_invalidate_buffer(tb, tbS0);
240 return 0;
241 }
242
243 RFALSE(tb->rnum[0],
244 "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246}
247
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700248static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item header of inserted item (this is on little endian) */
249 const char *body, /* body of inserted item or bytes to paste */
250 int flag, /* i - insert, d - delete, c - cut, p - paste
251 (see comment to do_balance) */
252 struct item_head *insert_key, /* in our processing of one level we sometimes determine what
253 must be inserted into the next higher level. This insertion
254 consists of a key or two keys and their corresponding
255 pointers */
256 struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257 )
258{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700259 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
260 int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0]
261 of the affected item */
262 struct buffer_info bi;
263 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */
264 int snum[2]; /* number of items that will be placed
265 into S_new (includes partially shifted
266 items) */
267 int sbytes[2]; /* if an item is partially shifted into S_new then
268 if it is a directory item
269 it is the number of entries from the item that are shifted into S_new
270 else
271 it is the number of bytes from the item that are shifted into S_new
272 */
273 int n, i;
274 int ret_val;
275 int pos_in_item;
276 int zeros_num;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700278 PROC_INFO_INC(tb->tb_sb, balance_at[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700279
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700280 /* Make balance in case insert_size[0] < 0 */
281 if (tb->insert_size[0] < 0)
282 return balance_leaf_when_delete(tb, flag);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700284 zeros_num = 0;
285 if (flag == M_INSERT && body == 0)
286 zeros_num = ih_item_len(ih);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700288 pos_in_item = tb->tb_path->pos_in_item;
289 /* for indirect item pos_in_item is measured in unformatted node
290 pointers. Recalculate to bytes */
291 if (flag != M_INSERT
292 && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos)))
293 pos_in_item *= UNFM_P_SIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700295 if (tb->lnum[0] > 0) {
296 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
297 if (item_pos < tb->lnum[0]) {
298 /* new item or it part falls to L[0], shift it too */
299 n = B_NR_ITEMS(tb->L[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700300
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700301 switch (flag) {
302 case M_INSERT: /* insert item into L[0] */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700304 if (item_pos == tb->lnum[0] - 1
305 && tb->lbytes != -1) {
306 /* part of new item falls into L[0] */
307 int new_item_len;
308 int version;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700310 ret_val =
311 leaf_shift_left(tb, tb->lnum[0] - 1,
312 -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700314 /* Calculate item length to insert to S[0] */
315 new_item_len =
316 ih_item_len(ih) - tb->lbytes;
317 /* Calculate and check item length to insert to L[0] */
318 put_ih_item_len(ih,
319 ih_item_len(ih) -
320 new_item_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700322 RFALSE(ih_item_len(ih) <= 0,
323 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
324 ih_item_len(ih));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700326 /* Insert new item into L[0] */
327 bi.tb = tb;
328 bi.bi_bh = tb->L[0];
329 bi.bi_parent = tb->FL[0];
330 bi.bi_position =
331 get_left_neighbor_position(tb, 0);
332 leaf_insert_into_buf(&bi,
333 n + item_pos -
334 ret_val, ih, body,
335 zeros_num >
336 ih_item_len(ih) ?
337 ih_item_len(ih) :
338 zeros_num);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700340 version = ih_version(ih);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700341
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700342 /* Calculate key component, item length and body to insert into S[0] */
343 set_le_ih_k_offset(ih,
344 le_ih_k_offset(ih) +
345 (tb->
346 lbytes <<
347 (is_indirect_le_ih
348 (ih) ? tb->tb_sb->
349 s_blocksize_bits -
350 UNFM_P_SHIFT :
351 0)));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700352
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700353 put_ih_item_len(ih, new_item_len);
354 if (tb->lbytes > zeros_num) {
355 body +=
356 (tb->lbytes - zeros_num);
357 zeros_num = 0;
358 } else
359 zeros_num -= tb->lbytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700360
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700361 RFALSE(ih_item_len(ih) <= 0,
362 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
363 ih_item_len(ih));
364 } else {
365 /* new item in whole falls into L[0] */
366 /* Shift lnum[0]-1 items to L[0] */
367 ret_val =
368 leaf_shift_left(tb, tb->lnum[0] - 1,
369 tb->lbytes);
370 /* Insert new item into L[0] */
371 bi.tb = tb;
372 bi.bi_bh = tb->L[0];
373 bi.bi_parent = tb->FL[0];
374 bi.bi_position =
375 get_left_neighbor_position(tb, 0);
376 leaf_insert_into_buf(&bi,
377 n + item_pos -
378 ret_val, ih, body,
379 zeros_num);
380 tb->insert_size[0] = 0;
381 zeros_num = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700382 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700383 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700384
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700385 case M_PASTE: /* append item in L[0] */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700386
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700387 if (item_pos == tb->lnum[0] - 1
388 && tb->lbytes != -1) {
389 /* we must shift the part of the appended item */
390 if (is_direntry_le_ih
391 (B_N_PITEM_HEAD(tbS0, item_pos))) {
392
393 RFALSE(zeros_num,
394 "PAP-12090: invalid parameter in case of a directory");
395 /* directory item */
396 if (tb->lbytes > pos_in_item) {
397 /* new directory entry falls into L[0] */
398 struct item_head
399 *pasted;
400 int l_pos_in_item =
401 pos_in_item;
402
403 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
404 ret_val =
405 leaf_shift_left(tb,
406 tb->
407 lnum
408 [0],
409 tb->
410 lbytes
411 -
412 1);
413 if (ret_val
414 && !item_pos) {
415 pasted =
416 B_N_PITEM_HEAD
417 (tb->L[0],
418 B_NR_ITEMS
419 (tb->
420 L[0]) -
421 1);
422 l_pos_in_item +=
423 I_ENTRY_COUNT
424 (pasted) -
425 (tb->
426 lbytes -
427 1);
428 }
429
430 /* Append given directory entry to directory item */
431 bi.tb = tb;
432 bi.bi_bh = tb->L[0];
433 bi.bi_parent =
434 tb->FL[0];
435 bi.bi_position =
436 get_left_neighbor_position
437 (tb, 0);
438 leaf_paste_in_buffer
439 (&bi,
440 n + item_pos -
441 ret_val,
442 l_pos_in_item,
443 tb->insert_size[0],
444 body, zeros_num);
445
446 /* previous string prepared space for pasting new entry, following string pastes this entry */
447
448 /* when we have merge directory item, pos_in_item has been changed too */
449
450 /* paste new directory entry. 1 is entry number */
451 leaf_paste_entries(bi.
452 bi_bh,
453 n +
454 item_pos
455 -
456 ret_val,
457 l_pos_in_item,
458 1,
459 (struct
460 reiserfs_de_head
461 *)
462 body,
463 body
464 +
465 DEH_SIZE,
466 tb->
467 insert_size
468 [0]
469 );
470 tb->insert_size[0] = 0;
471 } else {
472 /* new directory item doesn't fall into L[0] */
473 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
474 leaf_shift_left(tb,
475 tb->
476 lnum[0],
477 tb->
478 lbytes);
479 }
480 /* Calculate new position to append in item body */
481 pos_in_item -= tb->lbytes;
482 } else {
483 /* regular object */
484 RFALSE(tb->lbytes <= 0,
485 "PAP-12095: there is nothing to shift to L[0]. lbytes=%d",
486 tb->lbytes);
487 RFALSE(pos_in_item !=
488 ih_item_len
489 (B_N_PITEM_HEAD
490 (tbS0, item_pos)),
491 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
492 ih_item_len
493 (B_N_PITEM_HEAD
494 (tbS0, item_pos)),
495 pos_in_item);
496
497 if (tb->lbytes >= pos_in_item) {
498 /* appended item will be in L[0] in whole */
499 int l_n;
500
501 /* this bytes number must be appended to the last item of L[h] */
502 l_n =
503 tb->lbytes -
504 pos_in_item;
505
506 /* Calculate new insert_size[0] */
507 tb->insert_size[0] -=
508 l_n;
509
510 RFALSE(tb->
511 insert_size[0] <=
512 0,
513 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
514 tb->
515 insert_size[0]);
516 ret_val =
517 leaf_shift_left(tb,
518 tb->
519 lnum
520 [0],
521 ih_item_len
522 (B_N_PITEM_HEAD
523 (tbS0,
524 item_pos)));
525 /* Append to body of item in L[0] */
526 bi.tb = tb;
527 bi.bi_bh = tb->L[0];
528 bi.bi_parent =
529 tb->FL[0];
530 bi.bi_position =
531 get_left_neighbor_position
532 (tb, 0);
533 leaf_paste_in_buffer
534 (&bi,
535 n + item_pos -
536 ret_val,
537 ih_item_len
538 (B_N_PITEM_HEAD
539 (tb->L[0],
540 n + item_pos -
541 ret_val)), l_n,
542 body,
543 zeros_num >
544 l_n ? l_n :
545 zeros_num);
546 /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
547 {
548 int version;
549 int temp_l =
550 l_n;
551
552 RFALSE
553 (ih_item_len
554 (B_N_PITEM_HEAD
555 (tbS0,
556 0)),
557 "PAP-12106: item length must be 0");
558 RFALSE
559 (comp_short_le_keys
560 (B_N_PKEY
561 (tbS0, 0),
562 B_N_PKEY
563 (tb->L[0],
564 n +
565 item_pos
566 -
567 ret_val)),
568 "PAP-12107: items must be of the same file");
569 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) {
570 temp_l =
571 l_n
572 <<
573 (tb->
574 tb_sb->
575 s_blocksize_bits
576 -
577 UNFM_P_SHIFT);
578 }
579 /* update key of first item in S0 */
580 version =
581 ih_version
582 (B_N_PITEM_HEAD
583 (tbS0, 0));
584 set_le_key_k_offset
585 (version,
586 B_N_PKEY
587 (tbS0, 0),
588 le_key_k_offset
589 (version,
590 B_N_PKEY
591 (tbS0,
592 0)) +
593 temp_l);
594 /* update left delimiting key */
595 set_le_key_k_offset
596 (version,
597 B_N_PDELIM_KEY
598 (tb->
599 CFL[0],
600 tb->
601 lkey[0]),
602 le_key_k_offset
603 (version,
604 B_N_PDELIM_KEY
605 (tb->
606 CFL[0],
607 tb->
608 lkey[0]))
609 + temp_l);
610 }
611
612 /* Calculate new body, position in item and insert_size[0] */
613 if (l_n > zeros_num) {
614 body +=
615 (l_n -
616 zeros_num);
617 zeros_num = 0;
618 } else
619 zeros_num -=
620 l_n;
621 pos_in_item = 0;
622
623 RFALSE
624 (comp_short_le_keys
625 (B_N_PKEY(tbS0, 0),
626 B_N_PKEY(tb->L[0],
627 B_NR_ITEMS
628 (tb->
629 L[0]) -
630 1))
631 ||
632 !op_is_left_mergeable
633 (B_N_PKEY(tbS0, 0),
634 tbS0->b_size)
635 ||
636 !op_is_left_mergeable
637 (B_N_PDELIM_KEY
638 (tb->CFL[0],
639 tb->lkey[0]),
640 tbS0->b_size),
641 "PAP-12120: item must be merge-able with left neighboring item");
642 } else { /* only part of the appended item will be in L[0] */
643
644 /* Calculate position in item for append in S[0] */
645 pos_in_item -=
646 tb->lbytes;
647
648 RFALSE(pos_in_item <= 0,
649 "PAP-12125: no place for paste. pos_in_item=%d",
650 pos_in_item);
651
652 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
653 leaf_shift_left(tb,
654 tb->
655 lnum[0],
656 tb->
657 lbytes);
658 }
659 }
660 } else { /* appended item will be in L[0] in whole */
661
662 struct item_head *pasted;
663
664 if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergable */
665 /* then increment pos_in_item by the size of the last item in L[0] */
666 pasted =
667 B_N_PITEM_HEAD(tb->L[0],
668 n - 1);
669 if (is_direntry_le_ih(pasted))
670 pos_in_item +=
671 ih_entry_count
672 (pasted);
673 else
674 pos_in_item +=
675 ih_item_len(pasted);
676 }
677
678 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
679 ret_val =
680 leaf_shift_left(tb, tb->lnum[0],
681 tb->lbytes);
682 /* Append to body of item in L[0] */
683 bi.tb = tb;
684 bi.bi_bh = tb->L[0];
685 bi.bi_parent = tb->FL[0];
686 bi.bi_position =
687 get_left_neighbor_position(tb, 0);
688 leaf_paste_in_buffer(&bi,
689 n + item_pos -
690 ret_val,
691 pos_in_item,
692 tb->insert_size[0],
693 body, zeros_num);
694
695 /* if appended item is directory, paste entry */
696 pasted =
697 B_N_PITEM_HEAD(tb->L[0],
698 n + item_pos -
699 ret_val);
700 if (is_direntry_le_ih(pasted))
701 leaf_paste_entries(bi.bi_bh,
702 n +
703 item_pos -
704 ret_val,
705 pos_in_item,
706 1,
707 (struct
708 reiserfs_de_head
709 *)body,
710 body +
711 DEH_SIZE,
712 tb->
713 insert_size
714 [0]
715 );
716 /* if appended item is indirect item, put unformatted node into un list */
717 if (is_indirect_le_ih(pasted))
718 set_ih_free_space(pasted, 0);
719 tb->insert_size[0] = 0;
720 zeros_num = 0;
721 }
722 break;
723 default: /* cases d and t */
724 reiserfs_panic(tb->tb_sb,
725 "PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)",
726 (flag ==
727 M_DELETE) ? "DELETE" : ((flag ==
728 M_CUT)
729 ? "CUT"
730 :
731 "UNKNOWN"),
732 flag);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700734 } else {
735 /* new item doesn't fall into L[0] */
736 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700738 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700740 /* tb->lnum[0] > 0 */
741 /* Calculate new item position */
742 item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700743
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700744 if (tb->rnum[0] > 0) {
745 /* shift rnum[0] items from S[0] to the right neighbor R[0] */
746 n = B_NR_ITEMS(tbS0);
747 switch (flag) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700748
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700749 case M_INSERT: /* insert item */
750 if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */
751 if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */
752 loff_t old_key_comp, old_len,
753 r_zeros_number;
754 const char *r_body;
755 int version;
756 loff_t offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700757
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700758 leaf_shift_right(tb, tb->rnum[0] - 1,
759 -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700761 version = ih_version(ih);
762 /* Remember key component and item length */
763 old_key_comp = le_ih_k_offset(ih);
764 old_len = ih_item_len(ih);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700766 /* Calculate key component and item length to insert into R[0] */
767 offset =
768 le_ih_k_offset(ih) +
769 ((old_len -
770 tb->
771 rbytes) << (is_indirect_le_ih(ih)
772 ? tb->tb_sb->
773 s_blocksize_bits -
774 UNFM_P_SHIFT : 0));
775 set_le_ih_k_offset(ih, offset);
776 put_ih_item_len(ih, tb->rbytes);
777 /* Insert part of the item into R[0] */
778 bi.tb = tb;
779 bi.bi_bh = tb->R[0];
780 bi.bi_parent = tb->FR[0];
781 bi.bi_position =
782 get_right_neighbor_position(tb, 0);
783 if ((old_len - tb->rbytes) > zeros_num) {
784 r_zeros_number = 0;
785 r_body =
786 body + (old_len -
787 tb->rbytes) -
788 zeros_num;
789 } else {
790 r_body = body;
791 r_zeros_number =
792 zeros_num - (old_len -
793 tb->rbytes);
794 zeros_num -= r_zeros_number;
795 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700796
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700797 leaf_insert_into_buf(&bi, 0, ih, r_body,
798 r_zeros_number);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700799
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700800 /* Replace right delimiting key by first key in R[0] */
801 replace_key(tb, tb->CFR[0], tb->rkey[0],
802 tb->R[0], 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700803
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700804 /* Calculate key component and item length to insert into S[0] */
805 set_le_ih_k_offset(ih, old_key_comp);
806 put_ih_item_len(ih,
807 old_len - tb->rbytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700809 tb->insert_size[0] -= tb->rbytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700811 } else { /* whole new item falls into R[0] */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700812
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700813 /* Shift rnum[0]-1 items to R[0] */
814 ret_val =
815 leaf_shift_right(tb,
816 tb->rnum[0] - 1,
817 tb->rbytes);
818 /* Insert new item into R[0] */
819 bi.tb = tb;
820 bi.bi_bh = tb->R[0];
821 bi.bi_parent = tb->FR[0];
822 bi.bi_position =
823 get_right_neighbor_position(tb, 0);
824 leaf_insert_into_buf(&bi,
825 item_pos - n +
826 tb->rnum[0] - 1,
827 ih, body,
828 zeros_num);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700829
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700830 if (item_pos - n + tb->rnum[0] - 1 == 0) {
831 replace_key(tb, tb->CFR[0],
832 tb->rkey[0],
833 tb->R[0], 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700835 }
836 zeros_num = tb->insert_size[0] = 0;
837 }
838 } else { /* new item or part of it doesn't fall into R[0] */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700840 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700841 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700842 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700844 case M_PASTE: /* append item */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700845
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700846 if (n - tb->rnum[0] <= item_pos) { /* pasted item or part of it falls to R[0] */
847 if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) { /* we must shift the part of the appended item */
848 if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) { /* we append to directory item */
849 int entry_count;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700851 RFALSE(zeros_num,
852 "PAP-12145: invalid parameter in case of a directory");
853 entry_count =
854 I_ENTRY_COUNT(B_N_PITEM_HEAD
855 (tbS0,
856 item_pos));
857 if (entry_count - tb->rbytes <
858 pos_in_item)
859 /* new directory entry falls into R[0] */
860 {
861 int paste_entry_position;
862
863 RFALSE(tb->rbytes - 1 >=
864 entry_count
865 || !tb->
866 insert_size[0],
867 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
868 tb->rbytes,
869 entry_count);
870 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
871 leaf_shift_right(tb,
872 tb->
873 rnum
874 [0],
875 tb->
876 rbytes
877 - 1);
878 /* Paste given directory entry to directory item */
879 paste_entry_position =
880 pos_in_item -
881 entry_count +
882 tb->rbytes - 1;
883 bi.tb = tb;
884 bi.bi_bh = tb->R[0];
885 bi.bi_parent =
886 tb->FR[0];
887 bi.bi_position =
888 get_right_neighbor_position
889 (tb, 0);
890 leaf_paste_in_buffer
891 (&bi, 0,
892 paste_entry_position,
893 tb->insert_size[0],
894 body, zeros_num);
895 /* paste entry */
896 leaf_paste_entries(bi.
897 bi_bh,
898 0,
899 paste_entry_position,
900 1,
901 (struct
902 reiserfs_de_head
903 *)
904 body,
905 body
906 +
907 DEH_SIZE,
908 tb->
909 insert_size
910 [0]
911 );
912
913 if (paste_entry_position
914 == 0) {
915 /* change delimiting keys */
916 replace_key(tb,
917 tb->
918 CFR
919 [0],
920 tb->
921 rkey
922 [0],
923 tb->
924 R
925 [0],
926 0);
927 }
928
929 tb->insert_size[0] = 0;
930 pos_in_item++;
931 } else { /* new directory entry doesn't fall into R[0] */
932
933 leaf_shift_right(tb,
934 tb->
935 rnum
936 [0],
937 tb->
938 rbytes);
939 }
940 } else { /* regular object */
941
942 int n_shift, n_rem,
943 r_zeros_number;
944 const char *r_body;
945
946 /* Calculate number of bytes which must be shifted from appended item */
947 if ((n_shift =
948 tb->rbytes -
949 tb->insert_size[0]) < 0)
950 n_shift = 0;
951
952 RFALSE(pos_in_item !=
953 ih_item_len
954 (B_N_PITEM_HEAD
955 (tbS0, item_pos)),
956 "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
957 pos_in_item,
958 ih_item_len
959 (B_N_PITEM_HEAD
960 (tbS0, item_pos)));
961
962 leaf_shift_right(tb,
963 tb->rnum[0],
964 n_shift);
965 /* Calculate number of bytes which must remain in body after appending to R[0] */
966 if ((n_rem =
967 tb->insert_size[0] -
968 tb->rbytes) < 0)
969 n_rem = 0;
970
971 {
972 int version;
973 unsigned long temp_rem =
974 n_rem;
975
976 version =
977 ih_version
978 (B_N_PITEM_HEAD
979 (tb->R[0], 0));
980 if (is_indirect_le_key
981 (version,
982 B_N_PKEY(tb->R[0],
983 0))) {
984 temp_rem =
985 n_rem <<
986 (tb->tb_sb->
987 s_blocksize_bits
988 -
989 UNFM_P_SHIFT);
990 }
991 set_le_key_k_offset
992 (version,
993 B_N_PKEY(tb->R[0],
994 0),
995 le_key_k_offset
996 (version,
997 B_N_PKEY(tb->R[0],
998 0)) +
999 temp_rem);
1000 set_le_key_k_offset
1001 (version,
1002 B_N_PDELIM_KEY(tb->
1003 CFR
1004 [0],
1005 tb->
1006 rkey
1007 [0]),
1008 le_key_k_offset
1009 (version,
1010 B_N_PDELIM_KEY
1011 (tb->CFR[0],
1012 tb->rkey[0])) +
1013 temp_rem);
1014 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001015/* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
1016 k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001017 do_balance_mark_internal_dirty
1018 (tb, tb->CFR[0], 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001020 /* Append part of body into R[0] */
1021 bi.tb = tb;
1022 bi.bi_bh = tb->R[0];
1023 bi.bi_parent = tb->FR[0];
1024 bi.bi_position =
1025 get_right_neighbor_position
1026 (tb, 0);
1027 if (n_rem > zeros_num) {
1028 r_zeros_number = 0;
1029 r_body =
1030 body + n_rem -
1031 zeros_num;
1032 } else {
1033 r_body = body;
1034 r_zeros_number =
1035 zeros_num - n_rem;
1036 zeros_num -=
1037 r_zeros_number;
1038 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001040 leaf_paste_in_buffer(&bi, 0,
1041 n_shift,
1042 tb->
1043 insert_size
1044 [0] -
1045 n_rem,
1046 r_body,
1047 r_zeros_number);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001048
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001049 if (is_indirect_le_ih
1050 (B_N_PITEM_HEAD
1051 (tb->R[0], 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052#if 0
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001053 RFALSE(n_rem,
1054 "PAP-12160: paste more than one unformatted node pointer");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001055#endif
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001056 set_ih_free_space
1057 (B_N_PITEM_HEAD
1058 (tb->R[0], 0), 0);
1059 }
1060 tb->insert_size[0] = n_rem;
1061 if (!n_rem)
1062 pos_in_item++;
1063 }
1064 } else { /* pasted item in whole falls into R[0] */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001065
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001066 struct item_head *pasted;
1067
1068 ret_val =
1069 leaf_shift_right(tb, tb->rnum[0],
1070 tb->rbytes);
1071 /* append item in R[0] */
1072 if (pos_in_item >= 0) {
1073 bi.tb = tb;
1074 bi.bi_bh = tb->R[0];
1075 bi.bi_parent = tb->FR[0];
1076 bi.bi_position =
1077 get_right_neighbor_position
1078 (tb, 0);
1079 leaf_paste_in_buffer(&bi,
1080 item_pos -
1081 n +
1082 tb->
1083 rnum[0],
1084 pos_in_item,
1085 tb->
1086 insert_size
1087 [0], body,
1088 zeros_num);
1089 }
1090
1091 /* paste new entry, if item is directory item */
1092 pasted =
1093 B_N_PITEM_HEAD(tb->R[0],
1094 item_pos - n +
1095 tb->rnum[0]);
1096 if (is_direntry_le_ih(pasted)
1097 && pos_in_item >= 0) {
1098 leaf_paste_entries(bi.bi_bh,
1099 item_pos -
1100 n +
1101 tb->rnum[0],
1102 pos_in_item,
1103 1,
1104 (struct
1105 reiserfs_de_head
1106 *)body,
1107 body +
1108 DEH_SIZE,
1109 tb->
1110 insert_size
1111 [0]
1112 );
1113 if (!pos_in_item) {
1114
1115 RFALSE(item_pos - n +
1116 tb->rnum[0],
1117 "PAP-12165: directory item must be first item of node when pasting is in 0th position");
1118
1119 /* update delimiting keys */
1120 replace_key(tb,
1121 tb->CFR[0],
1122 tb->rkey[0],
1123 tb->R[0],
1124 0);
1125 }
1126 }
1127
1128 if (is_indirect_le_ih(pasted))
1129 set_ih_free_space(pasted, 0);
1130 zeros_num = tb->insert_size[0] = 0;
1131 }
1132 } else { /* new item doesn't fall into R[0] */
1133
1134 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
1135 }
1136 break;
1137 default: /* cases d and t */
1138 reiserfs_panic(tb->tb_sb,
1139 "PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)",
1140 (flag ==
1141 M_DELETE) ? "DELETE" : ((flag ==
1142 M_CUT) ? "CUT"
1143 : "UNKNOWN"),
1144 flag);
1145 }
1146
1147 }
1148
1149 /* tb->rnum[0] > 0 */
1150 RFALSE(tb->blknum[0] > 3,
1151 "PAP-12180: blknum can not be %d. It must be <= 3",
1152 tb->blknum[0]);
1153 RFALSE(tb->blknum[0] < 0,
1154 "PAP-12185: blknum can not be %d. It must be >= 0",
1155 tb->blknum[0]);
1156
1157 /* if while adding to a node we discover that it is possible to split
1158 it in two, and merge the left part into the left neighbor and the
1159 right part into the right neighbor, eliminating the node */
1160 if (tb->blknum[0] == 0) { /* node S[0] is empty now */
1161
1162 RFALSE(!tb->lnum[0] || !tb->rnum[0],
1163 "PAP-12190: lnum and rnum must not be zero");
1164 /* if insertion was done before 0-th position in R[0], right
1165 delimiting key of the tb->L[0]'s and left delimiting key are
1166 not set correctly */
1167 if (tb->CFL[0]) {
1168 if (!tb->CFR[0])
1169 reiserfs_panic(tb->tb_sb,
1170 "vs-12195: balance_leaf: CFR not initialized");
1171 copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
1172 B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]));
1173 do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
1174 }
1175
1176 reiserfs_invalidate_buffer(tb, tbS0);
1177 return 0;
1178 }
1179
1180 /* Fill new nodes that appear in place of S[0] */
1181
1182 /* I am told that this copying is because we need an array to enable
1183 the looping code. -Hans */
1184 snum[0] = tb->s1num, snum[1] = tb->s2num;
1185 sbytes[0] = tb->s1bytes;
1186 sbytes[1] = tb->s2bytes;
1187 for (i = tb->blknum[0] - 2; i >= 0; i--) {
1188
1189 RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i,
1190 snum[i]);
1191
1192 /* here we shift from S to S_new nodes */
1193
1194 S_new[i] = get_FEB(tb);
1195
1196 /* initialized block type and tree level */
1197 set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL);
1198
1199 n = B_NR_ITEMS(tbS0);
1200
1201 switch (flag) {
1202 case M_INSERT: /* insert item */
1203
1204 if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */
1205 if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */
1206 int old_key_comp, old_len,
1207 r_zeros_number;
1208 const char *r_body;
1209 int version;
1210
1211 /* Move snum[i]-1 items from S[0] to S_new[i] */
1212 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1213 snum[i] - 1, -1,
1214 S_new[i]);
1215 /* Remember key component and item length */
1216 version = ih_version(ih);
1217 old_key_comp = le_ih_k_offset(ih);
1218 old_len = ih_item_len(ih);
1219
1220 /* Calculate key component and item length to insert into S_new[i] */
1221 set_le_ih_k_offset(ih,
1222 le_ih_k_offset(ih) +
1223 ((old_len -
1224 sbytes[i]) <<
1225 (is_indirect_le_ih
1226 (ih) ? tb->tb_sb->
1227 s_blocksize_bits -
1228 UNFM_P_SHIFT :
1229 0)));
1230
1231 put_ih_item_len(ih, sbytes[i]);
1232
1233 /* Insert part of the item into S_new[i] before 0-th item */
1234 bi.tb = tb;
1235 bi.bi_bh = S_new[i];
1236 bi.bi_parent = NULL;
1237 bi.bi_position = 0;
1238
1239 if ((old_len - sbytes[i]) > zeros_num) {
1240 r_zeros_number = 0;
1241 r_body =
1242 body + (old_len -
1243 sbytes[i]) -
1244 zeros_num;
1245 } else {
1246 r_body = body;
1247 r_zeros_number =
1248 zeros_num - (old_len -
1249 sbytes[i]);
1250 zeros_num -= r_zeros_number;
1251 }
1252
1253 leaf_insert_into_buf(&bi, 0, ih, r_body,
1254 r_zeros_number);
1255
1256 /* Calculate key component and item length to insert into S[i] */
1257 set_le_ih_k_offset(ih, old_key_comp);
1258 put_ih_item_len(ih,
1259 old_len - sbytes[i]);
1260 tb->insert_size[0] -= sbytes[i];
1261 } else { /* whole new item falls into S_new[i] */
1262
1263 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
1264 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1265 snum[i] - 1, sbytes[i],
1266 S_new[i]);
1267
1268 /* Insert new item into S_new[i] */
1269 bi.tb = tb;
1270 bi.bi_bh = S_new[i];
1271 bi.bi_parent = NULL;
1272 bi.bi_position = 0;
1273 leaf_insert_into_buf(&bi,
1274 item_pos - n +
1275 snum[i] - 1, ih,
1276 body, zeros_num);
1277
1278 zeros_num = tb->insert_size[0] = 0;
1279 }
1280 }
1281
1282 else { /* new item or it part don't falls into S_new[i] */
1283
1284 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1285 snum[i], sbytes[i], S_new[i]);
1286 }
1287 break;
1288
1289 case M_PASTE: /* append item */
1290
1291 if (n - snum[i] <= item_pos) { /* pasted item or part if it falls to S_new[i] */
1292 if (item_pos == n - snum[i] && sbytes[i] != -1) { /* we must shift part of the appended item */
1293 struct item_head *aux_ih;
1294
1295 RFALSE(ih, "PAP-12210: ih must be 0");
1296
1297 if (is_direntry_le_ih
1298 (aux_ih =
1299 B_N_PITEM_HEAD(tbS0, item_pos))) {
1300 /* we append to directory item */
1301
1302 int entry_count;
1303
1304 entry_count =
1305 ih_entry_count(aux_ih);
1306
1307 if (entry_count - sbytes[i] <
1308 pos_in_item
1309 && pos_in_item <=
1310 entry_count) {
1311 /* new directory entry falls into S_new[i] */
1312
1313 RFALSE(!tb->
1314 insert_size[0],
1315 "PAP-12215: insert_size is already 0");
1316 RFALSE(sbytes[i] - 1 >=
1317 entry_count,
1318 "PAP-12220: there are no so much entries (%d), only %d",
1319 sbytes[i] - 1,
1320 entry_count);
1321
1322 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
1323 leaf_move_items
1324 (LEAF_FROM_S_TO_SNEW,
1325 tb, snum[i],
1326 sbytes[i] - 1,
1327 S_new[i]);
1328 /* Paste given directory entry to directory item */
1329 bi.tb = tb;
1330 bi.bi_bh = S_new[i];
1331 bi.bi_parent = NULL;
1332 bi.bi_position = 0;
1333 leaf_paste_in_buffer
1334 (&bi, 0,
1335 pos_in_item -
1336 entry_count +
1337 sbytes[i] - 1,
1338 tb->insert_size[0],
1339 body, zeros_num);
1340 /* paste new directory entry */
1341 leaf_paste_entries(bi.
1342 bi_bh,
1343 0,
1344 pos_in_item
1345 -
1346 entry_count
1347 +
1348 sbytes
1349 [i] -
1350 1, 1,
1351 (struct
1352 reiserfs_de_head
1353 *)
1354 body,
1355 body
1356 +
1357 DEH_SIZE,
1358 tb->
1359 insert_size
1360 [0]
1361 );
1362 tb->insert_size[0] = 0;
1363 pos_in_item++;
1364 } else { /* new directory entry doesn't fall into S_new[i] */
1365 leaf_move_items
1366 (LEAF_FROM_S_TO_SNEW,
1367 tb, snum[i],
1368 sbytes[i],
1369 S_new[i]);
1370 }
1371 } else { /* regular object */
1372
1373 int n_shift, n_rem,
1374 r_zeros_number;
1375 const char *r_body;
1376
1377 RFALSE(pos_in_item !=
1378 ih_item_len
1379 (B_N_PITEM_HEAD
1380 (tbS0, item_pos))
1381 || tb->insert_size[0] <=
1382 0,
1383 "PAP-12225: item too short or insert_size <= 0");
1384
1385 /* Calculate number of bytes which must be shifted from appended item */
1386 n_shift =
1387 sbytes[i] -
1388 tb->insert_size[0];
1389 if (n_shift < 0)
1390 n_shift = 0;
1391 leaf_move_items
1392 (LEAF_FROM_S_TO_SNEW, tb,
1393 snum[i], n_shift,
1394 S_new[i]);
1395
1396 /* Calculate number of bytes which must remain in body after append to S_new[i] */
1397 n_rem =
1398 tb->insert_size[0] -
1399 sbytes[i];
1400 if (n_rem < 0)
1401 n_rem = 0;
1402 /* Append part of body into S_new[0] */
1403 bi.tb = tb;
1404 bi.bi_bh = S_new[i];
1405 bi.bi_parent = NULL;
1406 bi.bi_position = 0;
1407
1408 if (n_rem > zeros_num) {
1409 r_zeros_number = 0;
1410 r_body =
1411 body + n_rem -
1412 zeros_num;
1413 } else {
1414 r_body = body;
1415 r_zeros_number =
1416 zeros_num - n_rem;
1417 zeros_num -=
1418 r_zeros_number;
1419 }
1420
1421 leaf_paste_in_buffer(&bi, 0,
1422 n_shift,
1423 tb->
1424 insert_size
1425 [0] -
1426 n_rem,
1427 r_body,
1428 r_zeros_number);
1429 {
1430 struct item_head *tmp;
1431
1432 tmp =
1433 B_N_PITEM_HEAD(S_new
1434 [i],
1435 0);
1436 if (is_indirect_le_ih
1437 (tmp)) {
1438 set_ih_free_space
1439 (tmp, 0);
1440 set_le_ih_k_offset
1441 (tmp,
1442 le_ih_k_offset
1443 (tmp) +
1444 (n_rem <<
1445 (tb->
1446 tb_sb->
1447 s_blocksize_bits
1448 -
1449 UNFM_P_SHIFT)));
1450 } else {
1451 set_le_ih_k_offset
1452 (tmp,
1453 le_ih_k_offset
1454 (tmp) +
1455 n_rem);
1456 }
1457 }
1458
1459 tb->insert_size[0] = n_rem;
1460 if (!n_rem)
1461 pos_in_item++;
1462 }
1463 } else
1464 /* item falls wholly into S_new[i] */
1465 {
1466 int ret_val;
1467 struct item_head *pasted;
1468
1469#ifdef CONFIG_REISERFS_CHECK
1470 struct item_head *ih =
1471 B_N_PITEM_HEAD(tbS0, item_pos);
1472
1473 if (!is_direntry_le_ih(ih)
1474 && (pos_in_item != ih_item_len(ih)
1475 || tb->insert_size[0] <= 0))
1476 reiserfs_panic(tb->tb_sb,
1477 "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len");
1478#endif /* CONFIG_REISERFS_CHECK */
1479
1480 ret_val =
1481 leaf_move_items(LEAF_FROM_S_TO_SNEW,
1482 tb, snum[i],
1483 sbytes[i],
1484 S_new[i]);
1485
1486 RFALSE(ret_val,
1487 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
1488 ret_val);
1489
1490 /* paste into item */
1491 bi.tb = tb;
1492 bi.bi_bh = S_new[i];
1493 bi.bi_parent = NULL;
1494 bi.bi_position = 0;
1495 leaf_paste_in_buffer(&bi,
1496 item_pos - n +
1497 snum[i],
1498 pos_in_item,
1499 tb->insert_size[0],
1500 body, zeros_num);
1501
1502 pasted =
1503 B_N_PITEM_HEAD(S_new[i],
1504 item_pos - n +
1505 snum[i]);
1506 if (is_direntry_le_ih(pasted)) {
1507 leaf_paste_entries(bi.bi_bh,
1508 item_pos -
1509 n + snum[i],
1510 pos_in_item,
1511 1,
1512 (struct
1513 reiserfs_de_head
1514 *)body,
1515 body +
1516 DEH_SIZE,
1517 tb->
1518 insert_size
1519 [0]
1520 );
1521 }
1522
1523 /* if we paste to indirect item update ih_free_space */
1524 if (is_indirect_le_ih(pasted))
1525 set_ih_free_space(pasted, 0);
1526 zeros_num = tb->insert_size[0] = 0;
1527 }
1528 }
1529
1530 else { /* pasted item doesn't fall into S_new[i] */
1531
1532 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1533 snum[i], sbytes[i], S_new[i]);
1534 }
1535 break;
1536 default: /* cases d and t */
1537 reiserfs_panic(tb->tb_sb,
1538 "PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)",
1539 (flag ==
1540 M_DELETE) ? "DELETE" : ((flag ==
1541 M_CUT) ? "CUT"
1542 : "UNKNOWN"),
1543 flag);
1544 }
1545
1546 memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE);
1547 insert_ptr[i] = S_new[i];
1548
1549 RFALSE(!buffer_journaled(S_new[i])
1550 || buffer_journal_dirty(S_new[i])
1551 || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)",
1552 i, S_new[i]);
1553 }
1554
1555 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1556 affected item which remains in S */
1557 if (0 <= item_pos && item_pos < tb->s0num) { /* if we must insert or append into buffer S[0] */
1558
1559 switch (flag) {
1560 case M_INSERT: /* insert item into S[0] */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001561 bi.tb = tb;
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001562 bi.bi_bh = tbS0;
1563 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
1564 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
1565 leaf_insert_into_buf(&bi, item_pos, ih, body,
1566 zeros_num);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001567
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001568 /* If we insert the first key change the delimiting key */
1569 if (item_pos == 0) {
1570 if (tb->CFL[0]) /* can be 0 in reiserfsck */
1571 replace_key(tb, tb->CFL[0], tb->lkey[0],
1572 tbS0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001573
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001575 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001577 case M_PASTE:{ /* append item in S[0] */
1578 struct item_head *pasted;
1579
1580 pasted = B_N_PITEM_HEAD(tbS0, item_pos);
1581 /* when directory, may be new entry already pasted */
1582 if (is_direntry_le_ih(pasted)) {
1583 if (pos_in_item >= 0 &&
1584 pos_in_item <=
1585 ih_entry_count(pasted)) {
1586
1587 RFALSE(!tb->insert_size[0],
1588 "PAP-12260: insert_size is 0 already");
1589
1590 /* prepare space */
1591 bi.tb = tb;
1592 bi.bi_bh = tbS0;
1593 bi.bi_parent =
1594 PATH_H_PPARENT(tb->tb_path,
1595 0);
1596 bi.bi_position =
1597 PATH_H_POSITION(tb->tb_path,
1598 1);
1599 leaf_paste_in_buffer(&bi,
1600 item_pos,
1601 pos_in_item,
1602 tb->
1603 insert_size
1604 [0], body,
1605 zeros_num);
1606
1607 /* paste entry */
1608 leaf_paste_entries(bi.bi_bh,
1609 item_pos,
1610 pos_in_item,
1611 1,
1612 (struct
1613 reiserfs_de_head
1614 *)body,
1615 body +
1616 DEH_SIZE,
1617 tb->
1618 insert_size
1619 [0]
1620 );
1621 if (!item_pos && !pos_in_item) {
1622 RFALSE(!tb->CFL[0]
1623 || !tb->L[0],
1624 "PAP-12270: CFL[0]/L[0] must be specified");
1625 if (tb->CFL[0]) {
1626 replace_key(tb,
1627 tb->
1628 CFL
1629 [0],
1630 tb->
1631 lkey
1632 [0],
1633 tbS0,
1634 0);
1635
1636 }
1637 }
1638 tb->insert_size[0] = 0;
1639 }
1640 } else { /* regular object */
1641 if (pos_in_item == ih_item_len(pasted)) {
1642
1643 RFALSE(tb->insert_size[0] <= 0,
1644 "PAP-12275: insert size must not be %d",
1645 tb->insert_size[0]);
1646 bi.tb = tb;
1647 bi.bi_bh = tbS0;
1648 bi.bi_parent =
1649 PATH_H_PPARENT(tb->tb_path,
1650 0);
1651 bi.bi_position =
1652 PATH_H_POSITION(tb->tb_path,
1653 1);
1654 leaf_paste_in_buffer(&bi,
1655 item_pos,
1656 pos_in_item,
1657 tb->
1658 insert_size
1659 [0], body,
1660 zeros_num);
1661
1662 if (is_indirect_le_ih(pasted)) {
1663#if 0
1664 RFALSE(tb->
1665 insert_size[0] !=
1666 UNFM_P_SIZE,
1667 "PAP-12280: insert_size for indirect item must be %d, not %d",
1668 UNFM_P_SIZE,
1669 tb->
1670 insert_size[0]);
1671#endif
1672 set_ih_free_space
1673 (pasted, 0);
1674 }
1675 tb->insert_size[0] = 0;
1676 }
1677#ifdef CONFIG_REISERFS_CHECK
1678 else {
1679 if (tb->insert_size[0]) {
1680 print_cur_tb("12285");
1681 reiserfs_panic(tb->
1682 tb_sb,
1683 "PAP-12285: balance_leaf: insert_size must be 0 (%d)",
1684 tb->
1685 insert_size
1686 [0]);
1687 }
1688 }
1689#endif /* CONFIG_REISERFS_CHECK */
1690
1691 }
1692 } /* case M_PASTE: */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001693 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001694 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001695#ifdef CONFIG_REISERFS_CHECK
1696 if (flag == M_PASTE && tb->insert_size[0]) {
1697 print_cur_tb("12290");
1698 reiserfs_panic(tb->tb_sb,
1699 "PAP-12290: balance_leaf: insert_size is still not 0 (%d)",
1700 tb->insert_size[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001701 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001702#endif /* CONFIG_REISERFS_CHECK */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001703
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704 return 0;
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001705} /* Leaf level of the tree is balanced (end of balance_leaf) */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706
1707/* Make empty node */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001708void make_empty_node(struct buffer_info *bi)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001710 struct block_head *blkh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001712 RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001713
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001714 blkh = B_BLK_HEAD(bi->bi_bh);
1715 set_blkh_nr_item(blkh, 0);
1716 set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001717
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001718 if (bi->bi_parent)
1719 B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001720}
1721
Linus Torvalds1da177e2005-04-16 15:20:36 -07001722/* Get first empty buffer */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001723struct buffer_head *get_FEB(struct tree_balance *tb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001724{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001725 int i;
1726 struct buffer_head *first_b;
1727 struct buffer_info bi;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001728
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001729 for (i = 0; i < MAX_FEB_SIZE; i++)
1730 if (tb->FEB[i] != 0)
1731 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001732
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001733 if (i == MAX_FEB_SIZE)
1734 reiserfs_panic(tb->tb_sb,
1735 "vs-12300: get_FEB: FEB list is empty");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001736
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001737 bi.tb = tb;
1738 bi.bi_bh = first_b = tb->FEB[i];
1739 bi.bi_parent = NULL;
1740 bi.bi_position = 0;
1741 make_empty_node(&bi);
1742 set_buffer_uptodate(first_b);
1743 tb->FEB[i] = NULL;
1744 tb->used[i] = first_b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001745
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001746 return (first_b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747}
1748
Linus Torvalds1da177e2005-04-16 15:20:36 -07001749/* This is now used because reiserfs_free_block has to be able to
1750** schedule.
1751*/
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001752static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001754 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001755
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001756 if (buffer_dirty(bh))
1757 reiserfs_warning(tb->tb_sb,
1758 "store_thrown deals with dirty buffer");
1759 for (i = 0; i < sizeof(tb->thrown) / sizeof(tb->thrown[0]); i++)
1760 if (!tb->thrown[i]) {
1761 tb->thrown[i] = bh;
1762 get_bh(bh); /* free_thrown puts this */
1763 return;
1764 }
1765 reiserfs_warning(tb->tb_sb, "store_thrown: too many thrown buffers");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001766}
1767
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001768static void free_thrown(struct tree_balance *tb)
1769{
1770 int i;
1771 b_blocknr_t blocknr;
1772 for (i = 0; i < sizeof(tb->thrown) / sizeof(tb->thrown[0]); i++) {
1773 if (tb->thrown[i]) {
1774 blocknr = tb->thrown[i]->b_blocknr;
1775 if (buffer_dirty(tb->thrown[i]))
1776 reiserfs_warning(tb->tb_sb,
1777 "free_thrown deals with dirty buffer %d",
1778 blocknr);
1779 brelse(tb->thrown[i]); /* incremented in store_thrown */
1780 reiserfs_free_block(tb->transaction_handle, NULL,
1781 blocknr, 0);
1782 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001784}
1785
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001786void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001787{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001788 struct block_head *blkh;
1789 blkh = B_BLK_HEAD(bh);
1790 set_blkh_level(blkh, FREE_LEVEL);
1791 set_blkh_nr_item(blkh, 0);
1792
1793 clear_buffer_dirty(bh);
1794 store_thrown(tb, bh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001795}
1796
1797/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001798void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
1799 struct buffer_head *src, int n_src)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001800{
1801
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001802 RFALSE(dest == NULL || src == NULL,
1803 "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1804 src, dest);
1805 RFALSE(!B_IS_KEYS_LEVEL(dest),
1806 "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1807 dest);
1808 RFALSE(n_dest < 0 || n_src < 0,
1809 "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1810 RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1811 "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1812 n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001814 if (B_IS_ITEMS_LEVEL(src))
1815 /* source buffer contains leaf node */
1816 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src),
1817 KEY_SIZE);
1818 else
1819 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src),
1820 KEY_SIZE);
1821
1822 do_balance_mark_internal_dirty(tb, dest, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823}
1824
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001825int get_left_neighbor_position(struct tree_balance *tb, int h)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001827 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001828
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001829 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == 0 || tb->FL[h] == 0,
1830 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1831 h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001832
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001833 if (Sh_position == 0)
1834 return B_NR_ITEMS(tb->FL[h]);
1835 else
1836 return Sh_position - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001837}
1838
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001839int get_right_neighbor_position(struct tree_balance *tb, int h)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001840{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001841 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001842
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001843 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == 0 || tb->FR[h] == 0,
1844 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1845 h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001846
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001847 if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
1848 return 0;
1849 else
1850 return Sh_position + 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001851}
1852
Linus Torvalds1da177e2005-04-16 15:20:36 -07001853#ifdef CONFIG_REISERFS_CHECK
1854
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001855int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
1856static void check_internal_node(struct super_block *s, struct buffer_head *bh,
1857 char *mes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001858{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001859 struct disk_child *dc;
1860 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001861
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001862 RFALSE(!bh, "PAP-12336: bh == 0");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001863
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001864 if (!bh || !B_IS_IN_TREE(bh))
1865 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001866
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001867 RFALSE(!buffer_dirty(bh) &&
1868 !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1869 "PAP-12337: buffer (%b) must be dirty", bh);
1870 dc = B_N_CHILD(bh, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001871
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001872 for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
1873 if (!is_reusable(s, dc_block_number(dc), 1)) {
1874 print_cur_tb(mes);
1875 reiserfs_panic(s,
1876 "PAP-12338: check_internal_node: invalid child pointer %y in %b",
1877 dc, bh);
1878 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001879 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001880}
1881
1882static int locked_or_not_in_tree(struct buffer_head *bh, char *which)
1883{
1884 if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
1885 !B_IS_IN_TREE(bh)) {
1886 reiserfs_warning(NULL,
1887 "vs-12339: locked_or_not_in_tree: %s (%b)",
1888 which, bh);
1889 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001890 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001891 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001892}
1893
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001894static int check_before_balancing(struct tree_balance *tb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001895{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001896 int retval = 0;
1897
1898 if (cur_tb) {
1899 reiserfs_panic(tb->tb_sb, "vs-12335: check_before_balancing: "
1900 "suspect that schedule occurred based on cur_tb not being null at this point in code. "
1901 "do_balance cannot properly handle schedule occurring while it runs.");
1902 }
1903
1904 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
1905 prepped all of these for us). */
1906 if (tb->lnum[0]) {
1907 retval |= locked_or_not_in_tree(tb->L[0], "L[0]");
1908 retval |= locked_or_not_in_tree(tb->FL[0], "FL[0]");
1909 retval |= locked_or_not_in_tree(tb->CFL[0], "CFL[0]");
1910 check_leaf(tb->L[0]);
1911 }
1912 if (tb->rnum[0]) {
1913 retval |= locked_or_not_in_tree(tb->R[0], "R[0]");
1914 retval |= locked_or_not_in_tree(tb->FR[0], "FR[0]");
1915 retval |= locked_or_not_in_tree(tb->CFR[0], "CFR[0]");
1916 check_leaf(tb->R[0]);
1917 }
1918 retval |= locked_or_not_in_tree(PATH_PLAST_BUFFER(tb->tb_path), "S[0]");
1919 check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1920
1921 return retval;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001922}
1923
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001924static void check_after_balance_leaf(struct tree_balance *tb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001925{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001926 if (tb->lnum[0]) {
1927 if (B_FREE_SPACE(tb->L[0]) !=
1928 MAX_CHILD_SIZE(tb->L[0]) -
1929 dc_size(B_N_CHILD
1930 (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
1931 print_cur_tb("12221");
1932 reiserfs_panic(tb->tb_sb,
1933 "PAP-12355: check_after_balance_leaf: shift to left was incorrect");
1934 }
1935 }
1936 if (tb->rnum[0]) {
1937 if (B_FREE_SPACE(tb->R[0]) !=
1938 MAX_CHILD_SIZE(tb->R[0]) -
1939 dc_size(B_N_CHILD
1940 (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
1941 print_cur_tb("12222");
1942 reiserfs_panic(tb->tb_sb,
1943 "PAP-12360: check_after_balance_leaf: shift to right was incorrect");
1944 }
1945 }
1946 if (PATH_H_PBUFFER(tb->tb_path, 1) &&
1947 (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
1948 (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1949 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1950 PATH_H_POSITION(tb->tb_path, 1)))))) {
1951 int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
1952 int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1953 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1954 PATH_H_POSITION(tb->tb_path,
1955 1))));
1956 print_cur_tb("12223");
1957 reiserfs_warning(tb->tb_sb,
1958 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1959 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1960 left,
1961 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
1962 PATH_H_PBUFFER(tb->tb_path, 1),
1963 PATH_H_POSITION(tb->tb_path, 1),
1964 dc_size(B_N_CHILD
1965 (PATH_H_PBUFFER(tb->tb_path, 1),
1966 PATH_H_POSITION(tb->tb_path, 1))),
1967 right);
1968 reiserfs_panic(tb->tb_sb,
1969 "PAP-12365: check_after_balance_leaf: S is incorrect");
1970 }
1971}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001972
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001973static void check_leaf_level(struct tree_balance *tb)
1974{
1975 check_leaf(tb->L[0]);
1976 check_leaf(tb->R[0]);
1977 check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1978}
1979
1980static void check_internal_levels(struct tree_balance *tb)
1981{
1982 int h;
1983
1984 /* check all internal nodes */
1985 for (h = 1; tb->insert_size[h]; h++) {
1986 check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
1987 "BAD BUFFER ON PATH");
1988 if (tb->lnum[h])
1989 check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
1990 if (tb->rnum[h])
1991 check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
1992 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001993
1994}
1995
1996#endif
1997
Linus Torvalds1da177e2005-04-16 15:20:36 -07001998/* Now we have all of the buffers that must be used in balancing of
1999 the tree. We rely on the assumption that schedule() will not occur
2000 while do_balance works. ( Only interrupt handlers are acceptable.)
2001 We balance the tree according to the analysis made before this,
2002 using buffers already obtained. For SMP support it will someday be
2003 necessary to add ordered locking of tb. */
2004
2005/* Some interesting rules of balancing:
2006
2007 we delete a maximum of two nodes per level per balancing: we never
2008 delete R, when we delete two of three nodes L, S, R then we move
2009 them into R.
2010
2011 we only delete L if we are deleting two nodes, if we delete only
2012 one node we delete S
2013
2014 if we shift leaves then we shift as much as we can: this is a
2015 deliberate policy of extremism in node packing which results in
2016 higher average utilization after repeated random balance operations
2017 at the cost of more memory copies and more balancing as a result of
2018 small insertions to full nodes.
2019
2020 if we shift internal nodes we try to evenly balance the node
2021 utilization, with consequent less balancing at the cost of lower
2022 utilization.
2023
2024 one could argue that the policy for directories in leaves should be
2025 that of internal nodes, but we will wait until another day to
2026 evaluate this.... It would be nice to someday measure and prove
2027 these assumptions as to what is optimal....
2028
2029*/
2030
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002031static inline void do_balance_starts(struct tree_balance *tb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002032{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002033 /* use print_cur_tb() to see initial state of struct
2034 tree_balance */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002035
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002036 /* store_print_tb (tb); */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002037
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002038 /* do not delete, just comment it out */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002039/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
2040 "check");*/
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002041 RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002042#ifdef CONFIG_REISERFS_CHECK
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002043 cur_tb = tb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002044#endif
2045}
2046
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002047static inline void do_balance_completed(struct tree_balance *tb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002048{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002049
Linus Torvalds1da177e2005-04-16 15:20:36 -07002050#ifdef CONFIG_REISERFS_CHECK
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002051 check_leaf_level(tb);
2052 check_internal_levels(tb);
2053 cur_tb = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002054#endif
2055
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002056 /* reiserfs_free_block is no longer schedule safe. So, we need to
2057 ** put the buffers we want freed on the thrown list during do_balance,
2058 ** and then free them now
2059 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002060
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002061 REISERFS_SB(tb->tb_sb)->s_do_balance++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002062
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002063 /* release all nodes hold to perform the balancing */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002064 unfix_nodes(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002065
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002066 free_thrown(tb);
2067}
2068
2069void do_balance(struct tree_balance *tb, /* tree_balance structure */
2070 struct item_head *ih, /* item header of inserted item */
2071 const char *body, /* body of inserted item or bytes to paste */
2072 int flag)
2073{ /* i - insert, d - delete
2074 c - cut, p - paste
2075
2076 Cut means delete part of an item
2077 (includes removing an entry from a
2078 directory).
2079
2080 Delete means delete whole item.
2081
2082 Insert means add a new item into the
2083 tree.
2084
2085 Paste means to append to the end of an
2086 existing file or to insert a directory
2087 entry. */
2088 int child_pos, /* position of a child node in its parent */
2089 h; /* level of the tree being processed */
2090 struct item_head insert_key[2]; /* in our processing of one level
2091 we sometimes determine what
2092 must be inserted into the next
2093 higher level. This insertion
2094 consists of a key or two keys
2095 and their corresponding
2096 pointers */
2097 struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next
2098 level */
2099
2100 tb->tb_mode = flag;
2101 tb->need_balance_dirty = 0;
2102
2103 if (FILESYSTEM_CHANGED_TB(tb)) {
2104 reiserfs_panic(tb->tb_sb,
2105 "clm-6000: do_balance, fs generation has changed\n");
2106 }
2107 /* if we have no real work to do */
2108 if (!tb->insert_size[0]) {
2109 reiserfs_warning(tb->tb_sb,
2110 "PAP-12350: do_balance: insert_size == 0, mode == %c",
2111 flag);
2112 unfix_nodes(tb);
2113 return;
2114 }
2115
2116 atomic_inc(&(fs_generation(tb->tb_sb)));
2117 do_balance_starts(tb);
2118
Linus Torvalds1da177e2005-04-16 15:20:36 -07002119 /* balance leaf returns 0 except if combining L R and S into
2120 one node. see balance_internal() for explanation of this
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002121 line of code. */
2122 child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
2123 balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002124
2125#ifdef CONFIG_REISERFS_CHECK
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002126 check_after_balance_leaf(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002127#endif
2128
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002129 /* Balance internal level of the tree. */
2130 for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
2131 child_pos =
2132 balance_internal(tb, h, child_pos, insert_key, insert_ptr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002133
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002134 do_balance_completed(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002135
2136}