blob: 2118db2896c7c2491777f10ec15d0842e87a2ce2 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
4
5/* Now we have all buffers that must be used in balancing of the tree */
6/* Further calculations can not cause schedule(), and thus the buffer */
7/* tree will be stable until the balancing will be finished */
8/* balance the tree according to the analysis made before, */
9/* and using buffers obtained after all above. */
10
11
12/**
13 ** balance_leaf_when_delete
14 ** balance_leaf
15 ** do_balance
16 **
17 **/
18
19#include <linux/config.h>
20#include <asm/uaccess.h>
21#include <linux/time.h>
22#include <linux/reiserfs_fs.h>
23#include <linux/buffer_head.h>
24
25#ifdef CONFIG_REISERFS_CHECK
26
27struct tree_balance * cur_tb = NULL; /* detects whether more than one
28 copy of tb exists as a means
29 of checking whether schedule
30 is interrupting do_balance */
31#endif
32
33inline void do_balance_mark_leaf_dirty (struct tree_balance * tb,
34 struct buffer_head * bh, int flag)
35{
36 journal_mark_dirty(tb->transaction_handle,
37 tb->transaction_handle->t_super, bh) ;
38}
39
40#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
41#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
42
43
44/* summary:
45 if deleting something ( tb->insert_size[0] < 0 )
46 return(balance_leaf_when_delete()); (flag d handled here)
47 else
48 if lnum is larger than 0 we put items into the left node
49 if rnum is larger than 0 we put items into the right node
50 if snum1 is larger than 0 we put items into the new node s1
51 if snum2 is larger than 0 we put items into the new node s2
52Note that all *num* count new items being created.
53
54It would be easier to read balance_leaf() if each of these summary
55lines was a separate procedure rather than being inlined. I think
56that there are many passages here and in balance_leaf_when_delete() in
57which two calls to one procedure can replace two passages, and it
58might save cache space and improve software maintenance costs to do so.
59
60Vladimir made the perceptive comment that we should offload most of
61the decision making in this function into fix_nodes/check_balance, and
62then create some sort of structure in tb that says what actions should
63be performed by do_balance.
64
65-Hans */
66
67
68
69/* Balance leaf node in case of delete or cut: insert_size[0] < 0
70 *
71 * lnum, rnum can have values >= -1
72 * -1 means that the neighbor must be joined with S
73 * 0 means that nothing should be done with the neighbor
74 * >0 means to shift entirely or partly the specified number of items to the neighbor
75 */
76static int balance_leaf_when_delete (struct tree_balance * tb, int flag)
77{
78 struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path);
79 int item_pos = PATH_LAST_POSITION (tb->tb_path);
80 int pos_in_item = tb->tb_path->pos_in_item;
81 struct buffer_info bi;
82 int n;
83 struct item_head * ih;
84
85 RFALSE( tb->FR[0] && B_LEVEL (tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
86 "vs- 12000: level: wrong FR %z", tb->FR[0]);
87 RFALSE( tb->blknum[0] > 1,
88 "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
89 RFALSE( ! tb->blknum[0] && ! PATH_H_PPARENT(tb->tb_path, 0),
90 "PAP-12010: tree can not be empty");
91
92 ih = B_N_PITEM_HEAD (tbS0, item_pos);
93
94 /* Delete or truncate the item */
95
96 switch (flag) {
97 case M_DELETE: /* delete item in S[0] */
98
99 RFALSE( ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
100 "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
101 -tb->insert_size [0], ih);
102
103 bi.tb = tb;
104 bi.bi_bh = tbS0;
105 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
106 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
107 leaf_delete_items (&bi, 0, item_pos, 1, -1);
108
109 if ( ! item_pos && tb->CFL[0] ) {
110 if ( B_NR_ITEMS(tbS0) ) {
111 replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0);
112 }
113 else {
114 if ( ! PATH_H_POSITION (tb->tb_path, 1) )
115 replace_key(tb, tb->CFL[0],tb->lkey[0],PATH_H_PPARENT(tb->tb_path, 0),0);
116 }
117 }
118
119 RFALSE( ! item_pos && !tb->CFL[0],
120 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], tb->L[0]);
121
122 break;
123
124 case M_CUT: { /* cut item in S[0] */
125 bi.tb = tb;
126 bi.bi_bh = tbS0;
127 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
128 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
129 if (is_direntry_le_ih (ih)) {
130
131 /* UFS unlink semantics are such that you can only delete one directory entry at a time. */
132 /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
133 tb->insert_size[0] = -1;
134 leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]);
135
136 RFALSE( ! item_pos && ! pos_in_item && ! tb->CFL[0],
137 "PAP-12030: can not change delimiting key. CFL[0]=%p",
138 tb->CFL[0]);
139
140 if ( ! item_pos && ! pos_in_item && tb->CFL[0] ) {
141 replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0);
142 }
143 } else {
144 leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]);
145
146 RFALSE( ! ih_item_len(ih),
147 "PAP-12035: cut must leave non-zero dynamic length of item");
148 }
149 break;
150 }
151
152 default:
153 print_cur_tb ("12040");
154 reiserfs_panic (tb->tb_sb, "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)",
155 (flag == M_PASTE) ? "PASTE" : ((flag == M_INSERT) ? "INSERT" : "UNKNOWN"), flag);
156 }
157
158 /* the rule is that no shifting occurs unless by shifting a node can be freed */
159 n = B_NR_ITEMS(tbS0);
160 if ( tb->lnum[0] ) /* L[0] takes part in balancing */
161 {
162 if ( tb->lnum[0] == -1 ) /* L[0] must be joined with S[0] */
163 {
164 if ( tb->rnum[0] == -1 ) /* R[0] must be also joined with S[0] */
165 {
166 if ( tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0) )
167 {
168 /* all contents of all the 3 buffers will be in L[0] */
169 if ( PATH_H_POSITION (tb->tb_path, 1) == 0 && 1 < B_NR_ITEMS(tb->FR[0]) )
170 replace_key(tb, tb->CFL[0],tb->lkey[0],tb->FR[0],1);
171
172 leaf_move_items (LEAF_FROM_S_TO_L, tb, n, -1, NULL);
173 leaf_move_items (LEAF_FROM_R_TO_L, tb, B_NR_ITEMS(tb->R[0]), -1, NULL);
174
175 reiserfs_invalidate_buffer (tb, tbS0);
176 reiserfs_invalidate_buffer (tb, tb->R[0]);
177
178 return 0;
179 }
180 /* all contents of all the 3 buffers will be in R[0] */
181 leaf_move_items (LEAF_FROM_S_TO_R, tb, n, -1, NULL);
182 leaf_move_items (LEAF_FROM_L_TO_R, tb, B_NR_ITEMS(tb->L[0]), -1, NULL);
183
184 /* right_delimiting_key is correct in R[0] */
185 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
186
187 reiserfs_invalidate_buffer (tb, tbS0);
188 reiserfs_invalidate_buffer (tb, tb->L[0]);
189
190 return -1;
191 }
192
193 RFALSE( tb->rnum[0] != 0,
194 "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
195 /* all contents of L[0] and S[0] will be in L[0] */
196 leaf_shift_left(tb, n, -1);
197
198 reiserfs_invalidate_buffer (tb, tbS0);
199
200 return 0;
201 }
202 /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
203
204 RFALSE( ( tb->lnum[0] + tb->rnum[0] < n ) ||
205 ( tb->lnum[0] + tb->rnum[0] > n+1 ),
206 "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
207 tb->rnum[0], tb->lnum[0], n);
208 RFALSE( ( tb->lnum[0] + tb->rnum[0] == n ) &&
209 (tb->lbytes != -1 || tb->rbytes != -1),
210 "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
211 tb->rbytes, tb->lbytes);
212 RFALSE( ( tb->lnum[0] + tb->rnum[0] == n + 1 ) &&
213 (tb->lbytes < 1 || tb->rbytes != -1),
214 "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
215 tb->rbytes, tb->lbytes);
216
217 leaf_shift_left (tb, tb->lnum[0], tb->lbytes);
218 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
219
220 reiserfs_invalidate_buffer (tb, tbS0);
221
222 return 0;
223 }
224
225 if ( tb->rnum[0] == -1 ) {
226 /* all contents of R[0] and S[0] will be in R[0] */
227 leaf_shift_right(tb, n, -1);
228 reiserfs_invalidate_buffer (tb, tbS0);
229 return 0;
230 }
231
232 RFALSE( tb->rnum[0],
233 "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
234 return 0;
235}
236
237
238static int balance_leaf (struct tree_balance * tb,
239 struct item_head * ih, /* item header of inserted item (this is on little endian) */
240 const char * body, /* body of inserted item or bytes to paste */
241 int flag, /* i - insert, d - delete, c - cut, p - paste
242 (see comment to do_balance) */
243 struct item_head * insert_key, /* in our processing of one level we sometimes determine what
244 must be inserted into the next higher level. This insertion
245 consists of a key or two keys and their corresponding
246 pointers */
247 struct buffer_head ** insert_ptr /* inserted node-ptrs for the next level */
248 )
249{
250 struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path);
251 int item_pos = PATH_LAST_POSITION (tb->tb_path); /* index into the array of item headers in S[0]
252 of the affected item */
253 struct buffer_info bi;
254 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */
255 int snum[2]; /* number of items that will be placed
256 into S_new (includes partially shifted
257 items) */
258 int sbytes[2]; /* if an item is partially shifted into S_new then
259 if it is a directory item
260 it is the number of entries from the item that are shifted into S_new
261 else
262 it is the number of bytes from the item that are shifted into S_new
263 */
264 int n, i;
265 int ret_val;
266 int pos_in_item;
267 int zeros_num;
268
269 PROC_INFO_INC( tb -> tb_sb, balance_at[ 0 ] );
270
271 /* Make balance in case insert_size[0] < 0 */
272 if ( tb->insert_size[0] < 0 )
273 return balance_leaf_when_delete (tb, flag);
274
275 zeros_num = 0;
276 if (flag == M_INSERT && body == 0)
277 zeros_num = ih_item_len( ih );
278
279 pos_in_item = tb->tb_path->pos_in_item;
280 /* for indirect item pos_in_item is measured in unformatted node
281 pointers. Recalculate to bytes */
282 if (flag != M_INSERT && is_indirect_le_ih (B_N_PITEM_HEAD (tbS0, item_pos)))
283 pos_in_item *= UNFM_P_SIZE;
284
285 if ( tb->lnum[0] > 0 ) {
286 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
287 if ( item_pos < tb->lnum[0] ) {
288 /* new item or it part falls to L[0], shift it too */
289 n = B_NR_ITEMS(tb->L[0]);
290
291 switch (flag) {
292 case M_INSERT: /* insert item into L[0] */
293
294 if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) {
295 /* part of new item falls into L[0] */
296 int new_item_len;
297 int version;
298
299 ret_val = leaf_shift_left (tb, tb->lnum[0]-1, -1);
300
301 /* Calculate item length to insert to S[0] */
302 new_item_len = ih_item_len(ih) - tb->lbytes;
303 /* Calculate and check item length to insert to L[0] */
304 put_ih_item_len(ih, ih_item_len(ih) - new_item_len );
305
306 RFALSE( ih_item_len(ih) <= 0,
307 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
308 ih_item_len(ih));
309
310 /* Insert new item into L[0] */
311 bi.tb = tb;
312 bi.bi_bh = tb->L[0];
313 bi.bi_parent = tb->FL[0];
314 bi.bi_position = get_left_neighbor_position (tb, 0);
315 leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body,
316 zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num);
317
318 version = ih_version (ih);
319
320 /* Calculate key component, item length and body to insert into S[0] */
321 set_le_ih_k_offset( ih, le_ih_k_offset( ih ) + (tb->lbytes << (is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0)) );
322
323 put_ih_item_len( ih, new_item_len );
324 if ( tb->lbytes > zeros_num ) {
325 body += (tb->lbytes - zeros_num);
326 zeros_num = 0;
327 }
328 else
329 zeros_num -= tb->lbytes;
330
331 RFALSE( ih_item_len(ih) <= 0,
332 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
333 ih_item_len(ih));
334 } else {
335 /* new item in whole falls into L[0] */
336 /* Shift lnum[0]-1 items to L[0] */
337 ret_val = leaf_shift_left(tb, tb->lnum[0]-1, tb->lbytes);
338 /* Insert new item into L[0] */
339 bi.tb = tb;
340 bi.bi_bh = tb->L[0];
341 bi.bi_parent = tb->FL[0];
342 bi.bi_position = get_left_neighbor_position (tb, 0);
343 leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body, zeros_num);
344 tb->insert_size[0] = 0;
345 zeros_num = 0;
346 }
347 break;
348
349 case M_PASTE: /* append item in L[0] */
350
351 if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) {
352 /* we must shift the part of the appended item */
353 if ( is_direntry_le_ih (B_N_PITEM_HEAD (tbS0, item_pos))) {
354
355 RFALSE( zeros_num,
356 "PAP-12090: invalid parameter in case of a directory");
357 /* directory item */
358 if ( tb->lbytes > pos_in_item ) {
359 /* new directory entry falls into L[0] */
360 struct item_head * pasted;
361 int l_pos_in_item = pos_in_item;
362
363 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
364 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes - 1);
365 if ( ret_val && ! item_pos ) {
366 pasted = B_N_PITEM_HEAD(tb->L[0],B_NR_ITEMS(tb->L[0])-1);
367 l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes-1);
368 }
369
370 /* Append given directory entry to directory item */
371 bi.tb = tb;
372 bi.bi_bh = tb->L[0];
373 bi.bi_parent = tb->FL[0];
374 bi.bi_position = get_left_neighbor_position (tb, 0);
375 leaf_paste_in_buffer (&bi, n + item_pos - ret_val, l_pos_in_item,
376 tb->insert_size[0], body, zeros_num);
377
378 /* previous string prepared space for pasting new entry, following string pastes this entry */
379
380 /* when we have merge directory item, pos_in_item has been changed too */
381
382 /* paste new directory entry. 1 is entry number */
383 leaf_paste_entries (bi.bi_bh, n + item_pos - ret_val, l_pos_in_item, 1,
384 (struct reiserfs_de_head *)body,
385 body + DEH_SIZE, tb->insert_size[0]
386 );
387 tb->insert_size[0] = 0;
388 } else {
389 /* new directory item doesn't fall into L[0] */
390 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
391 leaf_shift_left (tb, tb->lnum[0], tb->lbytes);
392 }
393 /* Calculate new position to append in item body */
394 pos_in_item -= tb->lbytes;
395 }
396 else {
397 /* regular object */
398 RFALSE( tb->lbytes <= 0,
399 "PAP-12095: there is nothing to shift to L[0]. lbytes=%d",
400 tb->lbytes);
401 RFALSE( pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),
402 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
403 ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)), pos_in_item);
404
405 if ( tb->lbytes >= pos_in_item ) {
406 /* appended item will be in L[0] in whole */
407 int l_n;
408
409 /* this bytes number must be appended to the last item of L[h] */
410 l_n = tb->lbytes - pos_in_item;
411
412 /* Calculate new insert_size[0] */
413 tb->insert_size[0] -= l_n;
414
415 RFALSE( tb->insert_size[0] <= 0,
416 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
417 tb->insert_size[0]);
418 ret_val = leaf_shift_left(tb,tb->lnum[0],
419 ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)));
420 /* Append to body of item in L[0] */
421 bi.tb = tb;
422 bi.bi_bh = tb->L[0];
423 bi.bi_parent = tb->FL[0];
424 bi.bi_position = get_left_neighbor_position (tb, 0);
425 leaf_paste_in_buffer(
426 &bi,n + item_pos - ret_val,
427 ih_item_len( B_N_PITEM_HEAD(tb->L[0],n+item_pos-ret_val)),
428 l_n,body, zeros_num > l_n ? l_n : zeros_num
429 );
430 /* 0-th item in S0 can be only of DIRECT type when l_n != 0*/
431 {
432 int version;
433 int temp_l = l_n;
434
435 RFALSE (ih_item_len (B_N_PITEM_HEAD (tbS0, 0)),
436 "PAP-12106: item length must be 0");
437 RFALSE (comp_short_le_keys (B_N_PKEY (tbS0, 0),
438 B_N_PKEY (tb->L[0],
439 n + item_pos - ret_val)),
440 "PAP-12107: items must be of the same file");
441 if (is_indirect_le_ih(B_N_PITEM_HEAD (tb->L[0],
442 n + item_pos - ret_val))) {
443 temp_l = l_n << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
444 }
445 /* update key of first item in S0 */
446 version = ih_version (B_N_PITEM_HEAD (tbS0, 0));
447 set_le_key_k_offset (version, B_N_PKEY (tbS0, 0),
448 le_key_k_offset (version, B_N_PKEY (tbS0, 0)) + temp_l);
449 /* update left delimiting key */
450 set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]),
451 le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0])) + temp_l);
452 }
453
454 /* Calculate new body, position in item and insert_size[0] */
455 if ( l_n > zeros_num ) {
456 body += (l_n - zeros_num);
457 zeros_num = 0;
458 }
459 else
460 zeros_num -= l_n;
461 pos_in_item = 0;
462
463 RFALSE( comp_short_le_keys
464 (B_N_PKEY(tbS0,0),
465 B_N_PKEY(tb->L[0],B_NR_ITEMS(tb->L[0])-1)) ||
466
467 !op_is_left_mergeable
468 (B_N_PKEY (tbS0, 0), tbS0->b_size) ||
469 !op_is_left_mergeable
470 (B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]),
471 tbS0->b_size),
472 "PAP-12120: item must be merge-able with left neighboring item");
473 }
474 else /* only part of the appended item will be in L[0] */
475 {
476 /* Calculate position in item for append in S[0] */
477 pos_in_item -= tb->lbytes;
478
479 RFALSE( pos_in_item <= 0,
480 "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item);
481
482 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
483 leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
484 }
485 }
486 }
487 else /* appended item will be in L[0] in whole */
488 {
489 struct item_head * pasted;
490
491 if ( ! item_pos && op_is_left_mergeable (B_N_PKEY (tbS0, 0), tbS0->b_size) )
492 { /* if we paste into first item of S[0] and it is left mergable */
493 /* then increment pos_in_item by the size of the last item in L[0] */
494 pasted = B_N_PITEM_HEAD(tb->L[0],n-1);
495 if ( is_direntry_le_ih (pasted) )
496 pos_in_item += ih_entry_count(pasted);
497 else
498 pos_in_item += ih_item_len(pasted);
499 }
500
501 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
502 ret_val = leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
503 /* Append to body of item in L[0] */
504 bi.tb = tb;
505 bi.bi_bh = tb->L[0];
506 bi.bi_parent = tb->FL[0];
507 bi.bi_position = get_left_neighbor_position (tb, 0);
508 leaf_paste_in_buffer (&bi, n + item_pos - ret_val, pos_in_item, tb->insert_size[0],
509 body, zeros_num);
510
511 /* if appended item is directory, paste entry */
512 pasted = B_N_PITEM_HEAD (tb->L[0], n + item_pos - ret_val);
513 if (is_direntry_le_ih (pasted))
514 leaf_paste_entries (
515 bi.bi_bh, n + item_pos - ret_val, pos_in_item, 1,
516 (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
517 );
518 /* if appended item is indirect item, put unformatted node into un list */
519 if (is_indirect_le_ih (pasted))
520 set_ih_free_space (pasted, 0);
521 tb->insert_size[0] = 0;
522 zeros_num = 0;
523 }
524 break;
525 default: /* cases d and t */
526 reiserfs_panic (tb->tb_sb, "PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)",
527 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
528 }
529 } else {
530 /* new item doesn't fall into L[0] */
531 leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
532 }
533 } /* tb->lnum[0] > 0 */
534
535 /* Calculate new item position */
536 item_pos -= ( tb->lnum[0] - (( tb->lbytes != -1 ) ? 1 : 0));
537
538 if ( tb->rnum[0] > 0 ) {
539 /* shift rnum[0] items from S[0] to the right neighbor R[0] */
540 n = B_NR_ITEMS(tbS0);
541 switch ( flag ) {
542
543 case M_INSERT: /* insert item */
544 if ( n - tb->rnum[0] < item_pos )
545 { /* new item or its part falls to R[0] */
546 if ( item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1 )
547 { /* part of new item falls into R[0] */
548 loff_t old_key_comp, old_len, r_zeros_number;
549 const char * r_body;
550 int version;
551 loff_t offset;
552
553 leaf_shift_right(tb,tb->rnum[0]-1,-1);
554
555 version = ih_version(ih);
556 /* Remember key component and item length */
557 old_key_comp = le_ih_k_offset( ih );
558 old_len = ih_item_len(ih);
559
560 /* Calculate key component and item length to insert into R[0] */
561 offset = le_ih_k_offset( ih ) + ((old_len - tb->rbytes )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0));
562 set_le_ih_k_offset( ih, offset );
563 put_ih_item_len( ih, tb->rbytes);
564 /* Insert part of the item into R[0] */
565 bi.tb = tb;
566 bi.bi_bh = tb->R[0];
567 bi.bi_parent = tb->FR[0];
568 bi.bi_position = get_right_neighbor_position (tb, 0);
569 if ( (old_len - tb->rbytes) > zeros_num ) {
570 r_zeros_number = 0;
571 r_body = body + (old_len - tb->rbytes) - zeros_num;
572 }
573 else {
574 r_body = body;
575 r_zeros_number = zeros_num - (old_len - tb->rbytes);
576 zeros_num -= r_zeros_number;
577 }
578
579 leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number);
580
581 /* Replace right delimiting key by first key in R[0] */
582 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
583
584 /* Calculate key component and item length to insert into S[0] */
585 set_le_ih_k_offset( ih, old_key_comp );
586 put_ih_item_len( ih, old_len - tb->rbytes );
587
588 tb->insert_size[0] -= tb->rbytes;
589
590 }
591 else /* whole new item falls into R[0] */
592 {
593 /* Shift rnum[0]-1 items to R[0] */
594 ret_val = leaf_shift_right(tb,tb->rnum[0]-1,tb->rbytes);
595 /* Insert new item into R[0] */
596 bi.tb = tb;
597 bi.bi_bh = tb->R[0];
598 bi.bi_parent = tb->FR[0];
599 bi.bi_position = get_right_neighbor_position (tb, 0);
600 leaf_insert_into_buf (&bi, item_pos - n + tb->rnum[0] - 1, ih, body, zeros_num);
601
602 if ( item_pos - n + tb->rnum[0] - 1 == 0 ) {
603 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
604
605 }
606 zeros_num = tb->insert_size[0] = 0;
607 }
608 }
609 else /* new item or part of it doesn't fall into R[0] */
610 {
611 leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
612 }
613 break;
614
615 case M_PASTE: /* append item */
616
617 if ( n - tb->rnum[0] <= item_pos ) /* pasted item or part of it falls to R[0] */
618 {
619 if ( item_pos == n - tb->rnum[0] && tb->rbytes != -1 )
620 { /* we must shift the part of the appended item */
621 if ( is_direntry_le_ih (B_N_PITEM_HEAD(tbS0, item_pos)))
622 { /* we append to directory item */
623 int entry_count;
624
625 RFALSE( zeros_num,
626 "PAP-12145: invalid parameter in case of a directory");
627 entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD(tbS0, item_pos));
628 if ( entry_count - tb->rbytes < pos_in_item )
629 /* new directory entry falls into R[0] */
630 {
631 int paste_entry_position;
632
633 RFALSE( tb->rbytes - 1 >= entry_count ||
634 ! tb->insert_size[0],
635 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
636 tb->rbytes, entry_count);
637 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
638 leaf_shift_right(tb,tb->rnum[0],tb->rbytes - 1);
639 /* Paste given directory entry to directory item */
640 paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1;
641 bi.tb = tb;
642 bi.bi_bh = tb->R[0];
643 bi.bi_parent = tb->FR[0];
644 bi.bi_position = get_right_neighbor_position (tb, 0);
645 leaf_paste_in_buffer (&bi, 0, paste_entry_position,
646 tb->insert_size[0],body,zeros_num);
647 /* paste entry */
648 leaf_paste_entries (
649 bi.bi_bh, 0, paste_entry_position, 1, (struct reiserfs_de_head *)body,
650 body + DEH_SIZE, tb->insert_size[0]
651 );
652
653 if ( paste_entry_position == 0 ) {
654 /* change delimiting keys */
655 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
656 }
657
658 tb->insert_size[0] = 0;
659 pos_in_item++;
660 }
661 else /* new directory entry doesn't fall into R[0] */
662 {
663 leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
664 }
665 }
666 else /* regular object */
667 {
668 int n_shift, n_rem, r_zeros_number;
669 const char * r_body;
670
671 /* Calculate number of bytes which must be shifted from appended item */
672 if ( (n_shift = tb->rbytes - tb->insert_size[0]) < 0 )
673 n_shift = 0;
674
675 RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD (tbS0, item_pos)),
676 "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
677 pos_in_item, ih_item_len( B_N_PITEM_HEAD(tbS0,item_pos)));
678
679 leaf_shift_right(tb,tb->rnum[0],n_shift);
680 /* Calculate number of bytes which must remain in body after appending to R[0] */
681 if ( (n_rem = tb->insert_size[0] - tb->rbytes) < 0 )
682 n_rem = 0;
683
684 {
685 int version;
686 unsigned long temp_rem = n_rem;
687
688 version = ih_version (B_N_PITEM_HEAD (tb->R[0],0));
689 if (is_indirect_le_key(version,B_N_PKEY(tb->R[0],0))){
690 temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits -
691 UNFM_P_SHIFT);
692 }
693 set_le_key_k_offset (version, B_N_PKEY(tb->R[0],0),
694 le_key_k_offset (version, B_N_PKEY(tb->R[0],0)) + temp_rem);
695 set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0]),
696 le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) + temp_rem);
697 }
698/* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
699 k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
700 do_balance_mark_internal_dirty (tb, tb->CFR[0], 0);
701
702 /* Append part of body into R[0] */
703 bi.tb = tb;
704 bi.bi_bh = tb->R[0];
705 bi.bi_parent = tb->FR[0];
706 bi.bi_position = get_right_neighbor_position (tb, 0);
707 if ( n_rem > zeros_num ) {
708 r_zeros_number = 0;
709 r_body = body + n_rem - zeros_num;
710 }
711 else {
712 r_body = body;
713 r_zeros_number = zeros_num - n_rem;
714 zeros_num -= r_zeros_number;
715 }
716
717 leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, r_body, r_zeros_number);
718
719 if (is_indirect_le_ih (B_N_PITEM_HEAD(tb->R[0],0))) {
720#if 0
721 RFALSE( n_rem,
722 "PAP-12160: paste more than one unformatted node pointer");
723#endif
724 set_ih_free_space (B_N_PITEM_HEAD(tb->R[0],0), 0);
725 }
726 tb->insert_size[0] = n_rem;
727 if ( ! n_rem )
728 pos_in_item ++;
729 }
730 }
731 else /* pasted item in whole falls into R[0] */
732 {
733 struct item_head * pasted;
734
735 ret_val = leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
736 /* append item in R[0] */
737 if ( pos_in_item >= 0 ) {
738 bi.tb = tb;
739 bi.bi_bh = tb->R[0];
740 bi.bi_parent = tb->FR[0];
741 bi.bi_position = get_right_neighbor_position (tb, 0);
742 leaf_paste_in_buffer(&bi,item_pos - n + tb->rnum[0], pos_in_item,
743 tb->insert_size[0],body, zeros_num);
744 }
745
746 /* paste new entry, if item is directory item */
747 pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]);
748 if (is_direntry_le_ih (pasted) && pos_in_item >= 0 ) {
749 leaf_paste_entries (
750 bi.bi_bh, item_pos - n + tb->rnum[0], pos_in_item, 1,
751 (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
752 );
753 if ( ! pos_in_item ) {
754
755 RFALSE( item_pos - n + tb->rnum[0],
756 "PAP-12165: directory item must be first item of node when pasting is in 0th position");
757
758 /* update delimiting keys */
759 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
760 }
761 }
762
763 if (is_indirect_le_ih (pasted))
764 set_ih_free_space (pasted, 0);
765 zeros_num = tb->insert_size[0] = 0;
766 }
767 }
768 else /* new item doesn't fall into R[0] */
769 {
770 leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
771 }
772 break;
773 default: /* cases d and t */
774 reiserfs_panic (tb->tb_sb, "PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)",
775 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
776 }
777
778 } /* tb->rnum[0] > 0 */
779
780
781 RFALSE( tb->blknum[0] > 3,
782 "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]);
783 RFALSE( tb->blknum[0] < 0,
784 "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]);
785
786 /* if while adding to a node we discover that it is possible to split
787 it in two, and merge the left part into the left neighbor and the
788 right part into the right neighbor, eliminating the node */
789 if ( tb->blknum[0] == 0 ) { /* node S[0] is empty now */
790
791 RFALSE( ! tb->lnum[0] || ! tb->rnum[0],
792 "PAP-12190: lnum and rnum must not be zero");
793 /* if insertion was done before 0-th position in R[0], right
794 delimiting key of the tb->L[0]'s and left delimiting key are
795 not set correctly */
796 if (tb->CFL[0]) {
797 if (!tb->CFR[0])
798 reiserfs_panic (tb->tb_sb, "vs-12195: balance_leaf: CFR not initialized");
799 copy_key (B_N_PDELIM_KEY (tb->CFL[0], tb->lkey[0]), B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0]));
800 do_balance_mark_internal_dirty (tb, tb->CFL[0], 0);
801 }
802
803 reiserfs_invalidate_buffer(tb,tbS0);
804 return 0;
805 }
806
807
808 /* Fill new nodes that appear in place of S[0] */
809
810 /* I am told that this copying is because we need an array to enable
811 the looping code. -Hans */
812 snum[0] = tb->s1num,
813 snum[1] = tb->s2num;
814 sbytes[0] = tb->s1bytes;
815 sbytes[1] = tb->s2bytes;
816 for( i = tb->blknum[0] - 2; i >= 0; i-- ) {
817
818 RFALSE( !snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i, snum[i]);
819
820 /* here we shift from S to S_new nodes */
821
822 S_new[i] = get_FEB(tb);
823
824 /* initialized block type and tree level */
825 set_blkh_level( B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL );
826
827
828 n = B_NR_ITEMS(tbS0);
829
830 switch (flag) {
831 case M_INSERT: /* insert item */
832
833 if ( n - snum[i] < item_pos )
834 { /* new item or it's part falls to first new node S_new[i]*/
835 if ( item_pos == n - snum[i] + 1 && sbytes[i] != -1 )
836 { /* part of new item falls into S_new[i] */
837 int old_key_comp, old_len, r_zeros_number;
838 const char * r_body;
839 int version;
840
841 /* Move snum[i]-1 items from S[0] to S_new[i] */
842 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, -1, S_new[i]);
843 /* Remember key component and item length */
844 version = ih_version (ih);
845 old_key_comp = le_ih_k_offset( ih );
846 old_len = ih_item_len(ih);
847
848 /* Calculate key component and item length to insert into S_new[i] */
849 set_le_ih_k_offset( ih,
850 le_ih_k_offset(ih) + ((old_len - sbytes[i] )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0)) );
851
852 put_ih_item_len( ih, sbytes[i] );
853
854 /* Insert part of the item into S_new[i] before 0-th item */
855 bi.tb = tb;
856 bi.bi_bh = S_new[i];
857 bi.bi_parent = NULL;
858 bi.bi_position = 0;
859
860 if ( (old_len - sbytes[i]) > zeros_num ) {
861 r_zeros_number = 0;
862 r_body = body + (old_len - sbytes[i]) - zeros_num;
863 }
864 else {
865 r_body = body;
866 r_zeros_number = zeros_num - (old_len - sbytes[i]);
867 zeros_num -= r_zeros_number;
868 }
869
870 leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number);
871
872 /* Calculate key component and item length to insert into S[i] */
873 set_le_ih_k_offset( ih, old_key_comp );
874 put_ih_item_len( ih, old_len - sbytes[i] );
875 tb->insert_size[0] -= sbytes[i];
876 }
877 else /* whole new item falls into S_new[i] */
878 {
879 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
880 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, sbytes[i], S_new[i]);
881
882 /* Insert new item into S_new[i] */
883 bi.tb = tb;
884 bi.bi_bh = S_new[i];
885 bi.bi_parent = NULL;
886 bi.bi_position = 0;
887 leaf_insert_into_buf (&bi, item_pos - n + snum[i] - 1, ih, body, zeros_num);
888
889 zeros_num = tb->insert_size[0] = 0;
890 }
891 }
892
893 else /* new item or it part don't falls into S_new[i] */
894 {
895 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
896 }
897 break;
898
899 case M_PASTE: /* append item */
900
901 if ( n - snum[i] <= item_pos ) /* pasted item or part if it falls to S_new[i] */
902 {
903 if ( item_pos == n - snum[i] && sbytes[i] != -1 )
904 { /* we must shift part of the appended item */
905 struct item_head * aux_ih;
906
907 RFALSE( ih, "PAP-12210: ih must be 0");
908
909 if ( is_direntry_le_ih (aux_ih = B_N_PITEM_HEAD(tbS0,item_pos))) {
910 /* we append to directory item */
911
912 int entry_count;
913
914 entry_count = ih_entry_count(aux_ih);
915
916 if ( entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count ) {
917 /* new directory entry falls into S_new[i] */
918
919 RFALSE( ! tb->insert_size[0],
920 "PAP-12215: insert_size is already 0");
921 RFALSE( sbytes[i] - 1 >= entry_count,
922 "PAP-12220: there are no so much entries (%d), only %d",
923 sbytes[i] - 1, entry_count);
924
925 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
926 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i]-1, S_new[i]);
927 /* Paste given directory entry to directory item */
928 bi.tb = tb;
929 bi.bi_bh = S_new[i];
930 bi.bi_parent = NULL;
931 bi.bi_position = 0;
932 leaf_paste_in_buffer (&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
933 tb->insert_size[0], body,zeros_num);
934 /* paste new directory entry */
935 leaf_paste_entries (
936 bi.bi_bh, 0, pos_in_item - entry_count + sbytes[i] - 1,
937 1, (struct reiserfs_de_head *)body, body + DEH_SIZE,
938 tb->insert_size[0]
939 );
940 tb->insert_size[0] = 0;
941 pos_in_item++;
942 } else { /* new directory entry doesn't fall into S_new[i] */
943 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
944 }
945 }
946 else /* regular object */
947 {
948 int n_shift, n_rem, r_zeros_number;
949 const char * r_body;
950
951 RFALSE( pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)) ||
952 tb->insert_size[0] <= 0,
953 "PAP-12225: item too short or insert_size <= 0");
954
955 /* Calculate number of bytes which must be shifted from appended item */
956 n_shift = sbytes[i] - tb->insert_size[0];
957 if ( n_shift < 0 )
958 n_shift = 0;
959 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]);
960
961 /* Calculate number of bytes which must remain in body after append to S_new[i] */
962 n_rem = tb->insert_size[0] - sbytes[i];
963 if ( n_rem < 0 )
964 n_rem = 0;
965 /* Append part of body into S_new[0] */
966 bi.tb = tb;
967 bi.bi_bh = S_new[i];
968 bi.bi_parent = NULL;
969 bi.bi_position = 0;
970
971 if ( n_rem > zeros_num ) {
972 r_zeros_number = 0;
973 r_body = body + n_rem - zeros_num;
974 }
975 else {
976 r_body = body;
977 r_zeros_number = zeros_num - n_rem;
978 zeros_num -= r_zeros_number;
979 }
980
981 leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0]-n_rem, r_body,r_zeros_number);
982 {
983 struct item_head * tmp;
984
985 tmp = B_N_PITEM_HEAD(S_new[i],0);
986 if (is_indirect_le_ih (tmp)) {
987 set_ih_free_space (tmp, 0);
988 set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) +
989 (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT)));
990 } else {
991 set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) +
992 n_rem );
993 }
994 }
995
996 tb->insert_size[0] = n_rem;
997 if ( ! n_rem )
998 pos_in_item++;
999 }
1000 }
1001 else
1002 /* item falls wholly into S_new[i] */
1003 {
1004 int ret_val;
1005 struct item_head * pasted;
1006
1007#ifdef CONFIG_REISERFS_CHECK
1008 struct item_head * ih = B_N_PITEM_HEAD(tbS0,item_pos);
1009
1010 if ( ! is_direntry_le_ih(ih) && (pos_in_item != ih_item_len(ih) ||
1011 tb->insert_size[0] <= 0) )
1012 reiserfs_panic (tb->tb_sb, "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len");
1013#endif /* CONFIG_REISERFS_CHECK */
1014
1015 ret_val = leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
1016
1017 RFALSE( ret_val,
1018 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
1019 ret_val);
1020
1021 /* paste into item */
1022 bi.tb = tb;
1023 bi.bi_bh = S_new[i];
1024 bi.bi_parent = NULL;
1025 bi.bi_position = 0;
1026 leaf_paste_in_buffer(&bi, item_pos - n + snum[i], pos_in_item, tb->insert_size[0], body, zeros_num);
1027
1028 pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]);
1029 if (is_direntry_le_ih (pasted))
1030 {
1031 leaf_paste_entries (
1032 bi.bi_bh, item_pos - n + snum[i], pos_in_item, 1,
1033 (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
1034 );
1035 }
1036
1037 /* if we paste to indirect item update ih_free_space */
1038 if (is_indirect_le_ih (pasted))
1039 set_ih_free_space (pasted, 0);
1040 zeros_num = tb->insert_size[0] = 0;
1041 }
1042 }
1043
1044 else /* pasted item doesn't fall into S_new[i] */
1045 {
1046 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
1047 }
1048 break;
1049 default: /* cases d and t */
1050 reiserfs_panic (tb->tb_sb, "PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)",
1051 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
1052 }
1053
1054 memcpy (insert_key + i,B_N_PKEY(S_new[i],0),KEY_SIZE);
1055 insert_ptr[i] = S_new[i];
1056
1057 RFALSE (!buffer_journaled (S_new [i]) || buffer_journal_dirty (S_new [i]) ||
1058 buffer_dirty (S_new [i]),
1059 "PAP-12247: S_new[%d] : (%b)", i, S_new[i]);
1060 }
1061
1062 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1063 affected item which remains in S */
1064 if ( 0 <= item_pos && item_pos < tb->s0num )
1065 { /* if we must insert or append into buffer S[0] */
1066
1067 switch (flag)
1068 {
1069 case M_INSERT: /* insert item into S[0] */
1070 bi.tb = tb;
1071 bi.bi_bh = tbS0;
1072 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
1073 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
1074 leaf_insert_into_buf (&bi, item_pos, ih, body, zeros_num);
1075
1076 /* If we insert the first key change the delimiting key */
1077 if( item_pos == 0 ) {
1078 if (tb->CFL[0]) /* can be 0 in reiserfsck */
1079 replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0);
1080
1081 }
1082 break;
1083
1084 case M_PASTE: { /* append item in S[0] */
1085 struct item_head * pasted;
1086
1087 pasted = B_N_PITEM_HEAD (tbS0, item_pos);
1088 /* when directory, may be new entry already pasted */
1089 if (is_direntry_le_ih (pasted)) {
1090 if ( pos_in_item >= 0 &&
1091 pos_in_item <= ih_entry_count(pasted) ) {
1092
1093 RFALSE( ! tb->insert_size[0],
1094 "PAP-12260: insert_size is 0 already");
1095
1096 /* prepare space */
1097 bi.tb = tb;
1098 bi.bi_bh = tbS0;
1099 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
1100 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
1101 leaf_paste_in_buffer(&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num);
1102
1103 /* paste entry */
1104 leaf_paste_entries (
1105 bi.bi_bh, item_pos, pos_in_item, 1, (struct reiserfs_de_head *)body,
1106 body + DEH_SIZE, tb->insert_size[0]
1107 );
1108 if ( ! item_pos && ! pos_in_item ) {
1109 RFALSE( !tb->CFL[0] || !tb->L[0],
1110 "PAP-12270: CFL[0]/L[0] must be specified");
1111 if (tb->CFL[0]) {
1112 replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0);
1113
1114 }
1115 }
1116 tb->insert_size[0] = 0;
1117 }
1118 } else { /* regular object */
1119 if ( pos_in_item == ih_item_len(pasted) ) {
1120
1121 RFALSE( tb->insert_size[0] <= 0,
1122 "PAP-12275: insert size must not be %d",
1123 tb->insert_size[0]);
1124 bi.tb = tb;
1125 bi.bi_bh = tbS0;
1126 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
1127 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
1128 leaf_paste_in_buffer (&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num);
1129
1130 if (is_indirect_le_ih (pasted)) {
1131#if 0
1132 RFALSE( tb->insert_size[0] != UNFM_P_SIZE,
1133 "PAP-12280: insert_size for indirect item must be %d, not %d",
1134 UNFM_P_SIZE, tb->insert_size[0]);
1135#endif
1136 set_ih_free_space (pasted, 0);
1137 }
1138 tb->insert_size[0] = 0;
1139 }
1140
1141#ifdef CONFIG_REISERFS_CHECK
1142 else {
1143 if ( tb->insert_size[0] ) {
1144 print_cur_tb ("12285");
1145 reiserfs_panic (tb->tb_sb, "PAP-12285: balance_leaf: insert_size must be 0 (%d)", tb->insert_size[0]);
1146 }
1147 }
1148#endif /* CONFIG_REISERFS_CHECK */
1149
1150 }
1151 } /* case M_PASTE: */
1152 }
1153 }
1154
1155#ifdef CONFIG_REISERFS_CHECK
1156 if ( flag == M_PASTE && tb->insert_size[0] ) {
1157 print_cur_tb ("12290");
1158 reiserfs_panic (tb->tb_sb, "PAP-12290: balance_leaf: insert_size is still not 0 (%d)", tb->insert_size[0]);
1159 }
1160#endif /* CONFIG_REISERFS_CHECK */
1161
1162 return 0;
1163} /* Leaf level of the tree is balanced (end of balance_leaf) */
1164
1165
1166
1167/* Make empty node */
1168void make_empty_node (struct buffer_info * bi)
1169{
1170 struct block_head * blkh;
1171
1172 RFALSE( bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
1173
1174 blkh = B_BLK_HEAD(bi->bi_bh);
1175 set_blkh_nr_item( blkh, 0 );
1176 set_blkh_free_space( blkh, MAX_CHILD_SIZE(bi->bi_bh) );
1177
1178 if (bi->bi_parent)
1179 B_N_CHILD (bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */
1180}
1181
1182
1183/* Get first empty buffer */
1184struct buffer_head * get_FEB (struct tree_balance * tb)
1185{
1186 int i;
1187 struct buffer_head * first_b;
1188 struct buffer_info bi;
1189
1190 for (i = 0; i < MAX_FEB_SIZE; i ++)
1191 if (tb->FEB[i] != 0)
1192 break;
1193
1194 if (i == MAX_FEB_SIZE)
1195 reiserfs_panic(tb->tb_sb, "vs-12300: get_FEB: FEB list is empty");
1196
1197 bi.tb = tb;
1198 bi.bi_bh = first_b = tb->FEB[i];
1199 bi.bi_parent = NULL;
1200 bi.bi_position = 0;
1201 make_empty_node (&bi);
1202 set_buffer_uptodate(first_b);
1203 tb->FEB[i] = NULL;
1204 tb->used[i] = first_b;
1205
1206 return(first_b);
1207}
1208
1209
1210/* This is now used because reiserfs_free_block has to be able to
1211** schedule.
1212*/
1213static void store_thrown (struct tree_balance * tb, struct buffer_head * bh)
1214{
1215 int i;
1216
1217 if (buffer_dirty (bh))
1218 reiserfs_warning (tb->tb_sb, "store_thrown deals with dirty buffer");
1219 for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i ++)
1220 if (!tb->thrown[i]) {
1221 tb->thrown[i] = bh;
1222 get_bh(bh) ; /* free_thrown puts this */
1223 return;
1224 }
1225 reiserfs_warning (tb->tb_sb, "store_thrown: too many thrown buffers");
1226}
1227
1228static void free_thrown(struct tree_balance *tb) {
1229 int i ;
1230 b_blocknr_t blocknr ;
1231 for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i++) {
1232 if (tb->thrown[i]) {
1233 blocknr = tb->thrown[i]->b_blocknr ;
1234 if (buffer_dirty (tb->thrown[i]))
1235 reiserfs_warning (tb->tb_sb,
1236 "free_thrown deals with dirty buffer %d",
1237 blocknr);
1238 brelse(tb->thrown[i]) ; /* incremented in store_thrown */
1239 reiserfs_free_block (tb->transaction_handle, NULL, blocknr, 0);
1240 }
1241 }
1242}
1243
1244void reiserfs_invalidate_buffer (struct tree_balance * tb, struct buffer_head * bh)
1245{
1246 struct block_head *blkh;
1247 blkh = B_BLK_HEAD(bh);
1248 set_blkh_level( blkh, FREE_LEVEL );
1249 set_blkh_nr_item( blkh, 0 );
1250
1251 clear_buffer_dirty(bh);
1252 store_thrown (tb, bh);
1253}
1254
1255/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
1256void replace_key (struct tree_balance * tb, struct buffer_head * dest, int n_dest,
1257 struct buffer_head * src, int n_src)
1258{
1259
1260 RFALSE( dest == NULL || src == NULL,
1261 "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1262 src, dest);
1263 RFALSE( ! B_IS_KEYS_LEVEL (dest),
1264 "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1265 dest);
1266 RFALSE( n_dest < 0 || n_src < 0,
1267 "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1268 RFALSE( n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1269 "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1270 n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
1271
1272 if (B_IS_ITEMS_LEVEL (src))
1273 /* source buffer contains leaf node */
1274 memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PITEM_HEAD(src,n_src), KEY_SIZE);
1275 else
1276 memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PDELIM_KEY(src,n_src), KEY_SIZE);
1277
1278 do_balance_mark_internal_dirty (tb, dest, 0);
1279}
1280
1281
1282int get_left_neighbor_position (
1283 struct tree_balance * tb,
1284 int h
1285 )
1286{
1287 int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1);
1288
1289 RFALSE( PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FL[h] == 0,
1290 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1291 h, tb->FL[h], h, PATH_H_PPARENT (tb->tb_path, h));
1292
1293 if (Sh_position == 0)
1294 return B_NR_ITEMS (tb->FL[h]);
1295 else
1296 return Sh_position - 1;
1297}
1298
1299
1300int get_right_neighbor_position (struct tree_balance * tb, int h)
1301{
1302 int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1);
1303
1304 RFALSE( PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FR[h] == 0,
1305 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1306 h, PATH_H_PPARENT (tb->tb_path, h), h, tb->FR[h]);
1307
1308 if (Sh_position == B_NR_ITEMS (PATH_H_PPARENT (tb->tb_path, h)))
1309 return 0;
1310 else
1311 return Sh_position + 1;
1312}
1313
1314
1315#ifdef CONFIG_REISERFS_CHECK
1316
1317int is_reusable (struct super_block * s, b_blocknr_t block, int bit_value);
1318static void check_internal_node (struct super_block * s, struct buffer_head * bh, char * mes)
1319{
1320 struct disk_child * dc;
1321 int i;
1322
1323 RFALSE( !bh, "PAP-12336: bh == 0");
1324
1325 if (!bh || !B_IS_IN_TREE (bh))
1326 return;
1327
1328 RFALSE( !buffer_dirty (bh) &&
1329 !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1330 "PAP-12337: buffer (%b) must be dirty", bh);
1331 dc = B_N_CHILD (bh, 0);
1332
1333 for (i = 0; i <= B_NR_ITEMS (bh); i ++, dc ++) {
1334 if (!is_reusable (s, dc_block_number(dc), 1) ) {
1335 print_cur_tb (mes);
1336 reiserfs_panic (s, "PAP-12338: check_internal_node: invalid child pointer %y in %b", dc, bh);
1337 }
1338 }
1339}
1340
1341
1342static int locked_or_not_in_tree (struct buffer_head * bh, char * which)
1343{
1344 if ( (!buffer_journal_prepared (bh) && buffer_locked (bh)) ||
1345 !B_IS_IN_TREE (bh) ) {
1346 reiserfs_warning (NULL, "vs-12339: locked_or_not_in_tree: %s (%b)",
1347 which, bh);
1348 return 1;
1349 }
1350 return 0;
1351}
1352
1353
1354static int check_before_balancing (struct tree_balance * tb)
1355{
1356 int retval = 0;
1357
1358 if ( cur_tb ) {
1359 reiserfs_panic (tb->tb_sb, "vs-12335: check_before_balancing: "
1360 "suspect that schedule occurred based on cur_tb not being null at this point in code. "
1361 "do_balance cannot properly handle schedule occurring while it runs.");
1362 }
1363
1364 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
1365 prepped all of these for us). */
1366 if ( tb->lnum[0] ) {
1367 retval |= locked_or_not_in_tree (tb->L[0], "L[0]");
1368 retval |= locked_or_not_in_tree (tb->FL[0], "FL[0]");
1369 retval |= locked_or_not_in_tree (tb->CFL[0], "CFL[0]");
1370 check_leaf (tb->L[0]);
1371 }
1372 if ( tb->rnum[0] ) {
1373 retval |= locked_or_not_in_tree (tb->R[0], "R[0]");
1374 retval |= locked_or_not_in_tree (tb->FR[0], "FR[0]");
1375 retval |= locked_or_not_in_tree (tb->CFR[0], "CFR[0]");
1376 check_leaf (tb->R[0]);
1377 }
1378 retval |= locked_or_not_in_tree (PATH_PLAST_BUFFER (tb->tb_path), "S[0]");
1379 check_leaf (PATH_PLAST_BUFFER (tb->tb_path));
1380
1381 return retval;
1382}
1383
1384
1385static void check_after_balance_leaf (struct tree_balance * tb)
1386{
1387 if (tb->lnum[0]) {
1388 if (B_FREE_SPACE (tb->L[0]) !=
1389 MAX_CHILD_SIZE (tb->L[0]) - dc_size(B_N_CHILD (tb->FL[0], get_left_neighbor_position (tb, 0)))) {
1390 print_cur_tb ("12221");
1391 reiserfs_panic (tb->tb_sb, "PAP-12355: check_after_balance_leaf: shift to left was incorrect");
1392 }
1393 }
1394 if (tb->rnum[0]) {
1395 if (B_FREE_SPACE (tb->R[0]) !=
1396 MAX_CHILD_SIZE (tb->R[0]) - dc_size(B_N_CHILD (tb->FR[0], get_right_neighbor_position (tb, 0)))) {
1397 print_cur_tb ("12222");
1398 reiserfs_panic (tb->tb_sb, "PAP-12360: check_after_balance_leaf: shift to right was incorrect");
1399 }
1400 }
1401 if (PATH_H_PBUFFER(tb->tb_path,1) &&
1402 (B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) !=
1403 (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) -
1404 dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1),
1405 PATH_H_POSITION (tb->tb_path, 1)))) )) {
1406 int left = B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0));
1407 int right = (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) -
1408 dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1),
1409 PATH_H_POSITION (tb->tb_path, 1))));
1410 print_cur_tb ("12223");
1411 reiserfs_warning (tb->tb_sb,
1412 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1413 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1414 left,
1415 MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)),
1416 PATH_H_PBUFFER(tb->tb_path,1),
1417 PATH_H_POSITION (tb->tb_path, 1),
1418 dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1), PATH_H_POSITION (tb->tb_path, 1 )) ),
1419 right );
1420 reiserfs_panic (tb->tb_sb, "PAP-12365: check_after_balance_leaf: S is incorrect");
1421 }
1422}
1423
1424
1425static void check_leaf_level (struct tree_balance * tb)
1426{
1427 check_leaf (tb->L[0]);
1428 check_leaf (tb->R[0]);
1429 check_leaf (PATH_PLAST_BUFFER (tb->tb_path));
1430}
1431
1432static void check_internal_levels (struct tree_balance * tb)
1433{
1434 int h;
1435
1436 /* check all internal nodes */
1437 for (h = 1; tb->insert_size[h]; h ++) {
1438 check_internal_node (tb->tb_sb, PATH_H_PBUFFER (tb->tb_path, h), "BAD BUFFER ON PATH");
1439 if (tb->lnum[h])
1440 check_internal_node (tb->tb_sb, tb->L[h], "BAD L");
1441 if (tb->rnum[h])
1442 check_internal_node (tb->tb_sb, tb->R[h], "BAD R");
1443 }
1444
1445}
1446
1447#endif
1448
1449
1450
1451
1452
1453
1454/* Now we have all of the buffers that must be used in balancing of
1455 the tree. We rely on the assumption that schedule() will not occur
1456 while do_balance works. ( Only interrupt handlers are acceptable.)
1457 We balance the tree according to the analysis made before this,
1458 using buffers already obtained. For SMP support it will someday be
1459 necessary to add ordered locking of tb. */
1460
1461/* Some interesting rules of balancing:
1462
1463 we delete a maximum of two nodes per level per balancing: we never
1464 delete R, when we delete two of three nodes L, S, R then we move
1465 them into R.
1466
1467 we only delete L if we are deleting two nodes, if we delete only
1468 one node we delete S
1469
1470 if we shift leaves then we shift as much as we can: this is a
1471 deliberate policy of extremism in node packing which results in
1472 higher average utilization after repeated random balance operations
1473 at the cost of more memory copies and more balancing as a result of
1474 small insertions to full nodes.
1475
1476 if we shift internal nodes we try to evenly balance the node
1477 utilization, with consequent less balancing at the cost of lower
1478 utilization.
1479
1480 one could argue that the policy for directories in leaves should be
1481 that of internal nodes, but we will wait until another day to
1482 evaluate this.... It would be nice to someday measure and prove
1483 these assumptions as to what is optimal....
1484
1485*/
1486
1487static inline void do_balance_starts (struct tree_balance *tb)
1488{
1489 /* use print_cur_tb() to see initial state of struct
1490 tree_balance */
1491
1492 /* store_print_tb (tb); */
1493
1494 /* do not delete, just comment it out */
1495/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
1496 "check");*/
1497 RFALSE( check_before_balancing (tb), "PAP-12340: locked buffers in TB");
1498#ifdef CONFIG_REISERFS_CHECK
1499 cur_tb = tb;
1500#endif
1501}
1502
1503
1504static inline void do_balance_completed (struct tree_balance * tb)
1505{
1506
1507#ifdef CONFIG_REISERFS_CHECK
1508 check_leaf_level (tb);
1509 check_internal_levels (tb);
1510 cur_tb = NULL;
1511#endif
1512
1513 /* reiserfs_free_block is no longer schedule safe. So, we need to
1514 ** put the buffers we want freed on the thrown list during do_balance,
1515 ** and then free them now
1516 */
1517
1518 REISERFS_SB(tb->tb_sb)->s_do_balance ++;
1519
1520
1521 /* release all nodes hold to perform the balancing */
1522 unfix_nodes(tb);
1523
1524 free_thrown(tb) ;
1525}
1526
1527
1528
1529
1530
1531void do_balance (struct tree_balance * tb, /* tree_balance structure */
1532 struct item_head * ih, /* item header of inserted item */
1533 const char * body, /* body of inserted item or bytes to paste */
1534 int flag) /* i - insert, d - delete
1535 c - cut, p - paste
1536
1537 Cut means delete part of an item
1538 (includes removing an entry from a
1539 directory).
1540
1541 Delete means delete whole item.
1542
1543 Insert means add a new item into the
1544 tree.
1545
1546 Paste means to append to the end of an
1547 existing file or to insert a directory
1548 entry. */
1549{
1550 int child_pos, /* position of a child node in its parent */
1551 h; /* level of the tree being processed */
1552 struct item_head insert_key[2]; /* in our processing of one level
1553 we sometimes determine what
1554 must be inserted into the next
1555 higher level. This insertion
1556 consists of a key or two keys
1557 and their corresponding
1558 pointers */
1559 struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next
1560 level */
1561
1562 tb->tb_mode = flag;
1563 tb->need_balance_dirty = 0;
1564
1565 if (FILESYSTEM_CHANGED_TB(tb)) {
1566 reiserfs_panic(tb->tb_sb, "clm-6000: do_balance, fs generation has changed\n") ;
1567 }
1568 /* if we have no real work to do */
1569 if ( ! tb->insert_size[0] ) {
1570 reiserfs_warning (tb->tb_sb,
1571 "PAP-12350: do_balance: insert_size == 0, mode == %c",
1572 flag);
1573 unfix_nodes(tb);
1574 return;
1575 }
1576
1577 atomic_inc (&(fs_generation (tb->tb_sb)));
1578 do_balance_starts (tb);
1579
1580 /* balance leaf returns 0 except if combining L R and S into
1581 one node. see balance_internal() for explanation of this
1582 line of code.*/
1583 child_pos = PATH_H_B_ITEM_ORDER (tb->tb_path, 0) +
1584 balance_leaf (tb, ih, body, flag, insert_key, insert_ptr);
1585
1586#ifdef CONFIG_REISERFS_CHECK
1587 check_after_balance_leaf (tb);
1588#endif
1589
1590 /* Balance internal level of the tree. */
1591 for ( h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++ )
1592 child_pos = balance_internal (tb, h, child_pos, insert_key, insert_ptr);
1593
1594
1595 do_balance_completed (tb);
1596
1597}