blob: 15fa4cbdce3eefa2ceeefc33f9dd2ed0ce50a6f5 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
4
5/*
6 * Written by Anatoly P. Pinchuk pap@namesys.botik.ru
7 * Programm System Institute
8 * Pereslavl-Zalessky Russia
9 */
10
11/*
12 * This file contains functions dealing with S+tree
13 *
14 * B_IS_IN_TREE
15 * copy_item_head
16 * comp_short_keys
17 * comp_keys
18 * comp_short_le_keys
19 * le_key2cpu_key
20 * comp_le_keys
21 * bin_search
22 * get_lkey
23 * get_rkey
24 * key_in_buffer
25 * decrement_bcount
26 * decrement_counters_in_path
27 * reiserfs_check_path
28 * pathrelse_and_restore
29 * pathrelse
30 * search_by_key_reada
31 * search_by_key
32 * search_for_position_by_key
33 * comp_items
34 * prepare_for_direct_item
35 * prepare_for_direntry_item
36 * prepare_for_delete_or_cut
37 * calc_deleted_bytes_number
38 * init_tb_struct
39 * padd_item
40 * reiserfs_delete_item
41 * reiserfs_delete_solid_item
42 * reiserfs_delete_object
43 * maybe_indirect_to_direct
44 * indirect_to_direct_roll_back
45 * reiserfs_cut_from_item
46 * truncate_directory
47 * reiserfs_do_truncate
48 * reiserfs_paste_into_item
49 * reiserfs_insert_item
50 */
51
52#include <linux/config.h>
53#include <linux/time.h>
54#include <linux/string.h>
55#include <linux/pagemap.h>
56#include <linux/reiserfs_fs.h>
57#include <linux/smp_lock.h>
58#include <linux/buffer_head.h>
59#include <linux/quotaops.h>
60
61/* Does the buffer contain a disk block which is in the tree. */
62inline int B_IS_IN_TREE (const struct buffer_head * p_s_bh)
63{
64
65 RFALSE( B_LEVEL (p_s_bh) > MAX_HEIGHT,
66 "PAP-1010: block (%b) has too big level (%z)", p_s_bh, p_s_bh);
67
68 return ( B_LEVEL (p_s_bh) != FREE_LEVEL );
69}
70
71//
72// to gets item head in le form
73//
74inline void copy_item_head(struct item_head * p_v_to,
75 const struct item_head * p_v_from)
76{
77 memcpy (p_v_to, p_v_from, IH_SIZE);
78}
79
80
81/* k1 is pointer to on-disk structure which is stored in little-endian
82 form. k2 is pointer to cpu variable. For key of items of the same
83 object this returns 0.
84 Returns: -1 if key1 < key2
85 0 if key1 == key2
86 1 if key1 > key2 */
87inline int comp_short_keys (const struct reiserfs_key * le_key,
88 const struct cpu_key * cpu_key)
89{
Al Viro3e8962b2005-05-01 08:59:18 -070090 __le32 * p_s_le_u32;
91 __u32 * p_s_cpu_u32;
Linus Torvalds1da177e2005-04-16 15:20:36 -070092 int n_key_length = REISERFS_SHORT_KEY_LEN;
93
Al Viro3e8962b2005-05-01 08:59:18 -070094 p_s_le_u32 = (__le32 *)le_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -070095 p_s_cpu_u32 = (__u32 *)&cpu_key->on_disk_key;
96 for( ; n_key_length--; ++p_s_le_u32, ++p_s_cpu_u32 ) {
97 if ( le32_to_cpu (*p_s_le_u32) < *p_s_cpu_u32 )
98 return -1;
99 if ( le32_to_cpu (*p_s_le_u32) > *p_s_cpu_u32 )
100 return 1;
101 }
102
103 return 0;
104}
105
106
107/* k1 is pointer to on-disk structure which is stored in little-endian
108 form. k2 is pointer to cpu variable.
109 Compare keys using all 4 key fields.
110 Returns: -1 if key1 < key2 0
111 if key1 = key2 1 if key1 > key2 */
112static inline int comp_keys (const struct reiserfs_key * le_key, const struct cpu_key * cpu_key)
113{
114 int retval;
115
116 retval = comp_short_keys (le_key, cpu_key);
117 if (retval)
118 return retval;
119 if (le_key_k_offset (le_key_version(le_key), le_key) < cpu_key_k_offset (cpu_key))
120 return -1;
121 if (le_key_k_offset (le_key_version(le_key), le_key) > cpu_key_k_offset (cpu_key))
122 return 1;
123
124 if (cpu_key->key_length == 3)
125 return 0;
126
127 /* this part is needed only when tail conversion is in progress */
128 if (le_key_k_type (le_key_version(le_key), le_key) < cpu_key_k_type (cpu_key))
129 return -1;
130
131 if (le_key_k_type (le_key_version(le_key), le_key) > cpu_key_k_type (cpu_key))
132 return 1;
133
134 return 0;
135}
136
137
138inline int comp_short_le_keys (const struct reiserfs_key * key1, const struct reiserfs_key * key2)
139{
140 __u32 * p_s_1_u32, * p_s_2_u32;
141 int n_key_length = REISERFS_SHORT_KEY_LEN;
142
143 p_s_1_u32 = (__u32 *)key1;
144 p_s_2_u32 = (__u32 *)key2;
145 for( ; n_key_length--; ++p_s_1_u32, ++p_s_2_u32 ) {
146 if ( le32_to_cpu (*p_s_1_u32) < le32_to_cpu (*p_s_2_u32) )
147 return -1;
148 if ( le32_to_cpu (*p_s_1_u32) > le32_to_cpu (*p_s_2_u32) )
149 return 1;
150 }
151 return 0;
152}
153
154inline void le_key2cpu_key (struct cpu_key * to, const struct reiserfs_key * from)
155{
156 to->on_disk_key.k_dir_id = le32_to_cpu (from->k_dir_id);
157 to->on_disk_key.k_objectid = le32_to_cpu (from->k_objectid);
158
159 // find out version of the key
160 to->version = le_key_version (from);
161 if (to->version == KEY_FORMAT_3_5) {
162 to->on_disk_key.u.k_offset_v1.k_offset = le32_to_cpu (from->u.k_offset_v1.k_offset);
163 to->on_disk_key.u.k_offset_v1.k_uniqueness = le32_to_cpu (from->u.k_offset_v1.k_uniqueness);
164 } else {
165 to->on_disk_key.u.k_offset_v2.k_offset = offset_v2_k_offset(&from->u.k_offset_v2);
166 to->on_disk_key.u.k_offset_v2.k_type = offset_v2_k_type(&from->u.k_offset_v2);
167 }
168}
169
170
171
172// this does not say which one is bigger, it only returns 1 if keys
173// are not equal, 0 otherwise
174inline int comp_le_keys (const struct reiserfs_key * k1, const struct reiserfs_key * k2)
175{
176 return memcmp (k1, k2, sizeof (struct reiserfs_key));
177}
178
179/**************************************************************************
180 * Binary search toolkit function *
181 * Search for an item in the array by the item key *
182 * Returns: 1 if found, 0 if not found; *
183 * *p_n_pos = number of the searched element if found, else the *
184 * number of the first element that is larger than p_v_key. *
185 **************************************************************************/
186/* For those not familiar with binary search: n_lbound is the leftmost item that it
187 could be, n_rbound the rightmost item that it could be. We examine the item
188 halfway between n_lbound and n_rbound, and that tells us either that we can increase
189 n_lbound, or decrease n_rbound, or that we have found it, or if n_lbound <= n_rbound that
190 there are no possible items, and we have not found it. With each examination we
191 cut the number of possible items it could be by one more than half rounded down,
192 or we find it. */
193static inline int bin_search (
194 const void * p_v_key, /* Key to search for. */
195 const void * p_v_base,/* First item in the array. */
196 int p_n_num, /* Number of items in the array. */
197 int p_n_width, /* Item size in the array.
198 searched. Lest the reader be
199 confused, note that this is crafted
200 as a general function, and when it
201 is applied specifically to the array
202 of item headers in a node, p_n_width
203 is actually the item header size not
204 the item size. */
205 int * p_n_pos /* Number of the searched for element. */
206 ) {
207 int n_rbound, n_lbound, n_j;
208
209 for ( n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0))/2; n_lbound <= n_rbound; n_j = (n_rbound + n_lbound)/2 )
210 switch( comp_keys((struct reiserfs_key *)((char * )p_v_base + n_j * p_n_width), (struct cpu_key *)p_v_key) ) {
211 case -1: n_lbound = n_j + 1; continue;
212 case 1: n_rbound = n_j - 1; continue;
213 case 0: *p_n_pos = n_j; return ITEM_FOUND; /* Key found in the array. */
214 }
215
216 /* bin_search did not find given key, it returns position of key,
217 that is minimal and greater than the given one. */
218 *p_n_pos = n_lbound;
219 return ITEM_NOT_FOUND;
220}
221
222#ifdef CONFIG_REISERFS_CHECK
223extern struct tree_balance * cur_tb;
224#endif
225
226
227
228/* Minimal possible key. It is never in the tree. */
229const struct reiserfs_key MIN_KEY = {0, 0, {{0, 0},}};
230
231/* Maximal possible key. It is never in the tree. */
Al Viro3e8962b2005-05-01 08:59:18 -0700232const struct reiserfs_key MAX_KEY = {
233 __constant_cpu_to_le32(0xffffffff),
234 __constant_cpu_to_le32(0xffffffff),
235 {{__constant_cpu_to_le32(0xffffffff),
236 __constant_cpu_to_le32(0xffffffff)},}
237};
Al Viro6a3a16f2005-05-01 08:59:17 -0700238const struct in_core_key MAX_IN_CORE_KEY = {0xffffffff, 0xffffffff, {{0xffffffff, 0xffffffff},}};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700239
240
241/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
242 of the path, and going upwards. We must check the path's validity at each step. If the key is not in
243 the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this
244 case we return a special key, either MIN_KEY or MAX_KEY. */
245static inline const struct reiserfs_key * get_lkey (
246 const struct path * p_s_chk_path,
247 const struct super_block * p_s_sb
248 ) {
249 int n_position, n_path_offset = p_s_chk_path->path_length;
250 struct buffer_head * p_s_parent;
251
252 RFALSE( n_path_offset < FIRST_PATH_ELEMENT_OFFSET,
253 "PAP-5010: invalid offset in the path");
254
255 /* While not higher in path than first element. */
256 while ( n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET ) {
257
258 RFALSE( ! buffer_uptodate(PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
259 "PAP-5020: parent is not uptodate");
260
261 /* Parent at the path is not in the tree now. */
262 if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) )
263 return &MAX_KEY;
264 /* Check whether position in the parent is correct. */
265 if ( (n_position = PATH_OFFSET_POSITION(p_s_chk_path, n_path_offset)) > B_NR_ITEMS(p_s_parent) )
266 return &MAX_KEY;
267 /* Check whether parent at the path really points to the child. */
268 if ( B_N_CHILD_NUM(p_s_parent, n_position) !=
269 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset + 1)->b_blocknr )
270 return &MAX_KEY;
271 /* Return delimiting key if position in the parent is not equal to zero. */
272 if ( n_position )
273 return B_N_PDELIM_KEY(p_s_parent, n_position - 1);
274 }
275 /* Return MIN_KEY if we are in the root of the buffer tree. */
276 if ( PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
277 SB_ROOT_BLOCK (p_s_sb) )
278 return &MIN_KEY;
279 return &MAX_KEY;
280}
281
282
283/* Get delimiting key of the buffer at the path and its right neighbor. */
284inline const struct reiserfs_key * get_rkey (
285 const struct path * p_s_chk_path,
286 const struct super_block * p_s_sb
287 ) {
288 int n_position,
289 n_path_offset = p_s_chk_path->path_length;
290 struct buffer_head * p_s_parent;
291
292 RFALSE( n_path_offset < FIRST_PATH_ELEMENT_OFFSET,
293 "PAP-5030: invalid offset in the path");
294
295 while ( n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET ) {
296
297 RFALSE( ! buffer_uptodate(PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
298 "PAP-5040: parent is not uptodate");
299
300 /* Parent at the path is not in the tree now. */
301 if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) )
302 return &MIN_KEY;
303 /* Check whether position in the parent is correct. */
304 if ( (n_position = PATH_OFFSET_POSITION(p_s_chk_path, n_path_offset)) > B_NR_ITEMS(p_s_parent) )
305 return &MIN_KEY;
306 /* Check whether parent at the path really points to the child. */
307 if ( B_N_CHILD_NUM(p_s_parent, n_position) !=
308 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset + 1)->b_blocknr )
309 return &MIN_KEY;
310 /* Return delimiting key if position in the parent is not the last one. */
311 if ( n_position != B_NR_ITEMS(p_s_parent) )
312 return B_N_PDELIM_KEY(p_s_parent, n_position);
313 }
314 /* Return MAX_KEY if we are in the root of the buffer tree. */
315 if ( PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
316 SB_ROOT_BLOCK (p_s_sb) )
317 return &MAX_KEY;
318 return &MIN_KEY;
319}
320
321
322/* Check whether a key is contained in the tree rooted from a buffer at a path. */
323/* This works by looking at the left and right delimiting keys for the buffer in the last path_element in
324 the path. These delimiting keys are stored at least one level above that buffer in the tree. If the
325 buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in
326 this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */
327static inline int key_in_buffer (
328 struct path * p_s_chk_path, /* Path which should be checked. */
329 const struct cpu_key * p_s_key, /* Key which should be checked. */
330 struct super_block * p_s_sb /* Super block pointer. */
331 ) {
332
333 RFALSE( ! p_s_key || p_s_chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET ||
334 p_s_chk_path->path_length > MAX_HEIGHT,
335 "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
336 p_s_key, p_s_chk_path->path_length);
337 RFALSE( !PATH_PLAST_BUFFER(p_s_chk_path)->b_bdev,
338 "PAP-5060: device must not be NODEV");
339
340 if ( comp_keys(get_lkey(p_s_chk_path, p_s_sb), p_s_key) == 1 )
341 /* left delimiting key is bigger, that the key we look for */
342 return 0;
343 // if ( comp_keys(p_s_key, get_rkey(p_s_chk_path, p_s_sb)) != -1 )
344 if ( comp_keys(get_rkey(p_s_chk_path, p_s_sb), p_s_key) != 1 )
345 /* p_s_key must be less than right delimitiing key */
346 return 0;
347 return 1;
348}
349
350
351inline void decrement_bcount(
352 struct buffer_head * p_s_bh
353 ) {
354 if ( p_s_bh ) {
355 if ( atomic_read (&(p_s_bh->b_count)) ) {
356 put_bh(p_s_bh) ;
357 return;
358 }
359 reiserfs_panic(NULL, "PAP-5070: decrement_bcount: trying to free free buffer %b", p_s_bh);
360 }
361}
362
363
364/* Decrement b_count field of the all buffers in the path. */
365void decrement_counters_in_path (
366 struct path * p_s_search_path
367 ) {
368 int n_path_offset = p_s_search_path->path_length;
369
370 RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET ||
371 n_path_offset > EXTENDED_MAX_HEIGHT - 1,
372 "PAP-5080: invalid path offset of %d", n_path_offset);
373
374 while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) {
375 struct buffer_head * bh;
376
377 bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--);
378 decrement_bcount (bh);
379 }
380 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
381}
382
383
384int reiserfs_check_path(struct path *p) {
385 RFALSE( p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
386 "path not properly relsed") ;
387 return 0 ;
388}
389
390
391/* Release all buffers in the path. Restore dirty bits clean
392** when preparing the buffer for the log
393**
394** only called from fix_nodes()
395*/
396void pathrelse_and_restore (
397 struct super_block *s,
398 struct path * p_s_search_path
399 ) {
400 int n_path_offset = p_s_search_path->path_length;
401
402 RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
403 "clm-4000: invalid path offset");
404
405 while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) {
406 reiserfs_restore_prepared_buffer(s, PATH_OFFSET_PBUFFER(p_s_search_path,
407 n_path_offset));
408 brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
409 }
410 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
411}
412
413/* Release all buffers in the path. */
414void pathrelse (
415 struct path * p_s_search_path
416 ) {
417 int n_path_offset = p_s_search_path->path_length;
418
419 RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
420 "PAP-5090: invalid path offset");
421
422 while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET )
423 brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
424
425 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
426}
427
428
429
430static int is_leaf (char * buf, int blocksize, struct buffer_head * bh)
431{
432 struct block_head * blkh;
433 struct item_head * ih;
434 int used_space;
435 int prev_location;
436 int i;
437 int nr;
438
439 blkh = (struct block_head *)buf;
440 if ( blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
441 reiserfs_warning (NULL, "is_leaf: this should be caught earlier");
442 return 0;
443 }
444
445 nr = blkh_nr_item(blkh);
446 if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
447 /* item number is too big or too small */
448 reiserfs_warning (NULL, "is_leaf: nr_item seems wrong: %z", bh);
449 return 0;
450 }
451 ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
452 used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location (ih));
453 if (used_space != blocksize - blkh_free_space(blkh)) {
454 /* free space does not match to calculated amount of use space */
455 reiserfs_warning (NULL, "is_leaf: free space seems wrong: %z", bh);
456 return 0;
457 }
458
459 // FIXME: it is_leaf will hit performance too much - we may have
460 // return 1 here
461
462 /* check tables of item heads */
463 ih = (struct item_head *)(buf + BLKH_SIZE);
464 prev_location = blocksize;
465 for (i = 0; i < nr; i ++, ih ++) {
466 if ( le_ih_k_type(ih) == TYPE_ANY) {
467 reiserfs_warning (NULL, "is_leaf: wrong item type for item %h",ih);
468 return 0;
469 }
470 if (ih_location (ih) >= blocksize || ih_location (ih) < IH_SIZE * nr) {
471 reiserfs_warning (NULL, "is_leaf: item location seems wrong: %h", ih);
472 return 0;
473 }
474 if (ih_item_len (ih) < 1 || ih_item_len (ih) > MAX_ITEM_LEN (blocksize)) {
475 reiserfs_warning (NULL, "is_leaf: item length seems wrong: %h", ih);
476 return 0;
477 }
478 if (prev_location - ih_location (ih) != ih_item_len (ih)) {
479 reiserfs_warning (NULL, "is_leaf: item location seems wrong (second one): %h", ih);
480 return 0;
481 }
482 prev_location = ih_location (ih);
483 }
484
485 // one may imagine much more checks
486 return 1;
487}
488
489
490/* returns 1 if buf looks like an internal node, 0 otherwise */
491static int is_internal (char * buf, int blocksize, struct buffer_head * bh)
492{
493 struct block_head * blkh;
494 int nr;
495 int used_space;
496
497 blkh = (struct block_head *)buf;
498 nr = blkh_level(blkh);
499 if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
500 /* this level is not possible for internal nodes */
501 reiserfs_warning (NULL, "is_internal: this should be caught earlier");
502 return 0;
503 }
504
505 nr = blkh_nr_item(blkh);
506 if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
507 /* for internal which is not root we might check min number of keys */
508 reiserfs_warning (NULL, "is_internal: number of key seems wrong: %z", bh);
509 return 0;
510 }
511
512 used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
513 if (used_space != blocksize - blkh_free_space(blkh)) {
514 reiserfs_warning (NULL, "is_internal: free space seems wrong: %z", bh);
515 return 0;
516 }
517
518 // one may imagine much more checks
519 return 1;
520}
521
522
523// make sure that bh contains formatted node of reiserfs tree of
524// 'level'-th level
525static int is_tree_node (struct buffer_head * bh, int level)
526{
527 if (B_LEVEL (bh) != level) {
528 reiserfs_warning (NULL, "is_tree_node: node level %d does not match to the expected one %d",
529 B_LEVEL (bh), level);
530 return 0;
531 }
532 if (level == DISK_LEAF_NODE_LEVEL)
533 return is_leaf (bh->b_data, bh->b_size, bh);
534
535 return is_internal (bh->b_data, bh->b_size, bh);
536}
537
538
539
540#define SEARCH_BY_KEY_READA 16
541
542/* The function is NOT SCHEDULE-SAFE! */
543static void search_by_key_reada (struct super_block * s,
544 struct buffer_head **bh,
545 unsigned long *b, int num)
546{
547 int i,j;
548
549 for (i = 0 ; i < num ; i++) {
550 bh[i] = sb_getblk (s, b[i]);
551 }
552 for (j = 0 ; j < i ; j++) {
553 /*
554 * note, this needs attention if we are getting rid of the BKL
555 * you have to make sure the prepared bit isn't set on this buffer
556 */
557 if (!buffer_uptodate(bh[j]))
558 ll_rw_block(READA, 1, bh + j);
559 brelse(bh[j]);
560 }
561}
562
563/**************************************************************************
564 * Algorithm SearchByKey *
565 * look for item in the Disk S+Tree by its key *
566 * Input: p_s_sb - super block *
567 * p_s_key - pointer to the key to search *
568 * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR *
569 * p_s_search_path - path from the root to the needed leaf *
570 **************************************************************************/
571
572/* This function fills up the path from the root to the leaf as it
573 descends the tree looking for the key. It uses reiserfs_bread to
574 try to find buffers in the cache given their block number. If it
575 does not find them in the cache it reads them from disk. For each
576 node search_by_key finds using reiserfs_bread it then uses
577 bin_search to look through that node. bin_search will find the
578 position of the block_number of the next node if it is looking
579 through an internal node. If it is looking through a leaf node
580 bin_search will find the position of the item which has key either
581 equal to given key, or which is the maximal key less than the given
582 key. search_by_key returns a path that must be checked for the
583 correctness of the top of the path but need not be checked for the
584 correctness of the bottom of the path */
585/* The function is NOT SCHEDULE-SAFE! */
586int search_by_key (struct super_block * p_s_sb,
587 const struct cpu_key * p_s_key, /* Key to search. */
588 struct path * p_s_search_path, /* This structure was
589 allocated and initialized
590 by the calling
591 function. It is filled up
592 by this function. */
593 int n_stop_level /* How far down the tree to search. To
594 stop at leaf level - set to
595 DISK_LEAF_NODE_LEVEL */
596 ) {
597 int n_block_number;
598 int expected_level;
599 struct buffer_head * p_s_bh;
600 struct path_element * p_s_last_element;
601 int n_node_level, n_retval;
602 int right_neighbor_of_leaf_node;
603 int fs_gen;
604 struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
605 unsigned long reada_blocks[SEARCH_BY_KEY_READA];
606 int reada_count = 0;
607
608#ifdef CONFIG_REISERFS_CHECK
609 int n_repeat_counter = 0;
610#endif
611
612 PROC_INFO_INC( p_s_sb, search_by_key );
613
614 /* As we add each node to a path we increase its count. This means that
615 we must be careful to release all nodes in a path before we either
616 discard the path struct or re-use the path struct, as we do here. */
617
618 decrement_counters_in_path(p_s_search_path);
619
620 right_neighbor_of_leaf_node = 0;
621
622 /* With each iteration of this loop we search through the items in the
623 current node, and calculate the next current node(next path element)
624 for the next iteration of this loop.. */
625 n_block_number = SB_ROOT_BLOCK (p_s_sb);
626 expected_level = -1;
627 while ( 1 ) {
628
629#ifdef CONFIG_REISERFS_CHECK
630 if ( !(++n_repeat_counter % 50000) )
631 reiserfs_warning (p_s_sb, "PAP-5100: search_by_key: %s:"
632 "there were %d iterations of while loop "
633 "looking for key %K",
634 current->comm, n_repeat_counter, p_s_key);
635#endif
636
637 /* prep path to have another element added to it. */
638 p_s_last_element = PATH_OFFSET_PELEMENT(p_s_search_path, ++p_s_search_path->path_length);
639 fs_gen = get_generation (p_s_sb);
640
641 /* Read the next tree node, and set the last element in the path to
642 have a pointer to it. */
643 if ((p_s_bh = p_s_last_element->pe_buffer =
644 sb_getblk(p_s_sb, n_block_number)) ) {
645 if (!buffer_uptodate(p_s_bh) && reada_count > 1) {
646 search_by_key_reada (p_s_sb, reada_bh,
647 reada_blocks, reada_count);
648 }
649 ll_rw_block(READ, 1, &p_s_bh);
650 wait_on_buffer(p_s_bh);
651 if (!buffer_uptodate(p_s_bh))
652 goto io_error;
653 } else {
654io_error:
655 p_s_search_path->path_length --;
656 pathrelse(p_s_search_path);
657 return IO_ERROR;
658 }
659 reada_count = 0;
660 if (expected_level == -1)
661 expected_level = SB_TREE_HEIGHT (p_s_sb);
662 expected_level --;
663
664 /* It is possible that schedule occurred. We must check whether the key
665 to search is still in the tree rooted from the current buffer. If
666 not then repeat search from the root. */
667 if ( fs_changed (fs_gen, p_s_sb) &&
668 (!B_IS_IN_TREE (p_s_bh) ||
669 B_LEVEL(p_s_bh) != expected_level ||
670 !key_in_buffer(p_s_search_path, p_s_key, p_s_sb))) {
671 PROC_INFO_INC( p_s_sb, search_by_key_fs_changed );
672 PROC_INFO_INC( p_s_sb, search_by_key_restarted );
673 PROC_INFO_INC( p_s_sb, sbk_restarted[ expected_level - 1 ] );
674 decrement_counters_in_path(p_s_search_path);
675
676 /* Get the root block number so that we can repeat the search
677 starting from the root. */
678 n_block_number = SB_ROOT_BLOCK (p_s_sb);
679 expected_level = -1;
680 right_neighbor_of_leaf_node = 0;
681
682 /* repeat search from the root */
683 continue;
684 }
685
686 /* only check that the key is in the buffer if p_s_key is not
687 equal to the MAX_KEY. Latter case is only possible in
688 "finish_unfinished()" processing during mount. */
689 RFALSE( comp_keys( &MAX_KEY, p_s_key ) &&
690 ! key_in_buffer(p_s_search_path, p_s_key, p_s_sb),
691 "PAP-5130: key is not in the buffer");
692#ifdef CONFIG_REISERFS_CHECK
693 if ( cur_tb ) {
694 print_cur_tb ("5140");
695 reiserfs_panic(p_s_sb, "PAP-5140: search_by_key: schedule occurred in do_balance!");
696 }
697#endif
698
699 // make sure, that the node contents look like a node of
700 // certain level
701 if (!is_tree_node (p_s_bh, expected_level)) {
702 reiserfs_warning (p_s_sb, "vs-5150: search_by_key: "
703 "invalid format found in block %ld. Fsck?",
704 p_s_bh->b_blocknr);
705 pathrelse (p_s_search_path);
706 return IO_ERROR;
707 }
708
709 /* ok, we have acquired next formatted node in the tree */
710 n_node_level = B_LEVEL (p_s_bh);
711
712 PROC_INFO_BH_STAT( p_s_sb, p_s_bh, n_node_level - 1 );
713
714 RFALSE( n_node_level < n_stop_level,
715 "vs-5152: tree level (%d) is less than stop level (%d)",
716 n_node_level, n_stop_level);
717
718 n_retval = bin_search( p_s_key, B_N_PITEM_HEAD(p_s_bh, 0),
719 B_NR_ITEMS(p_s_bh),
720 ( n_node_level == DISK_LEAF_NODE_LEVEL ) ? IH_SIZE : KEY_SIZE,
721 &(p_s_last_element->pe_position));
722 if (n_node_level == n_stop_level) {
723 return n_retval;
724 }
725
726 /* we are not in the stop level */
727 if (n_retval == ITEM_FOUND)
728 /* item has been found, so we choose the pointer which is to the right of the found one */
729 p_s_last_element->pe_position++;
730
731 /* if item was not found we choose the position which is to
732 the left of the found item. This requires no code,
733 bin_search did it already.*/
734
735 /* So we have chosen a position in the current node which is
736 an internal node. Now we calculate child block number by
737 position in the node. */
738 n_block_number = B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position);
739
740 /* if we are going to read leaf nodes, try for read ahead as well */
741 if ((p_s_search_path->reada & PATH_READA) &&
742 n_node_level == DISK_LEAF_NODE_LEVEL + 1)
743 {
744 int pos = p_s_last_element->pe_position;
745 int limit = B_NR_ITEMS(p_s_bh);
746 struct reiserfs_key *le_key;
747
748 if (p_s_search_path->reada & PATH_READA_BACK)
749 limit = 0;
750 while(reada_count < SEARCH_BY_KEY_READA) {
751 if (pos == limit)
752 break;
753 reada_blocks[reada_count++] = B_N_CHILD_NUM(p_s_bh, pos);
754 if (p_s_search_path->reada & PATH_READA_BACK)
755 pos--;
756 else
757 pos++;
758
759 /*
760 * check to make sure we're in the same object
761 */
762 le_key = B_N_PDELIM_KEY(p_s_bh, pos);
763 if (le32_to_cpu(le_key->k_objectid) !=
764 p_s_key->on_disk_key.k_objectid)
765 {
766 break;
767 }
768 }
769 }
770 }
771}
772
773
774/* Form the path to an item and position in this item which contains
775 file byte defined by p_s_key. If there is no such item
776 corresponding to the key, we point the path to the item with
777 maximal key less than p_s_key, and *p_n_pos_in_item is set to one
778 past the last entry/byte in the item. If searching for entry in a
779 directory item, and it is not found, *p_n_pos_in_item is set to one
780 entry more than the entry with maximal key which is less than the
781 sought key.
782
783 Note that if there is no entry in this same node which is one more,
784 then we point to an imaginary entry. for direct items, the
785 position is in units of bytes, for indirect items the position is
786 in units of blocknr entries, for directory items the position is in
787 units of directory entries. */
788
789/* The function is NOT SCHEDULE-SAFE! */
790int search_for_position_by_key (struct super_block * p_s_sb, /* Pointer to the super block. */
791 const struct cpu_key * p_cpu_key, /* Key to search (cpu variable) */
792 struct path * p_s_search_path /* Filled up by this function. */
793 ) {
794 struct item_head * p_le_ih; /* pointer to on-disk structure */
795 int n_blk_size;
796 loff_t item_offset, offset;
797 struct reiserfs_dir_entry de;
798 int retval;
799
800 /* If searching for directory entry. */
801 if ( is_direntry_cpu_key (p_cpu_key) )
802 return search_by_entry_key (p_s_sb, p_cpu_key, p_s_search_path, &de);
803
804 /* If not searching for directory entry. */
805
806 /* If item is found. */
807 retval = search_item (p_s_sb, p_cpu_key, p_s_search_path);
808 if (retval == IO_ERROR)
809 return retval;
810 if ( retval == ITEM_FOUND ) {
811
812 RFALSE( ! ih_item_len(
813 B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path),
814 PATH_LAST_POSITION(p_s_search_path))),
815 "PAP-5165: item length equals zero");
816
817 pos_in_item(p_s_search_path) = 0;
818 return POSITION_FOUND;
819 }
820
821 RFALSE( ! PATH_LAST_POSITION(p_s_search_path),
822 "PAP-5170: position equals zero");
823
824 /* Item is not found. Set path to the previous item. */
825 p_le_ih = B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path), --PATH_LAST_POSITION(p_s_search_path));
826 n_blk_size = p_s_sb->s_blocksize;
827
828 if (comp_short_keys (&(p_le_ih->ih_key), p_cpu_key)) {
829 return FILE_NOT_FOUND;
830 }
831
832 // FIXME: quite ugly this far
833
834 item_offset = le_ih_k_offset (p_le_ih);
835 offset = cpu_key_k_offset (p_cpu_key);
836
837 /* Needed byte is contained in the item pointed to by the path.*/
838 if (item_offset <= offset &&
839 item_offset + op_bytes_number (p_le_ih, n_blk_size) > offset) {
840 pos_in_item (p_s_search_path) = offset - item_offset;
841 if ( is_indirect_le_ih(p_le_ih) ) {
842 pos_in_item (p_s_search_path) /= n_blk_size;
843 }
844 return POSITION_FOUND;
845 }
846
847 /* Needed byte is not contained in the item pointed to by the
848 path. Set pos_in_item out of the item. */
849 if ( is_indirect_le_ih (p_le_ih) )
850 pos_in_item (p_s_search_path) = ih_item_len(p_le_ih) / UNFM_P_SIZE;
851 else
852 pos_in_item (p_s_search_path) = ih_item_len( p_le_ih );
853
854 return POSITION_NOT_FOUND;
855}
856
857
858/* Compare given item and item pointed to by the path. */
859int comp_items (const struct item_head * stored_ih, const struct path * p_s_path)
860{
861 struct buffer_head * p_s_bh;
862 struct item_head * ih;
863
864 /* Last buffer at the path is not in the tree. */
865 if ( ! B_IS_IN_TREE(p_s_bh = PATH_PLAST_BUFFER(p_s_path)) )
866 return 1;
867
868 /* Last path position is invalid. */
869 if ( PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(p_s_bh) )
870 return 1;
871
872 /* we need only to know, whether it is the same item */
873 ih = get_ih (p_s_path);
874 return memcmp (stored_ih, ih, IH_SIZE);
875}
876
877
878/* unformatted nodes are not logged anymore, ever. This is safe
879** now
880*/
881#define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1)
882
883// block can not be forgotten as it is in I/O or held by someone
884#define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))
885
886
887
888// prepare for delete or cut of direct item
889static inline int prepare_for_direct_item (struct path * path,
890 struct item_head * le_ih,
891 struct inode * inode,
892 loff_t new_file_length,
893 int * cut_size)
894{
895 loff_t round_len;
896
897
898 if ( new_file_length == max_reiserfs_offset (inode) ) {
899 /* item has to be deleted */
900 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
901 return M_DELETE;
902 }
903
904 // new file gets truncated
905 if (get_inode_item_key_version (inode) == KEY_FORMAT_3_6) {
906 //
907 round_len = ROUND_UP (new_file_length);
908 /* this was n_new_file_length < le_ih ... */
909 if ( round_len < le_ih_k_offset (le_ih) ) {
910 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
911 return M_DELETE; /* Delete this item. */
912 }
913 /* Calculate first position and size for cutting from item. */
914 pos_in_item (path) = round_len - (le_ih_k_offset (le_ih) - 1);
915 *cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
916
917 return M_CUT; /* Cut from this item. */
918 }
919
920
921 // old file: items may have any length
922
923 if ( new_file_length < le_ih_k_offset (le_ih) ) {
924 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
925 return M_DELETE; /* Delete this item. */
926 }
927 /* Calculate first position and size for cutting from item. */
928 *cut_size = -(ih_item_len(le_ih) -
929 (pos_in_item (path) = new_file_length + 1 - le_ih_k_offset (le_ih)));
930 return M_CUT; /* Cut from this item. */
931}
932
933
934static inline int prepare_for_direntry_item (struct path * path,
935 struct item_head * le_ih,
936 struct inode * inode,
937 loff_t new_file_length,
938 int * cut_size)
939{
940 if (le_ih_k_offset (le_ih) == DOT_OFFSET &&
941 new_file_length == max_reiserfs_offset (inode)) {
942 RFALSE( ih_entry_count (le_ih) != 2,
943 "PAP-5220: incorrect empty directory item (%h)", le_ih);
944 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
945 return M_DELETE; /* Delete the directory item containing "." and ".." entry. */
946 }
947
948 if ( ih_entry_count (le_ih) == 1 ) {
949 /* Delete the directory item such as there is one record only
950 in this item*/
951 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
952 return M_DELETE;
953 }
954
955 /* Cut one record from the directory item. */
956 *cut_size = -(DEH_SIZE + entry_length (get_last_bh (path), le_ih, pos_in_item (path)));
957 return M_CUT;
958}
959
960
961/* If the path points to a directory or direct item, calculate mode and the size cut, for balance.
962 If the path points to an indirect item, remove some number of its unformatted nodes.
963 In case of file truncate calculate whether this item must be deleted/truncated or last
964 unformatted node of this item will be converted to a direct item.
965 This function returns a determination of what balance mode the calling function should employ. */
966static char prepare_for_delete_or_cut(
967 struct reiserfs_transaction_handle *th,
968 struct inode * inode,
969 struct path * p_s_path,
970 const struct cpu_key * p_s_item_key,
971 int * p_n_removed, /* Number of unformatted nodes which were removed
972 from end of the file. */
973 int * p_n_cut_size,
974 unsigned long long n_new_file_length /* MAX_KEY_OFFSET in case of delete. */
975 ) {
976 struct super_block * p_s_sb = inode->i_sb;
977 struct item_head * p_le_ih = PATH_PITEM_HEAD(p_s_path);
978 struct buffer_head * p_s_bh = PATH_PLAST_BUFFER(p_s_path);
979
980 BUG_ON (!th->t_trans_id);
981
982 /* Stat_data item. */
983 if ( is_statdata_le_ih (p_le_ih) ) {
984
985 RFALSE( n_new_file_length != max_reiserfs_offset (inode),
986 "PAP-5210: mode must be M_DELETE");
987
988 *p_n_cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
989 return M_DELETE;
990 }
991
992
993 /* Directory item. */
994 if ( is_direntry_le_ih (p_le_ih) )
995 return prepare_for_direntry_item (p_s_path, p_le_ih, inode, n_new_file_length, p_n_cut_size);
996
997 /* Direct item. */
998 if ( is_direct_le_ih (p_le_ih) )
999 return prepare_for_direct_item (p_s_path, p_le_ih, inode, n_new_file_length, p_n_cut_size);
1000
1001
1002 /* Case of an indirect item. */
1003 {
1004 int n_unfm_number, /* Number of the item unformatted nodes. */
1005 n_counter,
1006 n_blk_size;
Al Viro3e8962b2005-05-01 08:59:18 -07001007 __le32 * p_n_unfm_pointer; /* Pointer to the unformatted node number. */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001008 __u32 tmp;
1009 struct item_head s_ih; /* Item header. */
1010 char c_mode; /* Returned mode of the balance. */
1011 int need_research;
1012
1013
1014 n_blk_size = p_s_sb->s_blocksize;
1015
1016 /* Search for the needed object indirect item until there are no unformatted nodes to be removed. */
1017 do {
1018 need_research = 0;
1019 p_s_bh = PATH_PLAST_BUFFER(p_s_path);
1020 /* Copy indirect item header to a temp variable. */
1021 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
1022 /* Calculate number of unformatted nodes in this item. */
1023 n_unfm_number = I_UNFM_NUM(&s_ih);
1024
1025 RFALSE( ! is_indirect_le_ih(&s_ih) || ! n_unfm_number ||
1026 pos_in_item (p_s_path) + 1 != n_unfm_number,
1027 "PAP-5240: invalid item %h "
1028 "n_unfm_number = %d *p_n_pos_in_item = %d",
1029 &s_ih, n_unfm_number, pos_in_item (p_s_path));
1030
1031 /* Calculate balance mode and position in the item to remove unformatted nodes. */
1032 if ( n_new_file_length == max_reiserfs_offset (inode) ) {/* Case of delete. */
1033 pos_in_item (p_s_path) = 0;
1034 *p_n_cut_size = -(IH_SIZE + ih_item_len(&s_ih));
1035 c_mode = M_DELETE;
1036 }
1037 else { /* Case of truncate. */
1038 if ( n_new_file_length < le_ih_k_offset (&s_ih) ) {
1039 pos_in_item (p_s_path) = 0;
1040 *p_n_cut_size = -(IH_SIZE + ih_item_len(&s_ih));
1041 c_mode = M_DELETE; /* Delete this item. */
1042 }
1043 else {
1044 /* indirect item must be truncated starting from *p_n_pos_in_item-th position */
1045 pos_in_item (p_s_path) = (n_new_file_length + n_blk_size - le_ih_k_offset (&s_ih) ) >> p_s_sb->s_blocksize_bits;
1046
1047 RFALSE( pos_in_item (p_s_path) > n_unfm_number,
1048 "PAP-5250: invalid position in the item");
1049
1050 /* Either convert last unformatted node of indirect item to direct item or increase
1051 its free space. */
1052 if ( pos_in_item (p_s_path) == n_unfm_number ) {
1053 *p_n_cut_size = 0; /* Nothing to cut. */
1054 return M_CONVERT; /* Maybe convert last unformatted node to the direct item. */
1055 }
1056 /* Calculate size to cut. */
1057 *p_n_cut_size = -(ih_item_len(&s_ih) - pos_in_item(p_s_path) * UNFM_P_SIZE);
1058
1059 c_mode = M_CUT; /* Cut from this indirect item. */
1060 }
1061 }
1062
1063 RFALSE( n_unfm_number <= pos_in_item (p_s_path),
1064 "PAP-5260: invalid position in the indirect item");
1065
1066 /* pointers to be cut */
1067 n_unfm_number -= pos_in_item (p_s_path);
1068 /* Set pointer to the last unformatted node pointer that is to be cut. */
Al Viro3e8962b2005-05-01 08:59:18 -07001069 p_n_unfm_pointer = (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1 - *p_n_removed;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001070
1071
1072 /* We go through the unformatted nodes pointers of the indirect
1073 item and look for the unformatted nodes in the cache. If we
1074 found some of them we free it, zero corresponding indirect item
1075 entry and log buffer containing that indirect item. For this we
1076 need to prepare last path element for logging. If some
1077 unformatted node has b_count > 1 we must not free this
1078 unformatted node since it is in use. */
1079 reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1);
1080 // note: path could be changed, first line in for loop takes care
1081 // of it
1082
1083 for (n_counter = *p_n_removed;
1084 n_counter < n_unfm_number; n_counter++, p_n_unfm_pointer-- ) {
1085
1086 cond_resched();
1087 if (item_moved (&s_ih, p_s_path)) {
1088 need_research = 1 ;
1089 break;
1090 }
Al Viro3e8962b2005-05-01 08:59:18 -07001091 RFALSE( p_n_unfm_pointer < (__le32 *)B_I_PITEM(p_s_bh, &s_ih) ||
1092 p_n_unfm_pointer > (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093 "vs-5265: pointer out of range");
1094
1095 /* Hole, nothing to remove. */
1096 if ( ! get_block_num(p_n_unfm_pointer,0) ) {
1097 (*p_n_removed)++;
1098 continue;
1099 }
1100
1101 (*p_n_removed)++;
1102
1103 tmp = get_block_num(p_n_unfm_pointer,0);
1104 put_block_num(p_n_unfm_pointer, 0, 0);
1105 journal_mark_dirty (th, p_s_sb, p_s_bh);
1106 reiserfs_free_block(th, inode, tmp, 1);
1107 if ( item_moved (&s_ih, p_s_path) ) {
1108 need_research = 1;
1109 break ;
1110 }
1111 }
1112
1113 /* a trick. If the buffer has been logged, this
1114 ** will do nothing. If we've broken the loop without
1115 ** logging it, it will restore the buffer
1116 **
1117 */
1118 reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh);
1119
1120 /* This loop can be optimized. */
1121 } while ( (*p_n_removed < n_unfm_number || need_research) &&
1122 search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_FOUND );
1123
1124 RFALSE( *p_n_removed < n_unfm_number,
1125 "PAP-5310: indirect item is not found");
1126 RFALSE( item_moved (&s_ih, p_s_path),
1127 "after while, comp failed, retry") ;
1128
1129 if (c_mode == M_CUT)
1130 pos_in_item (p_s_path) *= UNFM_P_SIZE;
1131 return c_mode;
1132 }
1133}
1134
1135/* Calculate number of bytes which will be deleted or cut during balance */
1136static int calc_deleted_bytes_number(
1137 struct tree_balance * p_s_tb,
1138 char c_mode
1139 ) {
1140 int n_del_size;
1141 struct item_head * p_le_ih = PATH_PITEM_HEAD(p_s_tb->tb_path);
1142
1143 if ( is_statdata_le_ih (p_le_ih) )
1144 return 0;
1145
1146 n_del_size = ( c_mode == M_DELETE ) ? ih_item_len(p_le_ih) : -p_s_tb->insert_size[0];
1147 if ( is_direntry_le_ih (p_le_ih) ) {
1148 // return EMPTY_DIR_SIZE; /* We delete emty directoris only. */
1149 // we can't use EMPTY_DIR_SIZE, as old format dirs have a different
1150 // empty size. ick. FIXME, is this right?
1151 //
1152 return n_del_size ;
1153 }
1154
1155 if ( is_indirect_le_ih (p_le_ih) )
1156 n_del_size = (n_del_size/UNFM_P_SIZE)*
1157 (PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_size);// - get_ih_free_space (p_le_ih);
1158 return n_del_size;
1159}
1160
1161static void init_tb_struct(
1162 struct reiserfs_transaction_handle *th,
1163 struct tree_balance * p_s_tb,
1164 struct super_block * p_s_sb,
1165 struct path * p_s_path,
1166 int n_size
1167 ) {
1168
1169 BUG_ON (!th->t_trans_id);
1170
1171 memset (p_s_tb,'\0',sizeof(struct tree_balance));
1172 p_s_tb->transaction_handle = th ;
1173 p_s_tb->tb_sb = p_s_sb;
1174 p_s_tb->tb_path = p_s_path;
1175 PATH_OFFSET_PBUFFER(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
1176 PATH_OFFSET_POSITION(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
1177 p_s_tb->insert_size[0] = n_size;
1178}
1179
1180
1181
1182void padd_item (char * item, int total_length, int length)
1183{
1184 int i;
1185
1186 for (i = total_length; i > length; )
1187 item [--i] = 0;
1188}
1189
1190#ifdef REISERQUOTA_DEBUG
1191char key2type(struct reiserfs_key *ih)
1192{
1193 if (is_direntry_le_key(2, ih))
1194 return 'd';
1195 if (is_direct_le_key(2, ih))
1196 return 'D';
1197 if (is_indirect_le_key(2, ih))
1198 return 'i';
1199 if (is_statdata_le_key(2, ih))
1200 return 's';
1201 return 'u';
1202}
1203
1204char head2type(struct item_head *ih)
1205{
1206 if (is_direntry_le_ih(ih))
1207 return 'd';
1208 if (is_direct_le_ih(ih))
1209 return 'D';
1210 if (is_indirect_le_ih(ih))
1211 return 'i';
1212 if (is_statdata_le_ih(ih))
1213 return 's';
1214 return 'u';
1215}
1216#endif
1217
1218/* Delete object item. */
1219int reiserfs_delete_item (struct reiserfs_transaction_handle *th,
1220 struct path * p_s_path, /* Path to the deleted item. */
1221 const struct cpu_key * p_s_item_key, /* Key to search for the deleted item. */
1222 struct inode * p_s_inode,/* inode is here just to update i_blocks and quotas */
1223 struct buffer_head * p_s_un_bh) /* NULL or unformatted node pointer. */
1224{
1225 struct super_block * p_s_sb = p_s_inode->i_sb;
1226 struct tree_balance s_del_balance;
1227 struct item_head s_ih;
1228 struct item_head *q_ih;
1229 int quota_cut_bytes;
1230 int n_ret_value,
1231 n_del_size,
1232 n_removed;
1233
1234#ifdef CONFIG_REISERFS_CHECK
1235 char c_mode;
1236 int n_iter = 0;
1237#endif
1238
1239 BUG_ON (!th->t_trans_id);
1240
1241 init_tb_struct(th, &s_del_balance, p_s_sb, p_s_path, 0/*size is unknown*/);
1242
1243 while ( 1 ) {
1244 n_removed = 0;
1245
1246#ifdef CONFIG_REISERFS_CHECK
1247 n_iter++;
1248 c_mode =
1249#endif
1250 prepare_for_delete_or_cut(th, p_s_inode, p_s_path, p_s_item_key, &n_removed, &n_del_size, max_reiserfs_offset (p_s_inode));
1251
1252 RFALSE( c_mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
1253
1254 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
1255 s_del_balance.insert_size[0] = n_del_size;
1256
1257 n_ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
1258 if ( n_ret_value != REPEAT_SEARCH )
1259 break;
1260
1261 PROC_INFO_INC( p_s_sb, delete_item_restarted );
1262
1263 // file system changed, repeat search
1264 n_ret_value = search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path);
1265 if (n_ret_value == IO_ERROR)
1266 break;
1267 if (n_ret_value == FILE_NOT_FOUND) {
1268 reiserfs_warning (p_s_sb, "vs-5340: reiserfs_delete_item: "
1269 "no items of the file %K found", p_s_item_key);
1270 break;
1271 }
1272 } /* while (1) */
1273
1274 if ( n_ret_value != CARRY_ON ) {
1275 unfix_nodes(&s_del_balance);
1276 return 0;
1277 }
1278
1279 // reiserfs_delete_item returns item length when success
1280 n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1281 q_ih = get_ih(p_s_path) ;
1282 quota_cut_bytes = ih_item_len(q_ih) ;
1283
1284 /* hack so the quota code doesn't have to guess if the file
1285 ** has a tail. On tail insert, we allocate quota for 1 unformatted node.
1286 ** We test the offset because the tail might have been
1287 ** split into multiple items, and we only want to decrement for
1288 ** the unfm node once
1289 */
1290 if (!S_ISLNK (p_s_inode->i_mode) && is_direct_le_ih(q_ih)) {
1291 if ((le_ih_k_offset(q_ih) & (p_s_sb->s_blocksize - 1)) == 1) {
1292 quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE;
1293 } else {
1294 quota_cut_bytes = 0 ;
1295 }
1296 }
1297
1298 if ( p_s_un_bh ) {
1299 int off;
1300 char *data ;
1301
1302 /* We are in direct2indirect conversion, so move tail contents
1303 to the unformatted node */
1304 /* note, we do the copy before preparing the buffer because we
1305 ** don't care about the contents of the unformatted node yet.
1306 ** the only thing we really care about is the direct item's data
1307 ** is in the unformatted node.
1308 **
1309 ** Otherwise, we would have to call reiserfs_prepare_for_journal on
1310 ** the unformatted node, which might schedule, meaning we'd have to
1311 ** loop all the way back up to the start of the while loop.
1312 **
1313 ** The unformatted node must be dirtied later on. We can't be
1314 ** sure here if the entire tail has been deleted yet.
1315 **
1316 ** p_s_un_bh is from the page cache (all unformatted nodes are
1317 ** from the page cache) and might be a highmem page. So, we
1318 ** can't use p_s_un_bh->b_data.
1319 ** -clm
1320 */
1321
1322 data = kmap_atomic(p_s_un_bh->b_page, KM_USER0);
1323 off = ((le_ih_k_offset (&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
1324 memcpy(data + off,
1325 B_I_PITEM(PATH_PLAST_BUFFER(p_s_path), &s_ih), n_ret_value);
1326 kunmap_atomic(data, KM_USER0);
1327 }
1328 /* Perform balancing after all resources have been collected at once. */
1329 do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1330
1331#ifdef REISERQUOTA_DEBUG
1332 reiserfs_debug (p_s_sb, REISERFS_DEBUG_CODE, "reiserquota delete_item(): freeing %u, id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih));
1333#endif
1334 DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
1335
1336 /* Return deleted body length */
1337 return n_ret_value;
1338}
1339
1340
1341/* Summary Of Mechanisms For Handling Collisions Between Processes:
1342
1343 deletion of the body of the object is performed by iput(), with the
1344 result that if multiple processes are operating on a file, the
1345 deletion of the body of the file is deferred until the last process
1346 that has an open inode performs its iput().
1347
1348 writes and truncates are protected from collisions by use of
1349 semaphores.
1350
1351 creates, linking, and mknod are protected from collisions with other
1352 processes by making the reiserfs_add_entry() the last step in the
1353 creation, and then rolling back all changes if there was a collision.
1354 - Hans
1355*/
1356
1357
1358/* this deletes item which never gets split */
1359void reiserfs_delete_solid_item (struct reiserfs_transaction_handle *th,
1360 struct inode *inode,
1361 struct reiserfs_key * key)
1362{
1363 struct tree_balance tb;
1364 INITIALIZE_PATH (path);
1365 int item_len = 0;
1366 int tb_init = 0 ;
1367 struct cpu_key cpu_key;
1368 int retval;
1369 int quota_cut_bytes = 0;
1370
1371 BUG_ON (!th->t_trans_id);
1372
1373 le_key2cpu_key (&cpu_key, key);
1374
1375 while (1) {
1376 retval = search_item (th->t_super, &cpu_key, &path);
1377 if (retval == IO_ERROR) {
1378 reiserfs_warning (th->t_super,
1379 "vs-5350: reiserfs_delete_solid_item: "
1380 "i/o failure occurred trying to delete %K",
1381 &cpu_key);
1382 break;
1383 }
1384 if (retval != ITEM_FOUND) {
1385 pathrelse (&path);
1386 // No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir
1387 if ( !( (unsigned long long) GET_HASH_VALUE (le_key_k_offset (le_key_version (key), key)) == 0 && \
1388 (unsigned long long) GET_GENERATION_NUMBER (le_key_k_offset (le_key_version (key), key)) == 1 ) )
1389 reiserfs_warning (th->t_super, "vs-5355: reiserfs_delete_solid_item: %k not found", key);
1390 break;
1391 }
1392 if (!tb_init) {
1393 tb_init = 1 ;
1394 item_len = ih_item_len( PATH_PITEM_HEAD(&path) );
1395 init_tb_struct (th, &tb, th->t_super, &path, - (IH_SIZE + item_len));
1396 }
1397 quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path)) ;
1398
1399 retval = fix_nodes (M_DELETE, &tb, NULL, NULL);
1400 if (retval == REPEAT_SEARCH) {
1401 PROC_INFO_INC( th -> t_super, delete_solid_item_restarted );
1402 continue;
1403 }
1404
1405 if (retval == CARRY_ON) {
1406 do_balance (&tb, NULL, NULL, M_DELETE);
1407 if (inode) { /* Should we count quota for item? (we don't count quotas for save-links) */
1408#ifdef REISERQUOTA_DEBUG
1409 reiserfs_debug (th->t_super, REISERFS_DEBUG_CODE, "reiserquota delete_solid_item(): freeing %u id=%u type=%c", quota_cut_bytes, inode->i_uid, key2type(key));
1410#endif
1411 DQUOT_FREE_SPACE_NODIRTY(inode, quota_cut_bytes);
1412 }
1413 break;
1414 }
1415
1416 // IO_ERROR, NO_DISK_SPACE, etc
1417 reiserfs_warning (th->t_super, "vs-5360: reiserfs_delete_solid_item: "
1418 "could not delete %K due to fix_nodes failure", &cpu_key);
1419 unfix_nodes (&tb);
1420 break;
1421 }
1422
1423 reiserfs_check_path(&path) ;
1424}
1425
1426
1427int reiserfs_delete_object (struct reiserfs_transaction_handle *th, struct inode * inode)
1428{
1429 int err;
1430 inode->i_size = 0;
1431 BUG_ON (!th->t_trans_id);
1432
1433 /* for directory this deletes item containing "." and ".." */
1434 err = reiserfs_do_truncate (th, inode, NULL, 0/*no timestamp updates*/);
1435 if (err)
1436 return err;
1437
1438#if defined( USE_INODE_GENERATION_COUNTER )
1439 if( !old_format_only ( th -> t_super ) )
1440 {
Al Viro3e8962b2005-05-01 08:59:18 -07001441 __le32 *inode_generation;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001442
1443 inode_generation =
1444 &REISERFS_SB(th -> t_super) -> s_rs -> s_inode_generation;
1445 *inode_generation = cpu_to_le32( le32_to_cpu( *inode_generation ) + 1 );
1446 }
1447/* USE_INODE_GENERATION_COUNTER */
1448#endif
1449 reiserfs_delete_solid_item (th, inode, INODE_PKEY (inode));
1450
1451 return err;
1452}
1453
1454static void
1455unmap_buffers(struct page *page, loff_t pos) {
1456 struct buffer_head *bh ;
1457 struct buffer_head *head ;
1458 struct buffer_head *next ;
1459 unsigned long tail_index ;
1460 unsigned long cur_index ;
1461
1462 if (page) {
1463 if (page_has_buffers(page)) {
1464 tail_index = pos & (PAGE_CACHE_SIZE - 1) ;
1465 cur_index = 0 ;
1466 head = page_buffers(page) ;
1467 bh = head ;
1468 do {
1469 next = bh->b_this_page ;
1470
1471 /* we want to unmap the buffers that contain the tail, and
1472 ** all the buffers after it (since the tail must be at the
1473 ** end of the file). We don't want to unmap file data
1474 ** before the tail, since it might be dirty and waiting to
1475 ** reach disk
1476 */
1477 cur_index += bh->b_size ;
1478 if (cur_index > tail_index) {
1479 reiserfs_unmap_buffer(bh) ;
1480 }
1481 bh = next ;
1482 } while (bh != head) ;
1483 if ( PAGE_SIZE == bh->b_size ) {
1484 clear_page_dirty(page);
1485 }
1486 }
1487 }
1488}
1489
1490static int maybe_indirect_to_direct (struct reiserfs_transaction_handle *th,
1491 struct inode * p_s_inode,
1492 struct page *page,
1493 struct path * p_s_path,
1494 const struct cpu_key * p_s_item_key,
1495 loff_t n_new_file_size,
1496 char * p_c_mode
1497 ) {
1498 struct super_block * p_s_sb = p_s_inode->i_sb;
1499 int n_block_size = p_s_sb->s_blocksize;
1500 int cut_bytes;
1501 BUG_ON (!th->t_trans_id);
1502
1503 if (n_new_file_size != p_s_inode->i_size)
1504 BUG ();
1505
1506 /* the page being sent in could be NULL if there was an i/o error
1507 ** reading in the last block. The user will hit problems trying to
1508 ** read the file, but for now we just skip the indirect2direct
1509 */
1510 if (atomic_read(&p_s_inode->i_count) > 1 ||
1511 !tail_has_to_be_packed (p_s_inode) ||
1512 !page || (REISERFS_I(p_s_inode)->i_flags & i_nopack_mask)) {
1513 // leave tail in an unformatted node
1514 *p_c_mode = M_SKIP_BALANCING;
1515 cut_bytes = n_block_size - (n_new_file_size & (n_block_size - 1));
1516 pathrelse(p_s_path);
1517 return cut_bytes;
1518 }
1519 /* Permorm the conversion to a direct_item. */
1520 /*return indirect_to_direct (p_s_inode, p_s_path, p_s_item_key, n_new_file_size, p_c_mode);*/
1521 return indirect2direct (th, p_s_inode, page, p_s_path, p_s_item_key, n_new_file_size, p_c_mode);
1522}
1523
1524
1525/* we did indirect_to_direct conversion. And we have inserted direct
1526 item successesfully, but there were no disk space to cut unfm
1527 pointer being converted. Therefore we have to delete inserted
1528 direct item(s) */
1529static void indirect_to_direct_roll_back (struct reiserfs_transaction_handle *th, struct inode * inode, struct path * path)
1530{
1531 struct cpu_key tail_key;
1532 int tail_len;
1533 int removed;
1534 BUG_ON (!th->t_trans_id);
1535
1536 make_cpu_key (&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);// !!!!
1537 tail_key.key_length = 4;
1538
1539 tail_len = (cpu_key_k_offset (&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
1540 while (tail_len) {
1541 /* look for the last byte of the tail */
1542 if (search_for_position_by_key (inode->i_sb, &tail_key, path) == POSITION_NOT_FOUND)
1543 reiserfs_panic (inode->i_sb, "vs-5615: indirect_to_direct_roll_back: found invalid item");
1544 RFALSE( path->pos_in_item != ih_item_len(PATH_PITEM_HEAD (path)) - 1,
1545 "vs-5616: appended bytes found");
1546 PATH_LAST_POSITION (path) --;
1547
1548 removed = reiserfs_delete_item (th, path, &tail_key, inode, NULL/*unbh not needed*/);
1549 RFALSE( removed <= 0 || removed > tail_len,
1550 "vs-5617: there was tail %d bytes, removed item length %d bytes",
1551 tail_len, removed);
1552 tail_len -= removed;
1553 set_cpu_key_k_offset (&tail_key, cpu_key_k_offset (&tail_key) - removed);
1554 }
1555 reiserfs_warning (inode->i_sb, "indirect_to_direct_roll_back: indirect_to_direct conversion has been rolled back due to lack of disk space");
1556 //mark_file_without_tail (inode);
1557 mark_inode_dirty (inode);
1558}
1559
1560
1561/* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
1562int reiserfs_cut_from_item (struct reiserfs_transaction_handle *th,
1563 struct path * p_s_path,
1564 struct cpu_key * p_s_item_key,
1565 struct inode * p_s_inode,
1566 struct page *page,
1567 loff_t n_new_file_size)
1568{
1569 struct super_block * p_s_sb = p_s_inode->i_sb;
1570 /* Every function which is going to call do_balance must first
1571 create a tree_balance structure. Then it must fill up this
1572 structure by using the init_tb_struct and fix_nodes functions.
1573 After that we can make tree balancing. */
1574 struct tree_balance s_cut_balance;
1575 struct item_head *p_le_ih;
1576 int n_cut_size = 0, /* Amount to be cut. */
1577 n_ret_value = CARRY_ON,
1578 n_removed = 0, /* Number of the removed unformatted nodes. */
1579 n_is_inode_locked = 0;
1580 char c_mode; /* Mode of the balance. */
1581 int retval2 = -1;
1582 int quota_cut_bytes;
1583 loff_t tail_pos = 0;
1584
1585 BUG_ON (!th->t_trans_id);
1586
1587 init_tb_struct(th, &s_cut_balance, p_s_inode->i_sb, p_s_path, n_cut_size);
1588
1589
1590 /* Repeat this loop until we either cut the item without needing
1591 to balance, or we fix_nodes without schedule occurring */
1592 while ( 1 ) {
1593 /* Determine the balance mode, position of the first byte to
1594 be cut, and size to be cut. In case of the indirect item
1595 free unformatted nodes which are pointed to by the cut
1596 pointers. */
1597
1598 c_mode = prepare_for_delete_or_cut(th, p_s_inode, p_s_path, p_s_item_key, &n_removed,
1599 &n_cut_size, n_new_file_size);
1600 if ( c_mode == M_CONVERT ) {
1601 /* convert last unformatted node to direct item or leave
1602 tail in the unformatted node */
1603 RFALSE( n_ret_value != CARRY_ON, "PAP-5570: can not convert twice");
1604
1605 n_ret_value = maybe_indirect_to_direct (th, p_s_inode, page, p_s_path, p_s_item_key,
1606 n_new_file_size, &c_mode);
1607 if ( c_mode == M_SKIP_BALANCING )
1608 /* tail has been left in the unformatted node */
1609 return n_ret_value;
1610
1611 n_is_inode_locked = 1;
1612
1613 /* removing of last unformatted node will change value we
1614 have to return to truncate. Save it */
1615 retval2 = n_ret_value;
1616 /*retval2 = p_s_sb->s_blocksize - (n_new_file_size & (p_s_sb->s_blocksize - 1));*/
1617
1618 /* So, we have performed the first part of the conversion:
1619 inserting the new direct item. Now we are removing the
1620 last unformatted node pointer. Set key to search for
1621 it. */
1622 set_cpu_key_k_type (p_s_item_key, TYPE_INDIRECT);
1623 p_s_item_key->key_length = 4;
1624 n_new_file_size -= (n_new_file_size & (p_s_sb->s_blocksize - 1));
1625 tail_pos = n_new_file_size;
1626 set_cpu_key_k_offset (p_s_item_key, n_new_file_size + 1);
1627 if ( search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_NOT_FOUND ){
1628 print_block (PATH_PLAST_BUFFER (p_s_path), 3, PATH_LAST_POSITION (p_s_path) - 1, PATH_LAST_POSITION (p_s_path) + 1);
1629 reiserfs_panic(p_s_sb, "PAP-5580: reiserfs_cut_from_item: item to convert does not exist (%K)", p_s_item_key);
1630 }
1631 continue;
1632 }
1633 if (n_cut_size == 0) {
1634 pathrelse (p_s_path);
1635 return 0;
1636 }
1637
1638 s_cut_balance.insert_size[0] = n_cut_size;
1639
1640 n_ret_value = fix_nodes(c_mode, &s_cut_balance, NULL, NULL);
1641 if ( n_ret_value != REPEAT_SEARCH )
1642 break;
1643
1644 PROC_INFO_INC( p_s_sb, cut_from_item_restarted );
1645
1646 n_ret_value = search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path);
1647 if (n_ret_value == POSITION_FOUND)
1648 continue;
1649
1650 reiserfs_warning (p_s_sb, "PAP-5610: reiserfs_cut_from_item: item %K not found", p_s_item_key);
1651 unfix_nodes (&s_cut_balance);
1652 return (n_ret_value == IO_ERROR) ? -EIO : -ENOENT;
1653 } /* while */
1654
1655 // check fix_nodes results (IO_ERROR or NO_DISK_SPACE)
1656 if ( n_ret_value != CARRY_ON ) {
1657 if ( n_is_inode_locked ) {
1658 // FIXME: this seems to be not needed: we are always able
1659 // to cut item
1660 indirect_to_direct_roll_back (th, p_s_inode, p_s_path);
1661 }
1662 if (n_ret_value == NO_DISK_SPACE)
1663 reiserfs_warning (p_s_sb, "NO_DISK_SPACE");
1664 unfix_nodes (&s_cut_balance);
1665 return -EIO;
1666 }
1667
1668 /* go ahead and perform balancing */
1669
1670 RFALSE( c_mode == M_PASTE || c_mode == M_INSERT, "invalid mode");
1671
1672 /* Calculate number of bytes that need to be cut from the item. */
1673 quota_cut_bytes = ( c_mode == M_DELETE ) ? ih_item_len(get_ih(p_s_path)) : -s_cut_balance.insert_size[0];
1674 if (retval2 == -1)
1675 n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode);
1676 else
1677 n_ret_value = retval2;
1678
1679
1680 /* For direct items, we only change the quota when deleting the last
1681 ** item.
1682 */
1683 p_le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path);
1684 if (!S_ISLNK (p_s_inode->i_mode) && is_direct_le_ih(p_le_ih)) {
1685 if (c_mode == M_DELETE &&
1686 (le_ih_k_offset (p_le_ih) & (p_s_sb->s_blocksize - 1)) == 1 ) {
1687 // FIXME: this is to keep 3.5 happy
1688 REISERFS_I(p_s_inode)->i_first_direct_byte = U32_MAX;
1689 quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE ;
1690 } else {
1691 quota_cut_bytes = 0 ;
1692 }
1693 }
1694#ifdef CONFIG_REISERFS_CHECK
1695 if (n_is_inode_locked) {
1696 struct item_head * le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path);
1697 /* we are going to complete indirect2direct conversion. Make
1698 sure, that we exactly remove last unformatted node pointer
1699 of the item */
1700 if (!is_indirect_le_ih (le_ih))
1701 reiserfs_panic (p_s_sb, "vs-5652: reiserfs_cut_from_item: "
1702 "item must be indirect %h", le_ih);
1703
1704 if (c_mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
1705 reiserfs_panic (p_s_sb, "vs-5653: reiserfs_cut_from_item: "
1706 "completing indirect2direct conversion indirect item %h "
1707 "being deleted must be of 4 byte long", le_ih);
1708
1709 if (c_mode == M_CUT && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
1710 reiserfs_panic (p_s_sb, "vs-5654: reiserfs_cut_from_item: "
1711 "can not complete indirect2direct conversion of %h (CUT, insert_size==%d)",
1712 le_ih, s_cut_balance.insert_size[0]);
1713 }
1714 /* it would be useful to make sure, that right neighboring
1715 item is direct item of this file */
1716 }
1717#endif
1718
1719 do_balance(&s_cut_balance, NULL, NULL, c_mode);
1720 if ( n_is_inode_locked ) {
1721 /* we've done an indirect->direct conversion. when the data block
1722 ** was freed, it was removed from the list of blocks that must
1723 ** be flushed before the transaction commits, make sure to
1724 ** unmap and invalidate it
1725 */
1726 unmap_buffers(page, tail_pos);
1727 REISERFS_I(p_s_inode)->i_flags &= ~i_pack_on_close_mask ;
1728 }
1729#ifdef REISERQUOTA_DEBUG
1730 reiserfs_debug (p_s_inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota cut_from_item(): freeing %u id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, '?');
1731#endif
1732 DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
1733 return n_ret_value;
1734}
1735
1736static void truncate_directory (struct reiserfs_transaction_handle *th, struct inode * inode)
1737{
1738 BUG_ON (!th->t_trans_id);
1739 if (inode->i_nlink)
1740 reiserfs_warning (inode->i_sb,
1741 "vs-5655: truncate_directory: link count != 0");
1742
1743 set_le_key_k_offset (KEY_FORMAT_3_5, INODE_PKEY (inode), DOT_OFFSET);
1744 set_le_key_k_type (KEY_FORMAT_3_5, INODE_PKEY (inode), TYPE_DIRENTRY);
1745 reiserfs_delete_solid_item (th, inode, INODE_PKEY (inode));
1746 reiserfs_update_sd(th, inode) ;
1747 set_le_key_k_offset (KEY_FORMAT_3_5, INODE_PKEY (inode), SD_OFFSET);
1748 set_le_key_k_type (KEY_FORMAT_3_5, INODE_PKEY (inode), TYPE_STAT_DATA);
1749}
1750
1751
1752
1753
1754/* Truncate file to the new size. Note, this must be called with a transaction
1755 already started */
1756int reiserfs_do_truncate (struct reiserfs_transaction_handle *th,
1757 struct inode * p_s_inode, /* ->i_size contains new
1758 size */
1759 struct page *page, /* up to date for last block */
1760 int update_timestamps /* when it is called by
1761 file_release to convert
1762 the tail - no timestamps
1763 should be updated */
1764 ) {
1765 INITIALIZE_PATH (s_search_path); /* Path to the current object item. */
1766 struct item_head * p_le_ih; /* Pointer to an item header. */
1767 struct cpu_key s_item_key; /* Key to search for a previous file item. */
1768 loff_t n_file_size, /* Old file size. */
1769 n_new_file_size;/* New file size. */
1770 int n_deleted; /* Number of deleted or truncated bytes. */
1771 int retval;
1772 int err = 0;
1773
1774 BUG_ON (!th->t_trans_id);
1775 if ( ! (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode) || S_ISLNK(p_s_inode->i_mode)) )
1776 return 0;
1777
1778 if (S_ISDIR(p_s_inode->i_mode)) {
1779 // deletion of directory - no need to update timestamps
1780 truncate_directory (th, p_s_inode);
1781 return 0;
1782 }
1783
1784 /* Get new file size. */
1785 n_new_file_size = p_s_inode->i_size;
1786
1787 // FIXME: note, that key type is unimportant here
1788 make_cpu_key (&s_item_key, p_s_inode, max_reiserfs_offset (p_s_inode), TYPE_DIRECT, 3);
1789
1790 retval = search_for_position_by_key(p_s_inode->i_sb, &s_item_key, &s_search_path);
1791 if (retval == IO_ERROR) {
1792 reiserfs_warning (p_s_inode->i_sb, "vs-5657: reiserfs_do_truncate: "
1793 "i/o failure occurred trying to truncate %K", &s_item_key);
1794 err = -EIO;
1795 goto out;
1796 }
1797 if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
1798 reiserfs_warning (p_s_inode->i_sb, "PAP-5660: reiserfs_do_truncate: "
1799 "wrong result %d of search for %K", retval, &s_item_key);
1800
1801 err = -EIO;
1802 goto out;
1803 }
1804
1805 s_search_path.pos_in_item --;
1806
1807 /* Get real file size (total length of all file items) */
1808 p_le_ih = PATH_PITEM_HEAD(&s_search_path);
1809 if ( is_statdata_le_ih (p_le_ih) )
1810 n_file_size = 0;
1811 else {
1812 loff_t offset = le_ih_k_offset (p_le_ih);
1813 int bytes = op_bytes_number (p_le_ih,p_s_inode->i_sb->s_blocksize);
1814
1815 /* this may mismatch with real file size: if last direct item
1816 had no padding zeros and last unformatted node had no free
1817 space, this file would have this file size */
1818 n_file_size = offset + bytes - 1;
1819 }
1820 /*
1821 * are we doing a full truncate or delete, if so
1822 * kick in the reada code
1823 */
1824 if (n_new_file_size == 0)
1825 s_search_path.reada = PATH_READA | PATH_READA_BACK;
1826
1827 if ( n_file_size == 0 || n_file_size < n_new_file_size ) {
1828 goto update_and_out ;
1829 }
1830
1831 /* Update key to search for the last file item. */
1832 set_cpu_key_k_offset (&s_item_key, n_file_size);
1833
1834 do {
1835 /* Cut or delete file item. */
1836 n_deleted = reiserfs_cut_from_item(th, &s_search_path, &s_item_key, p_s_inode, page, n_new_file_size);
1837 if (n_deleted < 0) {
1838 reiserfs_warning (p_s_inode->i_sb, "vs-5665: reiserfs_do_truncate: reiserfs_cut_from_item failed");
1839 reiserfs_check_path(&s_search_path) ;
1840 return 0;
1841 }
1842
1843 RFALSE( n_deleted > n_file_size,
1844 "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
1845 n_deleted, n_file_size, &s_item_key);
1846
1847 /* Change key to search the last file item. */
1848 n_file_size -= n_deleted;
1849
1850 set_cpu_key_k_offset (&s_item_key, n_file_size);
1851
1852 /* While there are bytes to truncate and previous file item is presented in the tree. */
1853
1854 /*
1855 ** This loop could take a really long time, and could log
1856 ** many more blocks than a transaction can hold. So, we do a polite
1857 ** journal end here, and if the transaction needs ending, we make
1858 ** sure the file is consistent before ending the current trans
1859 ** and starting a new one
1860 */
1861 if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
1862 int orig_len_alloc = th->t_blocks_allocated ;
1863 decrement_counters_in_path(&s_search_path) ;
1864
1865 if (update_timestamps) {
1866 p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC;
1867 }
1868 reiserfs_update_sd(th, p_s_inode) ;
1869
1870 err = journal_end(th, p_s_inode->i_sb, orig_len_alloc) ;
1871 if (err)
1872 goto out;
1873 err = journal_begin (th, p_s_inode->i_sb,
1874 JOURNAL_PER_BALANCE_CNT * 6);
1875 if (err)
1876 goto out;
1877 reiserfs_update_inode_transaction(p_s_inode) ;
1878 }
1879 } while ( n_file_size > ROUND_UP (n_new_file_size) &&
1880 search_for_position_by_key(p_s_inode->i_sb, &s_item_key, &s_search_path) == POSITION_FOUND ) ;
1881
1882 RFALSE( n_file_size > ROUND_UP (n_new_file_size),
1883 "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
1884 n_new_file_size, n_file_size, s_item_key.on_disk_key.k_objectid);
1885
1886update_and_out:
1887 if (update_timestamps) {
1888 // this is truncate, not file closing
1889 p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC;
1890 }
1891 reiserfs_update_sd (th, p_s_inode);
1892
1893out:
1894 pathrelse(&s_search_path) ;
1895 return err;
1896}
1897
1898
1899#ifdef CONFIG_REISERFS_CHECK
1900// this makes sure, that we __append__, not overwrite or add holes
1901static void check_research_for_paste (struct path * path,
1902 const struct cpu_key * p_s_key)
1903{
1904 struct item_head * found_ih = get_ih (path);
1905
1906 if (is_direct_le_ih (found_ih)) {
1907 if (le_ih_k_offset (found_ih) + op_bytes_number (found_ih, get_last_bh (path)->b_size) !=
1908 cpu_key_k_offset (p_s_key) ||
1909 op_bytes_number (found_ih, get_last_bh (path)->b_size) != pos_in_item (path))
1910 reiserfs_panic (NULL, "PAP-5720: check_research_for_paste: "
1911 "found direct item %h or position (%d) does not match to key %K",
1912 found_ih, pos_in_item (path), p_s_key);
1913 }
1914 if (is_indirect_le_ih (found_ih)) {
1915 if (le_ih_k_offset (found_ih) + op_bytes_number (found_ih, get_last_bh (path)->b_size) != cpu_key_k_offset (p_s_key) ||
1916 I_UNFM_NUM (found_ih) != pos_in_item (path) ||
1917 get_ih_free_space (found_ih) != 0)
1918 reiserfs_panic (NULL, "PAP-5730: check_research_for_paste: "
1919 "found indirect item (%h) or position (%d) does not match to key (%K)",
1920 found_ih, pos_in_item (path), p_s_key);
1921 }
1922}
1923#endif /* config reiserfs check */
1924
1925
1926/* Paste bytes to the existing item. Returns bytes number pasted into the item. */
1927int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th,
1928 struct path * p_s_search_path, /* Path to the pasted item. */
1929 const struct cpu_key * p_s_key, /* Key to search for the needed item.*/
1930 struct inode * inode, /* Inode item belongs to */
1931 const char * p_c_body, /* Pointer to the bytes to paste. */
1932 int n_pasted_size) /* Size of pasted bytes. */
1933{
1934 struct tree_balance s_paste_balance;
1935 int retval;
1936 int fs_gen;
1937
1938 BUG_ON (!th->t_trans_id);
1939
1940 fs_gen = get_generation(inode->i_sb) ;
1941
1942#ifdef REISERQUOTA_DEBUG
1943 reiserfs_debug (inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota paste_into_item(): allocating %u id=%u type=%c", n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key)));
1944#endif
1945
1946 if (DQUOT_ALLOC_SPACE_NODIRTY(inode, n_pasted_size)) {
1947 pathrelse(p_s_search_path);
1948 return -EDQUOT;
1949 }
1950 init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, n_pasted_size);
1951#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1952 s_paste_balance.key = p_s_key->on_disk_key;
1953#endif
1954
1955 /* DQUOT_* can schedule, must check before the fix_nodes */
1956 if (fs_changed(fs_gen, inode->i_sb)) {
1957 goto search_again;
1958 }
1959
1960 while ((retval = fix_nodes(M_PASTE, &s_paste_balance, NULL, p_c_body)) ==
1961REPEAT_SEARCH ) {
1962search_again:
1963 /* file system changed while we were in the fix_nodes */
1964 PROC_INFO_INC( th -> t_super, paste_into_item_restarted );
1965 retval = search_for_position_by_key (th->t_super, p_s_key, p_s_search_path);
1966 if (retval == IO_ERROR) {
1967 retval = -EIO ;
1968 goto error_out ;
1969 }
1970 if (retval == POSITION_FOUND) {
1971 reiserfs_warning (inode->i_sb, "PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists", p_s_key);
1972 retval = -EEXIST ;
1973 goto error_out ;
1974 }
1975
1976#ifdef CONFIG_REISERFS_CHECK
1977 check_research_for_paste (p_s_search_path, p_s_key);
1978#endif
1979 }
1980
1981 /* Perform balancing after all resources are collected by fix_nodes, and
1982 accessing them will not risk triggering schedule. */
1983 if ( retval == CARRY_ON ) {
1984 do_balance(&s_paste_balance, NULL/*ih*/, p_c_body, M_PASTE);
1985 return 0;
1986 }
1987 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
1988error_out:
1989 /* this also releases the path */
1990 unfix_nodes(&s_paste_balance);
1991#ifdef REISERQUOTA_DEBUG
1992 reiserfs_debug (inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota paste_into_item(): freeing %u id=%u type=%c", n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key)));
1993#endif
1994 DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size);
1995 return retval ;
1996}
1997
1998
1999/* Insert new item into the buffer at the path. */
2000int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2001 struct path * p_s_path, /* Path to the inserteded item. */
2002 const struct cpu_key * key,
2003 struct item_head * p_s_ih, /* Pointer to the item header to insert.*/
2004 struct inode * inode,
2005 const char * p_c_body) /* Pointer to the bytes to insert. */
2006{
2007 struct tree_balance s_ins_balance;
2008 int retval;
2009 int fs_gen = 0 ;
2010 int quota_bytes = 0 ;
2011
2012 BUG_ON (!th->t_trans_id);
2013
2014 if (inode) { /* Do we count quotas for item? */
2015 fs_gen = get_generation(inode->i_sb);
2016 quota_bytes = ih_item_len(p_s_ih);
2017
2018 /* hack so the quota code doesn't have to guess if the file has
2019 ** a tail, links are always tails, so there's no guessing needed
2020 */
2021 if (!S_ISLNK (inode->i_mode) && is_direct_le_ih(p_s_ih)) {
2022 quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE ;
2023 }
2024#ifdef REISERQUOTA_DEBUG
2025 reiserfs_debug (inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota insert_item(): allocating %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(p_s_ih));
2026#endif
2027 /* We can't dirty inode here. It would be immediately written but
2028 * appropriate stat item isn't inserted yet... */
2029 if (DQUOT_ALLOC_SPACE_NODIRTY(inode, quota_bytes)) {
2030 pathrelse(p_s_path);
2031 return -EDQUOT;
2032 }
2033 }
2034 init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, IH_SIZE + ih_item_len(p_s_ih));
2035#ifdef DISPLACE_NEW_PACKING_LOCALITIES
2036 s_ins_balance.key = key->on_disk_key;
2037#endif
2038 /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */
2039 if (inode && fs_changed(fs_gen, inode->i_sb)) {
2040 goto search_again;
2041 }
2042
2043 while ( (retval = fix_nodes(M_INSERT, &s_ins_balance, p_s_ih, p_c_body)) == REPEAT_SEARCH) {
2044search_again:
2045 /* file system changed while we were in the fix_nodes */
2046 PROC_INFO_INC( th -> t_super, insert_item_restarted );
2047 retval = search_item (th->t_super, key, p_s_path);
2048 if (retval == IO_ERROR) {
2049 retval = -EIO;
2050 goto error_out ;
2051 }
2052 if (retval == ITEM_FOUND) {
2053 reiserfs_warning (th->t_super, "PAP-5760: reiserfs_insert_item: "
2054 "key %K already exists in the tree", key);
2055 retval = -EEXIST ;
2056 goto error_out;
2057 }
2058 }
2059
2060 /* make balancing after all resources will be collected at a time */
2061 if ( retval == CARRY_ON ) {
2062 do_balance (&s_ins_balance, p_s_ih, p_c_body, M_INSERT);
2063 return 0;
2064 }
2065
2066 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2067error_out:
2068 /* also releases the path */
2069 unfix_nodes(&s_ins_balance);
2070#ifdef REISERQUOTA_DEBUG
2071 reiserfs_debug (th->t_super, REISERFS_DEBUG_CODE, "reiserquota insert_item(): freeing %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(p_s_ih));
2072#endif
2073 if (inode)
2074 DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes) ;
2075 return retval;
2076}
2077
2078
2079
2080