blob: f102ec939c90be2bd95dd2b35b36bd50c4ea380a [file] [log] [blame]
Joel Becker70ad1ba2008-10-16 17:54:25 -07001/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * blockcheck.c
5 *
6 * Checksum and ECC codes for the OCFS2 userspace library.
7 *
8 * Copyright (C) 2006, 2008 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License, version 2, as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 */
19
20#include <linux/kernel.h>
21#include <linux/types.h>
22#include <linux/crc32.h>
23#include <linux/buffer_head.h>
24#include <linux/bitops.h>
25#include <asm/byteorder.h>
26
Joel Beckerd6b32bb2008-10-17 14:55:01 -070027#include <cluster/masklog.h>
28
Joel Becker70ad1ba2008-10-16 17:54:25 -070029#include "ocfs2.h"
30
31#include "blockcheck.h"
32
33
Joel Becker70ad1ba2008-10-16 17:54:25 -070034/*
35 * We use the following conventions:
36 *
37 * d = # data bits
38 * p = # parity bits
39 * c = # total code bits (d + p)
40 */
Joel Becker70ad1ba2008-10-16 17:54:25 -070041
Joel Becker7bb458a2008-12-15 18:24:33 -080042
43/*
44 * Find the log base 2 of 32-bit v.
45 *
46 * Algorithm found on http://graphics.stanford.edu/~seander/bithacks.html,
47 * by Sean Eron Anderson. Code on the page is in the public domain unless
48 * otherwise noted.
49 *
50 * This particular algorithm is credited to Eric Cole.
51 */
52static int find_highest_bit_set(unsigned int v)
53{
54
55 static const int MultiplyDeBruijnBitPosition[32] =
56 {
57 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
58 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
59 };
60
61 v |= v >> 1; /* first round down to power of 2 */
62 v |= v >> 2;
63 v |= v >> 4;
64 v |= v >> 8;
65 v |= v >> 16;
66 v = (v >> 1) + 1;
67
68 return MultiplyDeBruijnBitPosition[(u32)(v * 0x077CB531UL) >> 27];
69}
70
Joel Becker70ad1ba2008-10-16 17:54:25 -070071/*
72 * Calculate the bit offset in the hamming code buffer based on the bit's
73 * offset in the data buffer. Since the hamming code reserves all
74 * power-of-two bits for parity, the data bit number and the code bit
75 * number are offest by all the parity bits beforehand.
76 *
77 * Recall that bit numbers in hamming code are 1-based. This function
78 * takes the 0-based data bit from the caller.
79 *
80 * An example. Take bit 1 of the data buffer. 1 is a power of two (2^0),
81 * so it's a parity bit. 2 is a power of two (2^1), so it's a parity bit.
82 * 3 is not a power of two. So bit 1 of the data buffer ends up as bit 3
83 * in the code buffer.
84 */
85static unsigned int calc_code_bit(unsigned int i)
86{
87 unsigned int b, p;
88
89 /*
90 * Data bits are 0-based, but we're talking code bits, which
91 * are 1-based.
92 */
93 b = i + 1;
94
95 /*
Joel Becker7bb458a2008-12-15 18:24:33 -080096 * As a cheat, we know that all bits below b's highest bit must be
97 * parity bits, so we can start there.
98 */
99 p = find_highest_bit_set(b);
100 b += p;
101
102 /*
Joel Becker70ad1ba2008-10-16 17:54:25 -0700103 * For every power of two below our bit number, bump our bit.
104 *
105 * We compare with (b + 1) becuase we have to compare with what b
106 * would be _if_ it were bumped up by the parity bit. Capice?
Joel Becker7bb458a2008-12-15 18:24:33 -0800107 *
108 * We start p at 2^p because of the cheat above.
Joel Becker70ad1ba2008-10-16 17:54:25 -0700109 */
Joel Becker7bb458a2008-12-15 18:24:33 -0800110 for (p = (1 << p); p < (b + 1); p <<= 1)
Joel Becker70ad1ba2008-10-16 17:54:25 -0700111 b++;
112
113 return b;
114}
115
116/*
117 * This is the low level encoder function. It can be called across
118 * multiple hunks just like the crc32 code. 'd' is the number of bits
119 * _in_this_hunk_. nr is the bit offset of this hunk. So, if you had
120 * two 512B buffers, you would do it like so:
121 *
122 * parity = ocfs2_hamming_encode(0, buf1, 512 * 8, 0);
123 * parity = ocfs2_hamming_encode(parity, buf2, 512 * 8, 512 * 8);
124 *
125 * If you just have one buffer, use ocfs2_hamming_encode_block().
126 */
127u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d, unsigned int nr)
128{
Joel Beckere798b3f2008-12-15 17:13:48 -0800129 unsigned int i, b;
Joel Becker70ad1ba2008-10-16 17:54:25 -0700130
Joel Beckere798b3f2008-12-15 17:13:48 -0800131 BUG_ON(!d);
Joel Becker70ad1ba2008-10-16 17:54:25 -0700132
133 /*
134 * b is the hamming code bit number. Hamming code specifies a
135 * 1-based array, but C uses 0-based. So 'i' is for C, and 'b' is
136 * for the algorithm.
137 *
138 * The i++ in the for loop is so that the start offset passed
139 * to ocfs2_find_next_bit_set() is one greater than the previously
140 * found bit.
141 */
142 for (i = 0; (i = ocfs2_find_next_bit(data, d, i)) < d; i++)
143 {
144 /*
145 * i is the offset in this hunk, nr + i is the total bit
146 * offset.
147 */
148 b = calc_code_bit(nr + i);
149
Joel Beckere798b3f2008-12-15 17:13:48 -0800150 /*
151 * Data bits in the resultant code are checked by
152 * parity bits that are part of the bit number
153 * representation. Huh?
154 *
155 * <wikipedia href="http://en.wikipedia.org/wiki/Hamming_code">
156 * In other words, the parity bit at position 2^k
157 * checks bits in positions having bit k set in
158 * their binary representation. Conversely, for
159 * instance, bit 13, i.e. 1101(2), is checked by
160 * bits 1000(2) = 8, 0100(2)=4 and 0001(2) = 1.
161 * </wikipedia>
162 *
163 * Note that 'k' is the _code_ bit number. 'b' in
164 * our loop.
165 */
166 parity ^= b;
Joel Becker70ad1ba2008-10-16 17:54:25 -0700167 }
168
169 /* While the data buffer was treated as little endian, the
170 * return value is in host endian. */
171 return parity;
172}
173
174u32 ocfs2_hamming_encode_block(void *data, unsigned int blocksize)
175{
176 return ocfs2_hamming_encode(0, data, blocksize * 8, 0);
177}
178
179/*
180 * Like ocfs2_hamming_encode(), this can handle hunks. nr is the bit
181 * offset of the current hunk. If bit to be fixed is not part of the
182 * current hunk, this does nothing.
183 *
184 * If you only have one hunk, use ocfs2_hamming_fix_block().
185 */
186void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr,
187 unsigned int fix)
188{
Joel Becker70ad1ba2008-10-16 17:54:25 -0700189 unsigned int i, b;
190
Joel Beckere798b3f2008-12-15 17:13:48 -0800191 BUG_ON(!d);
Joel Becker70ad1ba2008-10-16 17:54:25 -0700192
193 /*
194 * If the bit to fix has an hweight of 1, it's a parity bit. One
195 * busted parity bit is its own error. Nothing to do here.
196 */
197 if (hweight32(fix) == 1)
198 return;
199
200 /*
201 * nr + d is the bit right past the data hunk we're looking at.
202 * If fix after that, nothing to do
203 */
204 if (fix >= calc_code_bit(nr + d))
205 return;
206
207 /*
208 * nr is the offset in the data hunk we're starting at. Let's
209 * start b at the offset in the code buffer. See hamming_encode()
210 * for a more detailed description of 'b'.
211 */
212 b = calc_code_bit(nr);
213 /* If the fix is before this hunk, nothing to do */
214 if (fix < b)
215 return;
216
217 for (i = 0; i < d; i++, b++)
218 {
219 /* Skip past parity bits */
220 while (hweight32(b) == 1)
221 b++;
222
223 /*
224 * i is the offset in this data hunk.
225 * nr + i is the offset in the total data buffer.
226 * b is the offset in the total code buffer.
227 *
228 * Thus, when b == fix, bit i in the current hunk needs
229 * fixing.
230 */
231 if (b == fix)
232 {
233 if (ocfs2_test_bit(i, data))
234 ocfs2_clear_bit(i, data);
235 else
236 ocfs2_set_bit(i, data);
237 break;
238 }
239 }
240}
241
242void ocfs2_hamming_fix_block(void *data, unsigned int blocksize,
243 unsigned int fix)
244{
245 ocfs2_hamming_fix(data, blocksize * 8, 0, fix);
246}
247
248/*
249 * This function generates check information for a block.
250 * data is the block to be checked. bc is a pointer to the
251 * ocfs2_block_check structure describing the crc32 and the ecc.
252 *
253 * bc should be a pointer inside data, as the function will
254 * take care of zeroing it before calculating the check information. If
255 * bc does not point inside data, the caller must make sure any inline
256 * ocfs2_block_check structures are zeroed.
257 *
258 * The data buffer must be in on-disk endian (little endian for ocfs2).
259 * bc will be filled with little-endian values and will be ready to go to
260 * disk.
261 */
262void ocfs2_block_check_compute(void *data, size_t blocksize,
263 struct ocfs2_block_check *bc)
264{
265 u32 crc;
266 u32 ecc;
267
268 memset(bc, 0, sizeof(struct ocfs2_block_check));
269
270 crc = crc32_le(~0, data, blocksize);
271 ecc = ocfs2_hamming_encode_block(data, blocksize);
272
273 /*
274 * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
275 * larger than 16 bits.
276 */
277 BUG_ON(ecc > USHORT_MAX);
278
279 bc->bc_crc32e = cpu_to_le32(crc);
280 bc->bc_ecc = cpu_to_le16((u16)ecc);
281}
282
283/*
284 * This function validates existing check information. Like _compute,
285 * the function will take care of zeroing bc before calculating check codes.
286 * If bc is not a pointer inside data, the caller must have zeroed any
287 * inline ocfs2_block_check structures.
288 *
289 * Again, the data passed in should be the on-disk endian.
290 */
291int ocfs2_block_check_validate(void *data, size_t blocksize,
292 struct ocfs2_block_check *bc)
293{
294 int rc = 0;
295 struct ocfs2_block_check check;
296 u32 crc, ecc;
297
298 check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);
299 check.bc_ecc = le16_to_cpu(bc->bc_ecc);
300
301 memset(bc, 0, sizeof(struct ocfs2_block_check));
302
303 /* Fast path - if the crc32 validates, we're good to go */
304 crc = crc32_le(~0, data, blocksize);
305 if (crc == check.bc_crc32e)
306 goto out;
307
Joel Beckerd6b32bb2008-10-17 14:55:01 -0700308 mlog(ML_ERROR,
309 "CRC32 failed: stored: %u, computed %u. Applying ECC.\n",
310 (unsigned int)check.bc_crc32e, (unsigned int)crc);
311
Joel Becker70ad1ba2008-10-16 17:54:25 -0700312 /* Ok, try ECC fixups */
313 ecc = ocfs2_hamming_encode_block(data, blocksize);
314 ocfs2_hamming_fix_block(data, blocksize, ecc ^ check.bc_ecc);
315
316 /* And check the crc32 again */
317 crc = crc32_le(~0, data, blocksize);
318 if (crc == check.bc_crc32e)
319 goto out;
320
Joel Beckerd6b32bb2008-10-17 14:55:01 -0700321 mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
322 (unsigned int)check.bc_crc32e, (unsigned int)crc);
323
Joel Becker70ad1ba2008-10-16 17:54:25 -0700324 rc = -EIO;
325
326out:
327 bc->bc_crc32e = cpu_to_le32(check.bc_crc32e);
328 bc->bc_ecc = cpu_to_le16(check.bc_ecc);
329
330 return rc;
331}
332
333/*
334 * This function generates check information for a list of buffer_heads.
335 * bhs is the blocks to be checked. bc is a pointer to the
336 * ocfs2_block_check structure describing the crc32 and the ecc.
337 *
338 * bc should be a pointer inside data, as the function will
339 * take care of zeroing it before calculating the check information. If
340 * bc does not point inside data, the caller must make sure any inline
341 * ocfs2_block_check structures are zeroed.
342 *
343 * The data buffer must be in on-disk endian (little endian for ocfs2).
344 * bc will be filled with little-endian values and will be ready to go to
345 * disk.
346 */
347void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
348 struct ocfs2_block_check *bc)
349{
350 int i;
351 u32 crc, ecc;
352
353 BUG_ON(nr < 0);
354
355 if (!nr)
356 return;
357
358 memset(bc, 0, sizeof(struct ocfs2_block_check));
359
360 for (i = 0, crc = ~0, ecc = 0; i < nr; i++) {
361 crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
362 /*
363 * The number of bits in a buffer is obviously b_size*8.
364 * The offset of this buffer is b_size*i, so the bit offset
365 * of this buffer is b_size*8*i.
366 */
367 ecc = (u16)ocfs2_hamming_encode(ecc, bhs[i]->b_data,
368 bhs[i]->b_size * 8,
369 bhs[i]->b_size * 8 * i);
370 }
371
372 /*
373 * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
374 * larger than 16 bits.
375 */
376 BUG_ON(ecc > USHORT_MAX);
377
378 bc->bc_crc32e = cpu_to_le32(crc);
379 bc->bc_ecc = cpu_to_le16((u16)ecc);
380}
381
382/*
383 * This function validates existing check information on a list of
384 * buffer_heads. Like _compute_bhs, the function will take care of
385 * zeroing bc before calculating check codes. If bc is not a pointer
386 * inside data, the caller must have zeroed any inline
387 * ocfs2_block_check structures.
388 *
389 * Again, the data passed in should be the on-disk endian.
390 */
391int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
392 struct ocfs2_block_check *bc)
393{
394 int i, rc = 0;
395 struct ocfs2_block_check check;
396 u32 crc, ecc, fix;
397
398 BUG_ON(nr < 0);
399
400 if (!nr)
401 return 0;
402
403 check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);
404 check.bc_ecc = le16_to_cpu(bc->bc_ecc);
405
406 memset(bc, 0, sizeof(struct ocfs2_block_check));
407
408 /* Fast path - if the crc32 validates, we're good to go */
409 for (i = 0, crc = ~0; i < nr; i++)
410 crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
411 if (crc == check.bc_crc32e)
412 goto out;
413
414 mlog(ML_ERROR,
415 "CRC32 failed: stored: %u, computed %u. Applying ECC.\n",
416 (unsigned int)check.bc_crc32e, (unsigned int)crc);
417
418 /* Ok, try ECC fixups */
419 for (i = 0, ecc = 0; i < nr; i++) {
420 /*
421 * The number of bits in a buffer is obviously b_size*8.
422 * The offset of this buffer is b_size*i, so the bit offset
423 * of this buffer is b_size*8*i.
424 */
425 ecc = (u16)ocfs2_hamming_encode(ecc, bhs[i]->b_data,
426 bhs[i]->b_size * 8,
427 bhs[i]->b_size * 8 * i);
428 }
429 fix = ecc ^ check.bc_ecc;
430 for (i = 0; i < nr; i++) {
431 /*
432 * Try the fix against each buffer. It will only affect
433 * one of them.
434 */
435 ocfs2_hamming_fix(bhs[i]->b_data, bhs[i]->b_size * 8,
436 bhs[i]->b_size * 8 * i, fix);
437 }
438
439 /* And check the crc32 again */
440 for (i = 0, crc = ~0; i < nr; i++)
441 crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
442 if (crc == check.bc_crc32e)
443 goto out;
444
445 mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
446 (unsigned int)check.bc_crc32e, (unsigned int)crc);
447
448 rc = -EIO;
449
450out:
451 bc->bc_crc32e = cpu_to_le32(check.bc_crc32e);
452 bc->bc_ecc = cpu_to_le16(check.bc_ecc);
453
454 return rc;
455}
456
457/*
458 * These are the main API. They check the superblock flag before
459 * calling the underlying operations.
460 *
461 * They expect the buffer(s) to be in disk format.
462 */
463void ocfs2_compute_meta_ecc(struct super_block *sb, void *data,
464 struct ocfs2_block_check *bc)
465{
466 if (ocfs2_meta_ecc(OCFS2_SB(sb)))
467 ocfs2_block_check_compute(data, sb->s_blocksize, bc);
468}
469
470int ocfs2_validate_meta_ecc(struct super_block *sb, void *data,
471 struct ocfs2_block_check *bc)
472{
473 int rc = 0;
474
475 if (ocfs2_meta_ecc(OCFS2_SB(sb)))
476 rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc);
477
478 return rc;
479}
480
481void ocfs2_compute_meta_ecc_bhs(struct super_block *sb,
482 struct buffer_head **bhs, int nr,
483 struct ocfs2_block_check *bc)
484{
485 if (ocfs2_meta_ecc(OCFS2_SB(sb)))
486 ocfs2_block_check_compute_bhs(bhs, nr, bc);
487}
488
489int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,
490 struct buffer_head **bhs, int nr,
491 struct ocfs2_block_check *bc)
492{
493 int rc = 0;
494
495 if (ocfs2_meta_ecc(OCFS2_SB(sb)))
496 rc = ocfs2_block_check_validate_bhs(bhs, nr, bc);
497
498 return rc;
499}
500