md/r5cache: r5cache recovery: part 2
1. In the previous patch, we:
- added new data to r5l_recovery_ctx
- added new functions to recover the write-back cache
The new functions were not used in that patch, so it did not
change the behavior of recovery.
2. In this patch, we:
- modify the main recovery procedure r5l_recovery_log() to call the new
functions
- remove the old functions
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 3aa7ecc..6b99570 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1393,156 +1393,6 @@
return 0;
}
-static int r5l_recovery_flush_one_stripe(struct r5l_log *log,
- struct r5l_recovery_ctx *ctx,
- sector_t stripe_sect,
- int *offset)
-{
- struct r5conf *conf = log->rdev->mddev->private;
- struct stripe_head *sh;
- struct r5l_payload_data_parity *payload;
- int disk_index;
-
- sh = raid5_get_active_stripe(conf, stripe_sect, 0, 0, 0);
- while (1) {
- sector_t log_offset = r5l_ring_add(log, ctx->pos,
- ctx->meta_total_blocks);
- payload = page_address(ctx->meta_page) + *offset;
-
- if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_DATA) {
- raid5_compute_sector(conf,
- le64_to_cpu(payload->location), 0,
- &disk_index, sh);
-
- sync_page_io(log->rdev, log_offset, PAGE_SIZE,
- sh->dev[disk_index].page, REQ_OP_READ, 0,
- false);
- sh->dev[disk_index].log_checksum =
- le32_to_cpu(payload->checksum[0]);
- set_bit(R5_Wantwrite, &sh->dev[disk_index].flags);
- } else {
- disk_index = sh->pd_idx;
- sync_page_io(log->rdev, log_offset, PAGE_SIZE,
- sh->dev[disk_index].page, REQ_OP_READ, 0,
- false);
- sh->dev[disk_index].log_checksum =
- le32_to_cpu(payload->checksum[0]);
- set_bit(R5_Wantwrite, &sh->dev[disk_index].flags);
-
- if (sh->qd_idx >= 0) {
- disk_index = sh->qd_idx;
- sync_page_io(log->rdev,
- r5l_ring_add(log, log_offset, BLOCK_SECTORS),
- PAGE_SIZE, sh->dev[disk_index].page,
- REQ_OP_READ, 0, false);
- sh->dev[disk_index].log_checksum =
- le32_to_cpu(payload->checksum[1]);
- set_bit(R5_Wantwrite,
- &sh->dev[disk_index].flags);
- }
- }
-
- ctx->meta_total_blocks += le32_to_cpu(payload->size);
- *offset += sizeof(struct r5l_payload_data_parity) +
- sizeof(__le32) *
- (le32_to_cpu(payload->size) >> (PAGE_SHIFT - 9));
- if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_PARITY)
- break;
- }
-
- for (disk_index = 0; disk_index < sh->disks; disk_index++) {
- void *addr;
- u32 checksum;
-
- if (!test_bit(R5_Wantwrite, &sh->dev[disk_index].flags))
- continue;
- addr = kmap_atomic(sh->dev[disk_index].page);
- checksum = crc32c_le(log->uuid_checksum, addr, PAGE_SIZE);
- kunmap_atomic(addr);
- if (checksum != sh->dev[disk_index].log_checksum)
- goto error;
- }
-
- for (disk_index = 0; disk_index < sh->disks; disk_index++) {
- struct md_rdev *rdev, *rrdev;
-
- if (!test_and_clear_bit(R5_Wantwrite,
- &sh->dev[disk_index].flags))
- continue;
-
- /* in case device is broken */
- rcu_read_lock();
- rdev = rcu_dereference(conf->disks[disk_index].rdev);
- if (rdev) {
- atomic_inc(&rdev->nr_pending);
- rcu_read_unlock();
- sync_page_io(rdev, stripe_sect, PAGE_SIZE,
- sh->dev[disk_index].page, REQ_OP_WRITE, 0,
- false);
- rdev_dec_pending(rdev, rdev->mddev);
- rcu_read_lock();
- }
- rrdev = rcu_dereference(conf->disks[disk_index].replacement);
- if (rrdev) {
- atomic_inc(&rrdev->nr_pending);
- rcu_read_unlock();
- sync_page_io(rrdev, stripe_sect, PAGE_SIZE,
- sh->dev[disk_index].page, REQ_OP_WRITE, 0,
- false);
- rdev_dec_pending(rrdev, rrdev->mddev);
- rcu_read_lock();
- }
- rcu_read_unlock();
- }
- raid5_release_stripe(sh);
- return 0;
-
-error:
- for (disk_index = 0; disk_index < sh->disks; disk_index++)
- sh->dev[disk_index].flags = 0;
- raid5_release_stripe(sh);
- return -EINVAL;
-}
-
-static int r5l_recovery_flush_one_meta(struct r5l_log *log,
- struct r5l_recovery_ctx *ctx)
-{
- struct r5conf *conf = log->rdev->mddev->private;
- struct r5l_payload_data_parity *payload;
- struct r5l_meta_block *mb;
- int offset;
- sector_t stripe_sector;
-
- mb = page_address(ctx->meta_page);
- offset = sizeof(struct r5l_meta_block);
-
- while (offset < le32_to_cpu(mb->meta_size)) {
- int dd;
-
- payload = (void *)mb + offset;
- stripe_sector = raid5_compute_sector(conf,
- le64_to_cpu(payload->location), 0, &dd, NULL);
- if (r5l_recovery_flush_one_stripe(log, ctx, stripe_sector,
- &offset))
- return -EINVAL;
- }
- return 0;
-}
-
-/* copy data/parity from log to raid disks */
-static void r5l_recovery_flush_log(struct r5l_log *log,
- struct r5l_recovery_ctx *ctx)
-{
- while (1) {
- if (r5l_recovery_read_meta_block(log, ctx))
- return;
- if (r5l_recovery_flush_one_meta(log, ctx))
- return;
- ctx->seq++;
- ctx->pos = r5l_ring_add(log, ctx->pos, ctx->meta_total_blocks);
- }
-}
-
static void
r5l_recovery_create_empty_meta_block(struct r5l_log *log,
struct page *page,
@@ -2166,7 +2016,9 @@
static int r5l_recovery_log(struct r5l_log *log)
{
+ struct mddev *mddev = log->rdev->mddev;
struct r5l_recovery_ctx ctx;
+ int ret;
ctx.pos = log->last_checkpoint;
ctx.seq = log->last_cp_seq;
@@ -2178,47 +2030,33 @@
if (!ctx.meta_page)
return -ENOMEM;
- r5l_recovery_flush_log(log, &ctx);
+ ret = r5c_recovery_flush_log(log, &ctx);
__free_page(ctx.meta_page);
- /*
- * we did a recovery. Now ctx.pos points to an invalid meta block. New
- * log will start here. but we can't let superblock point to last valid
- * meta block. The log might looks like:
- * | meta 1| meta 2| meta 3|
- * meta 1 is valid, meta 2 is invalid. meta 3 could be valid. If
- * superblock points to meta 1, we write a new valid meta 2n. if crash
- * happens again, new recovery will start from meta 1. Since meta 2n is
- * valid now, recovery will think meta 3 is valid, which is wrong.
- * The solution is we create a new meta in meta2 with its seq == meta
- * 1's seq + 10 and let superblock points to meta2. The same recovery will
- * not think meta 3 is a valid meta, because its seq doesn't match
- */
- if (ctx.seq > log->last_cp_seq) {
- int ret;
+ if (ret)
+ return ret;
- ret = r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq + 10);
- if (ret)
- return ret;
- log->seq = ctx.seq + 11;
- log->log_start = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
- r5l_write_super(log, ctx.pos);
- log->last_checkpoint = ctx.pos;
- log->next_checkpoint = ctx.pos;
- } else {
- log->log_start = ctx.pos;
- log->seq = ctx.seq;
+ if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0))
+ pr_debug("md/raid:%s: starting from clean shutdown\n",
+ mdname(mddev));
+ else {
+ pr_debug("md/raid:%s: recoverying %d data-only stripes and %d data-parity stripes\n",
+ mdname(mddev), ctx.data_only_stripes,
+ ctx.data_parity_stripes);
+
+ if (ctx.data_only_stripes > 0)
+ if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) {
+ pr_err("md/raid:%s: failed to rewrite stripes to journal\n",
+ mdname(mddev));
+ return -EIO;
+ }
}
- /*
- * This is to suppress "function defined but not used" warning.
- * It will be removed when the two functions are used (next patch).
- */
- if (!log) {
- r5c_recovery_flush_log(log, &ctx);
- r5c_recovery_rewrite_data_only_stripes(log, &ctx);
- }
-
+ log->log_start = ctx.pos;
+ log->next_checkpoint = ctx.pos;
+ log->seq = ctx.seq;
+ r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq);
+ r5l_write_super(log, ctx.pos);
return 0;
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 17cf98e..aa4968c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7100,7 +7100,8 @@
pr_debug("md/raid:%s: using device %s as journal\n",
mdname(mddev), bdevname(journal_dev->bdev, b));
- r5l_init_log(conf, journal_dev);
+ if (r5l_init_log(conf, journal_dev))
+ goto abort;
}
return 0;