bcachefs: Improve -o norecovery; opts.recovery_pass_limit
author Kent Overstreet <kent.overstreet@linux.dev>
Fri, 29 Mar 2024 01:34:14 +0000 (21:34 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Mon, 1 Apr 2024 00:36:12 +0000 (20:36 -0400)
This adds opts.recovery_pass_limit (implemented in this patch as the
recovery_pass_last option), and redoes -o norecovery to make use of it;
this fixes some issues with -o norecovery so it can be safely used for
data recovery.

Norecovery means "don't do journal replay"; it's an important data
recovery tool when we're getting stuck in journal replay.

When using it this way we need to make sure we don't free journal keys
after startup, so we continue to overlay them: thus it needs to imply
retain_recovery_info, as well as nochanges.

recovery_pass_limit is an explicit option for telling recovery to exit
after a specific recovery pass; this is a much cleaner way of
implementing -o norecovery, as well as being a useful debug feature in
its own right.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/opts.c
fs/bcachefs/opts.h
fs/bcachefs/recovery.c
fs/bcachefs/recovery_passes.c
fs/bcachefs/snapshot.c
fs/bcachefs/super.c

index 08ea0cfc4aef08acfd4d0fe33e0d8227f212cb02..e1800c4119b5fbaf8ebbfcdaef996e1dd9c35ca8 100644 (file)
@@ -7,6 +7,7 @@
 #include "disk_groups.h"
 #include "error.h"
 #include "opts.h"
+#include "recovery_passes.h"
 #include "super-io.h"
 #include "util.h"
 
@@ -205,6 +206,9 @@ const struct bch_option bch2_opt_table[] = {
 #define OPT_STR(_choices)      .type = BCH_OPT_STR,                    \
                                .min = 0, .max = ARRAY_SIZE(_choices),  \
                                .choices = _choices
+#define OPT_STR_NOLIMIT(_choices)      .type = BCH_OPT_STR,            \
+                               .min = 0, .max = U64_MAX,               \
+                               .choices = _choices
 #define OPT_FN(_fn)            .type = BCH_OPT_FN, .fn = _fn
 
 #define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help)        \
index 136083c11f3a3aecc575501c33c0b3868f38113f..084247c13bd8fd18133708f131132d6e55b53aaa 100644 (file)
@@ -362,7 +362,12 @@ enum fsck_err_opts {
          OPT_FS|OPT_MOUNT,                                             \
          OPT_BOOL(),                                                   \
          BCH2_NO_SB_OPT,               false,                          \
-         NULL,         "Don't replay the journal")                     \
+         NULL,         "Exit recovery immediately prior to journal replay")\
+       x(recovery_pass_last,           u8,                             \
+         OPT_FS|OPT_MOUNT,                                             \
+         OPT_STR_NOLIMIT(bch2_recovery_passes),                        \
+         BCH2_NO_SB_OPT,               0,                              \
+         NULL,         "Exit recovery after specified pass")           \
        x(keep_journal,                 u8,                             \
          0,                                                            \
          OPT_BOOL(),                                                   \
index e3b06430d6067ba0d2dd15c9812b833fcfc123af..e8b434009293627331f42d2dcf2dbaa896009c57 100644 (file)
@@ -269,7 +269,8 @@ int bch2_journal_replay(struct bch_fs *c)
        bch2_trans_put(trans);
        trans = NULL;
 
-       if (!c->opts.keep_journal)
+       if (!c->opts.keep_journal &&
+           c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay)
                bch2_journal_keys_put_initial(c);
 
        replay_now_at(j, j->replay_journal_seq_end);
@@ -584,11 +585,8 @@ int bch2_fs_recovery(struct bch_fs *c)
                goto err;
        }
 
-       if (c->opts.fsck && c->opts.norecovery) {
-               bch_err(c, "cannot select both norecovery and fsck");
-               ret = -EINVAL;
-               goto err;
-       }
+       if (c->opts.norecovery)
+               c->opts.recovery_pass_last = BCH_RECOVERY_PASS_journal_replay - 1;
 
        if (!c->opts.nochanges) {
                mutex_lock(&c->sb_lock);
index fb22cce10f668330b8395884ba908d3603db4952..0089d9456b100647cc60bc805885f19af2f39c89 100644 (file)
@@ -27,7 +27,7 @@ const char * const bch2_recovery_passes[] = {
 
 static int bch2_check_allocations(struct bch_fs *c)
 {
-       return bch2_gc(c, true, c->opts.norecovery);
+       return bch2_gc(c, true, false);
 }
 
 static int bch2_set_may_go_rw(struct bch_fs *c)
@@ -144,8 +144,6 @@ static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pa
 {
        struct recovery_pass_fn *p = recovery_pass_fns + pass;
 
-       if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read)
-               return false;
        if (c->recovery_passes_explicit & BIT_ULL(pass))
                return true;
        if ((p->when & PASS_FSCK) && c->opts.fsck)
@@ -201,6 +199,10 @@ int bch2_run_recovery_passes(struct bch_fs *c)
        int ret = 0;
 
        while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
+               if (c->opts.recovery_pass_last &&
+                   c->curr_recovery_pass > c->opts.recovery_pass_last)
+                       break;
+
                if (should_run_recovery_pass(c, c->curr_recovery_pass)) {
                        unsigned pass = c->curr_recovery_pass;
 
@@ -213,8 +215,10 @@ int bch2_run_recovery_passes(struct bch_fs *c)
 
                        c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass);
                }
-               c->curr_recovery_pass++;
+
                c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass);
+
+               c->curr_recovery_pass++;
        }
 
        return ret;
index 4e074136c490793cdd11a42ccf061dcb5cf31176..4577ee7939a2a5d399b554b261a2f1ffdc74542e 100644 (file)
@@ -131,7 +131,7 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
        rcu_read_lock();
        struct snapshot_table *t = rcu_dereference(c->snapshots);
 
-       if (unlikely(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots)) {
+       if (unlikely(c->recovery_pass_done < BCH_RECOVERY_PASS_check_snapshots)) {
                ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor);
                goto out;
        }
index 89ee5d3ec11963b8167bd47673d9119652ef88a2..bc026a77eb99d4141c74603885b410373c6a5104 100644 (file)
@@ -365,7 +365,7 @@ void bch2_fs_read_only(struct bch_fs *c)
            !test_bit(BCH_FS_emergency_ro, &c->flags) &&
            test_bit(BCH_FS_started, &c->flags) &&
            test_bit(BCH_FS_clean_shutdown, &c->flags) &&
-           !c->opts.norecovery) {
+           c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay) {
                BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal));
                BUG_ON(atomic_read(&c->btree_cache.dirty));
                BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty));
@@ -510,7 +510,8 @@ err:
 
 int bch2_fs_read_write(struct bch_fs *c)
 {
-       if (c->opts.norecovery)
+       if (c->opts.recovery_pass_last &&
+           c->opts.recovery_pass_last < BCH_RECOVERY_PASS_journal_replay)
                return -BCH_ERR_erofs_norecovery;
 
        if (c->opts.nochanges)