bcachefs: Reconstruct missing snapshot nodes
authorKent Overstreet <kent.overstreet@linux.dev>
Thu, 28 Mar 2024 02:50:19 +0000 (22:50 -0400)
committerKent Overstreet <kent.overstreet@linux.dev>
Wed, 3 Apr 2024 18:46:51 +0000 (14:46 -0400)
When the snapshots btree is gone, we'll have to delete huge amounts of
data - unless we can reconstruct it by looking at the keys that refer to
it.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bcachefs.h
fs/bcachefs/recovery.c
fs/bcachefs/recovery_passes_types.h
fs/bcachefs/sb-errors_types.h
fs/bcachefs/snapshot.c
fs/bcachefs/snapshot.h

index d1a0e54785032b62f37be904d10b2328f9d22d3c..a31a5f706929eb2006e4867a38123b9526639cee 100644 (file)
@@ -615,6 +615,7 @@ struct bch_dev {
  */
 
 #define BCH_FS_FLAGS()                 \
+       x(new_fs)                       \
        x(started)                      \
        x(may_go_rw)                    \
        x(rw)                           \
index b3c67ae3d3b2e2357878c18f70092ebd551ef111..b76c16152579c6d3e5a51dbf54c839392c0ce0b2 100644 (file)
@@ -938,6 +938,7 @@ int bch2_fs_initialize(struct bch_fs *c)
        int ret;
 
        bch_notice(c, "initializing new filesystem");
+       set_bit(BCH_FS_new_fs, &c->flags);
 
        mutex_lock(&c->sb_lock);
        c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
index 840542cfd65b643d187440f7baf2213260202ae9..773aea9a0080fd6e6105ed4f9e8a91fde1d125ee 100644 (file)
@@ -32,6 +32,7 @@
        x(check_alloc_to_lru_refs,              15, PASS_ONLINE|PASS_FSCK)      \
        x(fs_freespace_init,                    16, PASS_ALWAYS|PASS_SILENT)    \
        x(bucket_gens_init,                     17, 0)                          \
+       x(reconstruct_snapshots,                38, 0)                          \
        x(check_snapshot_trees,                 18, PASS_ONLINE|PASS_FSCK)      \
        x(check_snapshots,                      19, PASS_ONLINE|PASS_FSCK)      \
        x(check_subvols,                        20, PASS_ONLINE|PASS_FSCK)      \
index 2fec03a24c95b5214c5977ad1226914313d6901c..2f8f4d2388b045318c0fc90af1d9a96d25ef33c1 100644 (file)
        x(btree_node_bkey_bad_u64s,                             260)    \
        x(btree_node_topology_empty_interior_node,              261)    \
        x(btree_ptr_v2_min_key_bad,                             262)    \
-       x(btree_root_unreadable_and_scan_found_nothing,         263)
+       x(btree_root_unreadable_and_scan_found_nothing,         263)    \
+       x(snapshot_node_missing,                                264)
 
 enum bch_sb_error_id {
 #define x(t, n) BCH_FSCK_ERR_##t = n,
index 4577ee7939a2a5d399b554b261a2f1ffdc74542e..0e806f04f3d7c5117ade3d612b1c851da243aead 100644 (file)
@@ -8,6 +8,7 @@
 #include "errcode.h"
 #include "error.h"
 #include "fs.h"
+#include "recovery_passes.h"
 #include "snapshot.h"
 
 #include <linux/random.h>
@@ -574,6 +575,13 @@ static int check_snapshot_tree(struct btree_trans *trans,
                u32 subvol_id;
 
                ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
+               bch_err_fn(c, ret);
+
+               if (bch2_err_matches(ret, ENOENT)) { /* nothing to be done here */
+                       ret = 0;
+                       goto err;
+               }
+
                if (ret)
                        goto err;
 
@@ -731,7 +739,6 @@ static int check_snapshot(struct btree_trans *trans,
        u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
        u32 real_depth;
        struct printbuf buf = PRINTBUF;
-       bool should_have_subvol;
        u32 i, id;
        int ret = 0;
 
@@ -777,7 +784,7 @@ static int check_snapshot(struct btree_trans *trans,
                }
        }
 
-       should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
+       bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
                !BCH_SNAPSHOT_DELETED(&s);
 
        if (should_have_subvol) {
@@ -879,6 +886,154 @@ int bch2_check_snapshots(struct bch_fs *c)
        return ret;
 }
 
+/*
+ * Make sure a snapshot node exists for @id.
+ *
+ * If bch2_snapshot_equiv() returns nonzero for @id nothing is done.
+ * Otherwise a snapshot tree is created for @id and a freshly initialized
+ * snapshot key at POS(0, @id), pointing at that tree, is inserted into the
+ * snapshots btree, marked, and has its equivalence class set.
+ *
+ * Returns 0 on success, or the error code of the first step that failed.
+ */
+static int check_snapshot_exists(struct btree_trans *trans, u32 id)
+{
+       struct bch_fs *c = trans->c;
+       u32 tree_id;
+       int ret;
+
+       if (bch2_snapshot_equiv(c, id))
+               return 0;
+
+       ret = bch2_snapshot_tree_create(trans, id, 0, &tree_id);
+       if (ret)
+               return ret;
+
+       struct bkey_i_snapshot *new_node = bch2_trans_kmalloc(trans, sizeof(*new_node));
+       ret = PTR_ERR_OR_ZERO(new_node);
+       if (ret)
+               return ret;
+
+       bkey_snapshot_init(&new_node->k_i);
+       new_node->k.p           = POS(0, id);
+       new_node->v.tree        = cpu_to_le32(tree_id);
+       new_node->v.btime.lo    = cpu_to_le64(bch2_current_time(c));
+
+       /* Each step runs only if the previous one succeeded: */
+       ret = bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &new_node->k_i, 0);
+       if (ret)
+               return ret;
+
+       ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
+                                bkey_s_c_null, bkey_i_to_s(&new_node->k_i), 0);
+       if (ret)
+               return ret;
+
+       return bch2_snapshot_set_equiv(trans, bkey_i_to_s_c(&new_node->k_i));
+}
+
+/* Figure out which snapshot nodes belong in the same tree: */
+struct snapshot_tree_reconstruct {
+       enum btree_id                   btree;          /* btree being scanned; selects the grouping rule in same_snapshot() */
+       struct bpos                     cur_pos;        /* position of the most recently visited key */
+       snapshot_id_list                cur_ids;        /* snapshot IDs collected for the current group of keys */
+       DARRAY(snapshot_id_list)        trees;          /* one ID list per group found so far; overlapping groups are merged */
+};
+
+/*
+ * Release everything owned by @r: each per-tree ID list, then the array of
+ * trees itself, then the scratch list of IDs for the current group.
+ */
+static void snapshot_tree_reconstruct_exit(struct snapshot_tree_reconstruct *r)
+{
+       darray_for_each(r->trees, tree) {
+               darray_exit(tree);
+       }
+
+       darray_exit(&r->trees);
+       darray_exit(&r->cur_ids);
+}
+
+/*
+ * Does @pos fall in the same group as the previously visited key
+ * (r->cur_pos)?  In the inodes btree keys are grouped by offset; in every
+ * other btree they are grouped by inode.
+ */
+static inline bool same_snapshot(struct snapshot_tree_reconstruct *r, struct bpos pos)
+{
+       if (r->btree == BTREE_ID_inodes)
+               return r->cur_pos.offset == pos.offset;
+
+       return r->cur_pos.inode == pos.inode;
+}
+
+/* Return true if at least one snapshot ID is present in both @l and @r. */
+static inline bool snapshot_id_lists_have_common(snapshot_id_list *l, snapshot_id_list *r)
+{
+       darray_for_each(*l, id) {
+               if (snapshot_list_has_id(r, *id)) {
+                       return true;
+               }
+       }
+
+       return false;
+}
+
+/* Print the IDs in @s to @out as unsigned decimals separated by single spaces. */
+static void snapshot_id_list_to_text(struct printbuf *out, snapshot_id_list *s)
+{
+       bool need_separator = false;
+
+       darray_for_each(*s, id) {
+               if (need_separator)
+                       prt_char(out, ' ');
+               prt_printf(out, "%u", *id);
+               need_separator = true;
+       }
+}
+
+/*
+ * Finish the current group of snapshot IDs (r->cur_ids) and start a new one.
+ *
+ * If the group shares an ID with a tree already recorded in r->trees, it is
+ * merged into the first such tree.  Otherwise the list is appended to
+ * r->trees as a new tree and r->cur_ids is reinitialized, since its buffer is
+ * then owned by the r->trees entry.
+ *
+ * Returns 0 on success, or the error from merging or from growing r->trees.
+ * On error r->cur_ids is left untouched, so snapshot_tree_reconstruct_exit()
+ * still frees it.
+ */
+static int snapshot_tree_reconstruct_next(struct bch_fs *c, struct snapshot_tree_reconstruct *r)
+{
+       int ret;
+
+       /* Empty group: nothing to record */
+       if (!r->cur_ids.nr)
+               return 0;
+
+       darray_for_each(r->trees, i)
+               if (snapshot_id_lists_have_common(i, &r->cur_ids)) {
+                       ret = snapshot_list_merge(c, i, &r->cur_ids);
+                       if (ret)
+                               return ret;
+                       goto out;
+               }
+
+       /*
+        * Only hand the buffer over to r->trees if the push succeeded;
+        * otherwise reinitializing cur_ids would leak it:
+        */
+       ret = darray_push(&r->trees, r->cur_ids);
+       if (ret) {
+               bch_err(c, "error allocating snapshot tree list");
+               return ret;
+       }
+       darray_init(&r->cur_ids);
+out:
+       r->cur_ids.nr = 0;
+       return 0;
+}
+
+/*
+ * Per-key callback for the btree walk: record the snapshot ID of the key at
+ * @pos in the current group.  When @pos starts a new group (see
+ * same_snapshot()), the previous group is flushed into r->trees first.
+ *
+ * Returns 0 on success, or the error from flushing the previous group or
+ * from adding the ID to the current one.
+ */
+static int get_snapshot_trees(struct bch_fs *c, struct snapshot_tree_reconstruct *r, struct bpos pos)
+{
+       if (!same_snapshot(r, pos)) {
+               /* Don't drop merge/allocation failures from the flush */
+               int ret = snapshot_tree_reconstruct_next(c, r);
+               if (ret)
+                       return ret;
+       }
+
+       r->cur_pos = pos;
+       return snapshot_list_add_nodup(c, &r->cur_ids, pos.snapshot);
+}
+
+/*
+ * Recovery pass: recreate snapshot nodes that keys still refer to but that
+ * are missing.
+ *
+ * Walks every btree for which btree_type_has_snapshots() is true, over all
+ * snapshots, and groups the snapshot IDs seen into trees.  For each ID with
+ * no equivalent snapshot, a fsck error is raised and, if accepted, the node
+ * is recreated with check_snapshot_exists().  Only single-node trees can be
+ * rebuilt; a missing node in a tree with more than one ID fails with
+ * -BCH_ERR_fsck_repair_unimplemented.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int bch2_reconstruct_snapshots(struct bch_fs *c)
+{
+       struct btree_trans *trans = bch2_trans_get(c);
+       struct printbuf buf = PRINTBUF;
+       struct snapshot_tree_reconstruct r = {};
+       int ret = 0;
+
+       for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
+               if (!btree_type_has_snapshots(btree))
+                       continue;
+
+               r.btree = btree;
+
+               ret = for_each_btree_key(trans, iter, btree, POS_MIN,
+                               BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_PREFETCH, k, ({
+                       get_snapshot_trees(c, &r, k.k->p);
+               }));
+               if (ret)
+                       goto err;
+
+               /* Flush the last group of this btree; don't ignore failure */
+               ret = snapshot_tree_reconstruct_next(c, &r);
+               if (ret)
+                       goto err;
+       }
+
+       darray_for_each(r.trees, t) {
+               /* Render the whole tree once, for the error message below */
+               printbuf_reset(&buf);
+               snapshot_id_list_to_text(&buf, t);
+
+               darray_for_each(*t, id) {
+                       if (fsck_err_on(!bch2_snapshot_equiv(c, *id),
+                                       c, snapshot_node_missing,
+                                       "snapshot node %u from tree %s missing", *id, buf.buf)) {
+                               if (t->nr > 1) {
+                                       bch_err(c, "cannot reconstruct snapshot trees with multiple nodes");
+                                       ret = -BCH_ERR_fsck_repair_unimplemented;
+                                       goto err;
+                               }
+
+                               ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+                                               check_snapshot_exists(trans, *id));
+                               if (ret)
+                                       goto err;
+                       }
+               }
+       }
+fsck_err:
+err:
+       bch2_trans_put(trans);
+       snapshot_tree_reconstruct_exit(&r);
+       printbuf_exit(&buf);
+       bch_err_fn(c, ret);
+       return ret;
+}
+
 /*
  * Mark a snapshot as deleted, for future cleanup:
  */
@@ -1689,6 +1844,20 @@ int bch2_snapshots_read(struct bch_fs *c)
                                   POS_MIN, 0, k,
                           (set_is_ancestor_bitmap(c, k.k->p.offset), 0)));
        bch_err_fn(c, ret);
+
+       /*
+        * It's important that we check if we need to reconstruct snapshots
+        * before going RW, so we mark that pass as required in the superblock -
+        * otherwise, we could end up deleting keys with missing snapshot nodes
+        * instead
+        */
+       BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
+              test_bit(BCH_FS_may_go_rw, &c->flags));
+
+       if (bch2_err_matches(ret, EIO) ||
+           (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots)))
+               ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots);
+
        return ret;
 }
 
index 331f20fd8d03d17c363a7ac060f704c84123d0b5..b7d2fed37c4f31167fe036bb9967ac084c733edf 100644 (file)
@@ -209,15 +209,34 @@ static inline bool snapshot_list_has_ancestor(struct bch_fs *c, snapshot_id_list
 
 static inline int snapshot_list_add(struct bch_fs *c, snapshot_id_list *s, u32 id)
 {
-       int ret;
-
        BUG_ON(snapshot_list_has_id(s, id));
-       ret = darray_push(s, id);
+       int ret = darray_push(s, id);
        if (ret)
                bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size);
        return ret;
 }
 
+/*
+ * Append @id to @s unless it is already present.
+ *
+ * Returns 0 if @id was already in the list or was appended; otherwise logs
+ * and returns the error from darray_push().
+ */
+static inline int snapshot_list_add_nodup(struct bch_fs *c, snapshot_id_list *s, u32 id)
+{
+       if (snapshot_list_has_id(s, id))
+               return 0;
+
+       int ret = darray_push(s, id);
+       if (ret)
+               bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size);
+       return ret;
+}
+
+/*
+ * Add every ID in @src that @dst does not already contain to @dst.
+ *
+ * Stops at the first failure and returns its error code; IDs added before
+ * the failure stay in @dst.  Returns 0 once all of @src has been processed.
+ */
+static inline int snapshot_list_merge(struct bch_fs *c, snapshot_id_list *dst, snapshot_id_list *src)
+{
+       int ret;
+
+       darray_for_each(*src, id) {
+               ret = snapshot_list_add_nodup(c, dst, *id);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
 int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
                         struct bch_snapshot *s);
 int bch2_snapshot_get_subvol(struct btree_trans *, u32,
@@ -229,6 +248,7 @@ int bch2_snapshot_node_create(struct btree_trans *, u32,
 
 int bch2_check_snapshot_trees(struct bch_fs *);
 int bch2_check_snapshots(struct bch_fs *);
+int bch2_reconstruct_snapshots(struct bch_fs *);
 
 int bch2_snapshot_node_set_deleted(struct btree_trans *, u32);
 void bch2_delete_dead_snapshots_work(struct work_struct *);