bcachefs: Flag btrees with missing data
author Kent Overstreet <kent.overstreet@linux.dev>
Sat, 16 Mar 2024 03:03:42 +0000 (23:03 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Wed, 3 Apr 2024 18:46:51 +0000 (14:46 -0400)
We need this to know when we should attempt to reconstruct the snapshots
btree.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bcachefs.h
fs/bcachefs/bcachefs_format.h
fs/bcachefs/btree_io.c
fs/bcachefs/recovery.c
fs/bcachefs/recovery.h
fs/bcachefs/super-io.c

index 93a61dbaa3d8691ef5fe28af55f43ff6162465eb..d1a0e54785032b62f37be904d10b2328f9d22d3c 100644 (file)
@@ -797,6 +797,7 @@ struct bch_fs {
                u64             features;
                u64             compat;
                unsigned long   errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)];
+               u64             btrees_lost_data;
        }                       sb;
 
 
index bff8750ac0d743aa22f2cbea9effbf77bf6be725..63102992d9556d1b33b445a3116df61964a6ca01 100644 (file)
@@ -818,6 +818,7 @@ struct bch_sb_field_ext {
        struct bch_sb_field     field;
        __le64                  recovery_passes_required[2];
        __le64                  errors_silent[8];
+       __le64                  btrees_lost_data;
 };
 
 struct bch_sb_field_downgrade_entry {
index f3f27bb85a5ba4466b5ae6403837ce887073d81c..d7de82ac389354f9a0d5eef0a66c8694f1752b94 100644 (file)
@@ -1264,10 +1264,12 @@ out:
        return retry_read;
 fsck_err:
        if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
-           ret == -BCH_ERR_btree_node_read_err_must_retry)
+           ret == -BCH_ERR_btree_node_read_err_must_retry) {
                retry_read = 1;
-       else
+       } else {
                set_btree_node_read_error(b);
+               bch2_btree_lost_data(c, b->c.btree_id);
+       }
        goto out;
 }
 
@@ -1328,6 +1330,7 @@ start:
 
                if (!can_retry) {
                        set_btree_node_read_error(b);
+                       bch2_btree_lost_data(c, b->c.btree_id);
                        break;
                }
        }
@@ -1527,9 +1530,10 @@ fsck_err:
                ret = -1;
        }
 
-       if (ret)
+       if (ret) {
                set_btree_node_read_error(b);
-       else if (*saw_error)
+               bch2_btree_lost_data(c, b->c.btree_id);
+       } else if (*saw_error)
                bch2_btree_node_rewrite_async(c, b);
 
        for (i = 0; i < ra->nr; i++) {
@@ -1665,6 +1669,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
                        bch2_fatal_error(c);
 
                set_btree_node_read_error(b);
+               bch2_btree_lost_data(c, b->c.btree_id);
                clear_btree_node_read_in_flight(b);
                wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
                printbuf_exit(&buf);
index 24671020f22b1a6a77addfe582e0d55b34225795..b3c67ae3d3b2e2357878c18f70092ebd551ef111 100644 (file)
 
 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
 
+/*
+ * Flag @btree in the superblock's ext field as having lost data.
+ *
+ * If the bit for @btree is not already set in the in-memory copy
+ * (c->sb.btrees_lost_data), log an error, then set the bit in the
+ * superblock's ext field and write the superblock while holding sb_lock.
+ *
+ * NOTE(review): the "already flagged?" test reads c->sb.btrees_lost_data
+ * before sb_lock is taken, so two concurrent callers could both log and
+ * both write the superblock; that looks redundant rather than harmful -
+ * confirm.
+ * NOTE(review): the result of bch2_sb_field_get() is dereferenced without a
+ * NULL check; presumably the ext field is guaranteed to exist (and to be
+ * large enough to hold btrees_lost_data) by the time this runs - confirm
+ * against the callers.
+ */
+void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
+{
+       /* One bit per btree id; assumes btree < 64 - TODO confirm */
+       u64 b = BIT_ULL(btree);
+
+       /* Only log and rewrite the superblock the first time a btree is flagged */
+       if (!(c->sb.btrees_lost_data & b)) {
+               bch_err(c, "flagging btree %s lost data", bch2_btree_id_str(btree));
+
+               mutex_lock(&c->sb_lock);
+               /* The superblock field is little-endian, hence cpu_to_le64() */
+               bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(b);
+               bch2_write_super(c);
+               mutex_unlock(&c->sb_lock);
+       }
+}
+
 static bool btree_id_is_alloc(enum btree_id id)
 {
        switch (id) {
@@ -470,6 +484,7 @@ static int read_btree_roots(struct bch_fs *c)
                        }
 
                        ret = 0;
+                       bch2_btree_lost_data(c, i);
                }
        }
 
@@ -848,6 +863,14 @@ use_clean:
                write_sb = true;
        }
 
+       if (c->opts.fsck &&
+           !test_bit(BCH_FS_error, &c->flags) &&
+           c->recovery_pass_done == BCH_RECOVERY_PASS_NR - 1 &&
+           ext->btrees_lost_data) {
+               ext->btrees_lost_data = 0;
+               write_sb = true;
+       }
+
        if (c->opts.fsck &&
            !test_bit(BCH_FS_error, &c->flags) &&
            !test_bit(BCH_FS_errors_not_fixed, &c->flags)) {
index 3962fd87b50d8fd595ec76a7de2ae54b331ef1e4..4bf818de1f2feb1f6010eaff3a8eccbbf1e6d2c6 100644 (file)
@@ -2,6 +2,8 @@
 #ifndef _BCACHEFS_RECOVERY_H
 #define _BCACHEFS_RECOVERY_H
 
+void bch2_btree_lost_data(struct bch_fs *, enum btree_id);
+
 int bch2_journal_replay(struct bch_fs *);
 
 int bch2_fs_recovery(struct bch_fs *);
index e15f8b1f30c2ac8badb9d5364468ec93de233d7f..e0aa3655b63b4cd7ca8dd2ee03e0370474427ab3 100644 (file)
@@ -527,9 +527,11 @@ static void bch2_sb_update(struct bch_fs *c)
        memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent));
 
        struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext);
-       if (ext)
+       if (ext) {
                le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent,
                                    sizeof(c->sb.errors_silent) * 8);
+               c->sb.btrees_lost_data = le64_to_cpu(ext->btrees_lost_data);
+       }
 
        for_each_member_device(c, ca) {
                struct bch_member m = bch2_sb_member_get(src, ca->dev_idx);
@@ -1162,6 +1164,11 @@ static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb,
 
                kfree(errors_silent);
        }
+
+       prt_printf(out, "Btrees with missing data:");
+       prt_tab(out);
+       prt_bitflags(out, __bch2_btree_ids, le64_to_cpu(e->btrees_lost_data));
+       prt_newline(out);
 }
 
 static const struct bch_sb_field_ops bch_sb_field_ops_ext = {