Merge tag 'for-6.9-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 27 Mar 2024 20:56:41 +0000 (13:56 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 27 Mar 2024 20:56:41 +0000 (13:56 -0700)
Pull btrfs fixes from David Sterba:

 - fix race when reading extent buffer and 'uptodate' status is missed
   by one thread (introduced in 6.5)

 - do additional validation of devices using major:minor numbers

 - zoned mode fixes:
     - use zone-aware super block access during scrub
     - fix use-after-free during device replace (found by KASAN)
     - also delete zones that are 100% unusable to reclaim space

 - extent unpinning fixes:
     - fix extent map leak after error handling
     - print correct range in error message

 - error code and message updates

* tag 'for-6.9-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix race in read_extent_buffer_pages()
  btrfs: return accurate error code on open failure in open_fs_devices()
  btrfs: zoned: don't skip block groups with 100% zone unusable
  btrfs: use btrfs_warn() to log message at btrfs_add_extent_mapping()
  btrfs: fix message not properly printing interval when adding extent map
  btrfs: fix warning messages not printing interval at unpin_extent_range()
  btrfs: fix extent map leak in unexpected scenario at unpin_extent_cache()
  btrfs: validate device maj:min during open
  btrfs: zoned: fix use-after-free in do_zone_finish()
  btrfs: zoned: use zone aware sb location for scrub

fs/btrfs/block-group.c
fs/btrfs/extent_io.c
fs/btrfs/extent_map.c
fs/btrfs/scrub.c
fs/btrfs/volumes.c
fs/btrfs/zoned.c

index 5f7587ca1ca7720d26a7cd192fdcd7ccd97c55bd..1e09aeea69c22e011b5a8f305421b342d04aa8b4 100644 (file)
@@ -1559,7 +1559,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
                 * needing to allocate extents from the block group.
                 */
                used = btrfs_space_info_used(space_info, true);
-               if (space_info->total_bytes - block_group->length < used) {
+               if (space_info->total_bytes - block_group->length < used &&
+                   block_group->zone_unusable < block_group->length) {
                        /*
                         * Add a reference for the list, compensate for the ref
                         * drop under the "next" label for the
index 7441245b1ceb1508558782a88759fd34ddb84818..61594eaf1f8969fc3ba04604e3470a8932450767 100644 (file)
@@ -4333,6 +4333,19 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
        if (test_and_set_bit(EXTENT_BUFFER_READING, &eb->bflags))
                goto done;
 
+       /*
+        * Between the initial test_bit(EXTENT_BUFFER_UPTODATE) and the above
+        * test_and_set_bit(EXTENT_BUFFER_READING), someone else could have
+        * started and finished reading the same eb.  In this case, UPTODATE
+        * will now be set, and we shouldn't read it in again.
+        */
+       if (unlikely(test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))) {
+               clear_bit(EXTENT_BUFFER_READING, &eb->bflags);
+               smp_mb__after_atomic();
+               wake_up_bit(&eb->bflags, EXTENT_BUFFER_READING);
+               return 0;
+       }
+
        clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
        eb->read_mirror = 0;
        check_buffer_tree_ref(eb);
index 347ca13d15a97586b36669e823e2ed3bec86449f..445f7716f1e2f70b3f780e7c1f03d020f0eb6346 100644 (file)
@@ -309,7 +309,7 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
                btrfs_warn(fs_info,
 "no extent map found for inode %llu (root %lld) when unpinning extent range [%llu, %llu), generation %llu",
                           btrfs_ino(inode), btrfs_root_id(inode->root),
-                          start, len, gen);
+                          start, start + len, gen);
                ret = -ENOENT;
                goto out;
        }
@@ -318,7 +318,7 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
                btrfs_warn(fs_info,
 "found extent map for inode %llu (root %lld) with unexpected start offset %llu when unpinning extent range [%llu, %llu), generation %llu",
                           btrfs_ino(inode), btrfs_root_id(inode->root),
-                          em->start, start, len, gen);
+                          em->start, start, start + len, gen);
                ret = -EUCLEAN;
                goto out;
        }
@@ -340,9 +340,9 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
                em->mod_len = em->len;
        }
 
-       free_extent_map(em);
 out:
        write_unlock(&tree->lock);
+       free_extent_map(em);
        return ret;
 
 }
@@ -629,13 +629,13 @@ int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
                         */
                        ret = merge_extent_mapping(em_tree, existing,
                                                   em, start);
-                       if (ret) {
+                       if (WARN_ON(ret)) {
                                free_extent_map(em);
                                *em_in = NULL;
-                               WARN_ONCE(ret,
-"extent map merge error existing [%llu, %llu) with em [%llu, %llu) start %llu\n",
-                                         existing->start, existing->len,
-                                         orig_start, orig_len, start);
+                               btrfs_warn(fs_info,
+"extent map merge error existing [%llu, %llu) with em [%llu, %llu) start %llu",
+                                          existing->start, extent_map_end(existing),
+                                          orig_start, orig_start + orig_len, start);
                        }
                        free_extent_map(existing);
                }
index c4bd0e60db59253f280236740d23ed1c5b7b92f7..fa25004ab04e7b28d73dee024303c0dab4077db6 100644 (file)
@@ -2812,7 +2812,17 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
                gen = btrfs_get_last_trans_committed(fs_info);
 
        for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
-               bytenr = btrfs_sb_offset(i);
+               ret = btrfs_sb_log_location(scrub_dev, i, 0, &bytenr);
+               if (ret == -ENOENT)
+                       break;
+
+               if (ret) {
+                       spin_lock(&sctx->stat_lock);
+                       sctx->stat.super_errors++;
+                       spin_unlock(&sctx->stat_lock);
+                       continue;
+               }
+
                if (bytenr + BTRFS_SUPER_INFO_SIZE >
                    scrub_dev->commit_total_bytes)
                        break;
index 1dc1f1946ae0eb3158a38b2d214746e7cc4a09ee..f15591f3e54fa4cd7e92103e17b0ae74eb1a54f9 100644 (file)
@@ -692,6 +692,16 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
        device->bdev = file_bdev(bdev_file);
        clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
 
+       if (device->devt != device->bdev->bd_dev) {
+               btrfs_warn(NULL,
+                          "device %s maj:min changed from %d:%d to %d:%d",
+                          device->name->str, MAJOR(device->devt),
+                          MINOR(device->devt), MAJOR(device->bdev->bd_dev),
+                          MINOR(device->bdev->bd_dev));
+
+               device->devt = device->bdev->bd_dev;
+       }
+
        fs_devices->open_devices++;
        if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
            device->devid != BTRFS_DEV_REPLACE_DEVID) {
@@ -1174,23 +1184,30 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
        struct btrfs_device *device;
        struct btrfs_device *latest_dev = NULL;
        struct btrfs_device *tmp_device;
+       int ret = 0;
 
        list_for_each_entry_safe(device, tmp_device, &fs_devices->devices,
                                 dev_list) {
-               int ret;
+               int ret2;
 
-               ret = btrfs_open_one_device(fs_devices, device, flags, holder);
-               if (ret == 0 &&
+               ret2 = btrfs_open_one_device(fs_devices, device, flags, holder);
+               if (ret2 == 0 &&
                    (!latest_dev || device->generation > latest_dev->generation)) {
                        latest_dev = device;
-               } else if (ret == -ENODATA) {
+               } else if (ret2 == -ENODATA) {
                        fs_devices->num_devices--;
                        list_del(&device->dev_list);
                        btrfs_free_device(device);
                }
+               if (ret == 0 && ret2 != 0)
+                       ret = ret2;
        }
-       if (fs_devices->open_devices == 0)
+
+       if (fs_devices->open_devices == 0) {
+               if (ret)
+                       return ret;
                return -EINVAL;
+       }
 
        fs_devices->opened = 1;
        fs_devices->latest_dev = latest_dev;
index 5a3d5ec75c5a94262c2431fce61c84d1e95f7512..4cba80b34387c102a15299a69f1bd11fc0caff2f 100644 (file)
@@ -1574,11 +1574,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
        if (!map)
                return -EINVAL;
 
-       cache->physical_map = btrfs_clone_chunk_map(map, GFP_NOFS);
-       if (!cache->physical_map) {
-               ret = -ENOMEM;
-               goto out;
-       }
+       cache->physical_map = map;
 
        zone_info = kcalloc(map->num_stripes, sizeof(*zone_info), GFP_NOFS);
        if (!zone_info) {
@@ -1690,7 +1686,6 @@ out:
        }
        bitmap_free(active);
        kfree(zone_info);
-       btrfs_free_chunk_map(map);
 
        return ret;
 }
@@ -2175,6 +2170,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
        struct btrfs_chunk_map *map;
        const bool is_metadata = (block_group->flags &
                        (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM));
+       struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
        int ret = 0;
        int i;
 
@@ -2250,6 +2246,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
        btrfs_clear_data_reloc_bg(block_group);
        spin_unlock(&block_group->lock);
 
+       down_read(&dev_replace->rwsem);
        map = block_group->physical_map;
        for (i = 0; i < map->num_stripes; i++) {
                struct btrfs_device *device = map->stripes[i].dev;
@@ -2266,13 +2263,16 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
                                       zinfo->zone_size >> SECTOR_SHIFT);
                memalloc_nofs_restore(nofs_flags);
 
-               if (ret)
+               if (ret) {
+                       up_read(&dev_replace->rwsem);
                        return ret;
+               }
 
                if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA))
                        zinfo->reserved_active_zones++;
                btrfs_dev_clear_active_zone(device, physical);
        }
+       up_read(&dev_replace->rwsem);
 
        if (!fully_written)
                btrfs_dec_block_group_ro(block_group);