]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
ext4: remove unnecessary s_md_lock on update s_mb_last_group
authorBaokun Li <libaokun1@huawei.com>
Mon, 14 Jul 2025 13:03:14 +0000 (21:03 +0800)
committerTheodore Ts'o <tytso@mit.edu>
Fri, 25 Jul 2025 13:14:16 +0000 (09:14 -0400)
After we optimized the block group lock, we found another lock
contention issue when running will-it-scale/fallocate2 with multiple
processes. The fallocate's block allocation and the truncate's block
release were fighting over the s_md_lock. The problem is, this lock
protects totally different things in those two processes: the list of
freed data blocks (s_freed_data_list) when releasing, and where to start
looking for new blocks (mb_last_group) when allocating.

Now we only need to track s_mb_last_group and no longer need to track
s_mb_last_start, so we don't need the s_md_lock lock to ensure that the
two are consistent. Since s_mb_last_group is merely a hint and doesn't
require strong synchronization, READ_ONCE/WRITE_ONCE is sufficient.

Besides, the s_mb_last_group data type only requires ext4_group_t
(i.e., unsigned int), rendering unsigned long superfluous.

Performance test data follows:

Test: Running will-it-scale/fallocate2 on CPU-bound containers.
Observation: Average fallocate operations per container per second.

|CPU: Kunpeng 920   |          P80           |            P1           |
|Memory: 512GB      |------------------------|-------------------------|
|960GB SSD (0.5GB/s)| base  |    patched     | base   |    patched     |
|-------------------|-------|----------------|--------|----------------|
|mb_optimize_scan=0 | 4821  | 9636  (+99.8%) | 314065 | 337597 (+7.4%) |
|mb_optimize_scan=1 | 4784  | 4834  (+1.04%) | 316344 | 341440 (+7.9%) |

|CPU: AMD 9654 * 2  |          P96           |             P1          |
|Memory: 1536GB     |------------------------|-------------------------|
|960GB SSD (1GB/s)  | base  |    patched     | base   |    patched     |
|-------------------|-------|----------------|--------|----------------|
|mb_optimize_scan=0 | 15371 | 22341 (+45.3%) | 205851 | 219707 (+6.7%) |
|mb_optimize_scan=1 | 6101  | 9177  (+50.4%) | 207373 | 215732 (+4.0%) |

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Reviewed-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
Reviewed-by: Zhang Yi <yi.zhang@huawei.com>
Link: https://patch.msgid.link/20250714130327.1830534-5-libaokun1@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
fs/ext4/ext4.h
fs/ext4/mballoc.c

index a0908037ced789be8ca27ef890a97353a2a480a6..aae7dac5aca6ca7a8c231c673acc4130903a28d7 100644 (file)
@@ -1630,7 +1630,7 @@ struct ext4_sb_info {
        unsigned int s_mb_group_prealloc;
        unsigned int s_max_dir_size_kb;
        /* where last allocation was done - for stream allocation */
-       unsigned long s_mb_last_group;
+       ext4_group_t s_mb_last_group;
        unsigned int s_mb_prefetch;
        unsigned int s_mb_prefetch_limit;
        unsigned int s_mb_best_avail_max_trim_order;
index e3a5103e1620e7cf17fcdfdb2e89c1d8883d0544..025b759ca6437f0df00e8ebc55e000ce349e9123 100644 (file)
@@ -2168,11 +2168,8 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
        ac->ac_buddy_folio = e4b->bd_buddy_folio;
        folio_get(ac->ac_buddy_folio);
        /* store last allocated for subsequent stream allocation */
-       if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
-               spin_lock(&sbi->s_md_lock);
-               sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
-               spin_unlock(&sbi->s_md_lock);
-       }
+       if (ac->ac_flags & EXT4_MB_STREAM_ALLOC)
+               WRITE_ONCE(sbi->s_mb_last_group, ac->ac_f_ex.fe_group);
        /*
         * As we've just preallocated more space than
         * user requested originally, we store allocated
@@ -2845,10 +2842,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 
        /* if stream allocation is enabled, use global goal */
        if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
-               /* TBD: may be hot point */
-               spin_lock(&sbi->s_md_lock);
-               ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
-               spin_unlock(&sbi->s_md_lock);
+               ac->ac_g_ex.fe_group = READ_ONCE(sbi->s_mb_last_group);
                ac->ac_g_ex.fe_start = -1;
                ac->ac_flags &= ~EXT4_MB_HINT_TRY_GOAL;
        }