From: Suren Baghdasaryan Date: Mon, 4 Aug 2025 23:33:48 +0000 (-0700) Subject: mm: limit the scope of vma_start_read() X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=b2e729675f93758a5dccfd980c3daf5f6ae0252e;p=users%2Fjedix%2Flinux-maple.git mm: limit the scope of vma_start_read() Limit the scope of vma_start_read() as it is used only as a helper for higher-level locking functions implemented inside mmap_lock.c and we are about to introduce more complex RCU rules for this function. The change is pure code refactoring and has no functional changes. Link: https://lkml.kernel.org/r/20250804233349.1278678-1-surenb@google.com Suggested-by: Vlastimil Babka Signed-off-by: Suren Baghdasaryan Reviewed-by: Lorenzo Stoakes Reviewed-by: Vlastimil Babka Cc: Jann Horn Cc: Liam Howlett Signed-off-by: Andrew Morton --- diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 11a078de9150..2c9fffa58714 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -147,91 +147,6 @@ static inline void vma_refcount_put(struct vm_area_struct *vma) } } -/* - * Try to read-lock a vma. The function is allowed to occasionally yield false - * locked result to avoid performance overhead, in which case we fall back to - * using mmap_lock. The function should never yield false unlocked result. - * False locked result is possible if mm_lock_seq overflows or if vma gets - * reused and attached to a different mm before we lock it. - * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got - * detached. - * - * WARNING! The vma passed to this function cannot be used if the function - * fails to lock it because in certain cases RCU lock is dropped and then - * reacquired. Once RCU lock is dropped the vma can be concurently freed. - */ -static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, - struct vm_area_struct *vma) -{ - int oldcnt; - - /* - * Check before locking. A race might cause false locked result. - * We can use READ_ONCE() for the mm_lock_seq here, and don't need - * ACQUIRE semantics, because this is just a lockless check whose result - * we don't rely on for anything - the mm_lock_seq read against which we - * need ordering is below. - */ - if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence)) - return NULL; - - /* - * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire() - * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET. - * Acquire fence is required here to avoid reordering against later - * vm_lock_seq check and checks inside lock_vma_under_rcu(). - */ - if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt, - VMA_REF_LIMIT))) { - /* return EAGAIN if vma got detached from under us */ - return oldcnt ? NULL : ERR_PTR(-EAGAIN); - } - - rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); - - /* - * If vma got attached to another mm from under us, that mm is not - * stable and can be freed in the narrow window after vma->vm_refcnt - * is dropped and before rcuwait_wake_up(mm) is called. Grab it before - * releasing vma->vm_refcnt. - */ - if (unlikely(vma->vm_mm != mm)) { - /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */ - struct mm_struct *other_mm = vma->vm_mm; - - /* - * __mmdrop() is a heavy operation and we don't need RCU - * protection here. Release RCU lock during these operations. - * We reinstate the RCU read lock as the caller expects it to - * be held when this function returns even on error. - */ - rcu_read_unlock(); - mmgrab(other_mm); - vma_refcount_put(vma); - mmdrop(other_mm); - rcu_read_lock(); - return NULL; - } - - /* - * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result. - * False unlocked result is impossible because we modify and check - * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq - * modification invalidates all existing locks. - * - * We must use ACQUIRE semantics for the mm_lock_seq so that if we are - * racing with vma_end_write_all(), we only start reading from the VMA - * after it has been unlocked. - * This pairs with RELEASE semantics in vma_end_write_all(). - */ - if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) { - vma_refcount_put(vma); - return NULL; - } - - return vma; -} - /* * Use only while holding mmap read lock which guarantees that locking will not * fail (nobody can concurrently write-lock the vma). vma_start_read() should diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c index b006cec8e6fe..10826f347a9f 100644 --- a/mm/mmap_lock.c +++ b/mm/mmap_lock.c @@ -127,6 +127,91 @@ void vma_mark_detached(struct vm_area_struct *vma) } } +/* + * Try to read-lock a vma. The function is allowed to occasionally yield false + * locked result to avoid performance overhead, in which case we fall back to + * using mmap_lock. The function should never yield false unlocked result. + * False locked result is possible if mm_lock_seq overflows or if vma gets + * reused and attached to a different mm before we lock it. + * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got + * detached. + * + * WARNING! The vma passed to this function cannot be used if the function + * fails to lock it because in certain cases RCU lock is dropped and then + * reacquired. Once RCU lock is dropped the vma can be concurently freed. + */ +static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, + struct vm_area_struct *vma) +{ + int oldcnt; + + /* + * Check before locking. A race might cause false locked result. + * We can use READ_ONCE() for the mm_lock_seq here, and don't need + * ACQUIRE semantics, because this is just a lockless check whose result + * we don't rely on for anything - the mm_lock_seq read against which we + * need ordering is below. + */ + if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence)) + return NULL; + + /* + * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire() + * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET. + * Acquire fence is required here to avoid reordering against later + * vm_lock_seq check and checks inside lock_vma_under_rcu(). + */ + if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt, + VMA_REF_LIMIT))) { + /* return EAGAIN if vma got detached from under us */ + return oldcnt ? NULL : ERR_PTR(-EAGAIN); + } + + rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); + + /* + * If vma got attached to another mm from under us, that mm is not + * stable and can be freed in the narrow window after vma->vm_refcnt + * is dropped and before rcuwait_wake_up(mm) is called. Grab it before + * releasing vma->vm_refcnt. + */ + if (unlikely(vma->vm_mm != mm)) { + /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */ + struct mm_struct *other_mm = vma->vm_mm; + + /* + * __mmdrop() is a heavy operation and we don't need RCU + * protection here. Release RCU lock during these operations. + * We reinstate the RCU read lock as the caller expects it to + * be held when this function returns even on error. + */ + rcu_read_unlock(); + mmgrab(other_mm); + vma_refcount_put(vma); + mmdrop(other_mm); + rcu_read_lock(); + return NULL; + } + + /* + * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result. + * False unlocked result is impossible because we modify and check + * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq + * modification invalidates all existing locks. + * + * We must use ACQUIRE semantics for the mm_lock_seq so that if we are + * racing with vma_end_write_all(), we only start reading from the VMA + * after it has been unlocked. + * This pairs with RELEASE semantics in vma_end_write_all(). + */ + if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) { + vma_refcount_put(vma); + return NULL; + } + + return vma; +} + /* * Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be * stable and not isolated. If the VMA is not found or is being modified the