From: Suren Baghdasaryan
Date: Sun, 4 Dec 2022 23:47:03 +0000 (-0800)
Subject: fixup: separate vma->lock from vm_area_struct
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=6638055b8f70dff1bb64ef8030020ae7fb96cf9f;p=users%2Fjedix%2Flinux-maple.git

fixup: separate vma->lock from vm_area_struct

vma->lock being part of vm_area_struct causes a performance regression
during page faults: under contention the rwsem's count and owner fields
are constantly updated, and having other vm_area_struct fields that are
used during page fault handling next to them causes constant cache line
bouncing. Fix that by moving the lock outside of vm_area_struct.

Signed-off-by: Suren Baghdasaryan
---

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 17b6aa12c47a..747edd74e7b5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -614,11 +614,9 @@ struct vm_operations_struct {
 };
 
 #ifdef CONFIG_PER_VMA_LOCK
-static inline void vma_init_lock(struct vm_area_struct *vma)
-{
-	init_rwsem(&vma->lock);
-	vma->vm_lock_seq = -1;
-}
+
+void vma_lock_init(struct vm_area_struct *vma);
+void vma_lock_free(struct vm_area_struct *vma);
 
 static inline void vma_write_lock(struct vm_area_struct *vma)
 {
@@ -634,25 +632,33 @@ static inline void vma_write_lock(struct vm_area_struct *vma)
 	if (vma->vm_lock_seq == mm_lock_seq)
 		return;
 
-	down_write(&vma->lock);
+	down_write(vma->lock);
 	vma->vm_lock_seq = mm_lock_seq;
-	up_write(&vma->lock);
+	up_write(vma->lock);
 }
 
+/*
+ * Try to read-lock a vma. The function is allowed to occasionally yield false
+ * locked result to avoid performance overhead, in which case we fall back to
+ * using mmap_lock. The function should never yield false unlocked result.
+ */
 static inline bool vma_read_trylock(struct vm_area_struct *vma)
 {
-	if (unlikely(down_read_trylock(&vma->lock) == 0))
+	/* Check before locking. A race might cause false locked result. */
+	if (vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))
+		return false;
+
+	if (unlikely(down_read_trylock(vma->lock) == 0))
 		return false;
 
 	/*
-	 * Overflow might produce false locked result but it's not critical
-	 * because we just fall back to using mmap_lock in such case.
-	 * False unlocked result is critical but is impossible because we
-	 * modify and check vma->vm_lock_seq under vma->lock protection and
-	 * mm->mm_lock_seq modification invalidates all existing locks.
+	 * Overflow might produce false locked result.
+	 * False unlocked result is impossible because we modify and check
+	 * vma->vm_lock_seq under vma->lock protection and mm->mm_lock_seq
+	 * modification invalidates all existing locks.
 	 */
-	if (vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq)) {
-		up_read(&vma->lock);
+	if (unlikely(vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))) {
+		up_read(vma->lock);
 		return false;
 	}
 	return true;
@@ -660,13 +666,13 @@ static inline bool vma_read_trylock(struct vm_area_struct *vma)
 
 static inline void vma_read_unlock(struct vm_area_struct *vma)
 {
-	up_read(&vma->lock);
+	up_read(vma->lock);
 }
 
 static inline void vma_assert_locked(struct vm_area_struct *vma)
 {
-	lockdep_assert_held(&vma->lock);
-	VM_BUG_ON_VMA(!rwsem_is_locked(&vma->lock), vma);
+	lockdep_assert_held(vma->lock);
+	VM_BUG_ON_VMA(!rwsem_is_locked(vma->lock), vma);
 }
 
 static inline void vma_assert_write_locked(struct vm_area_struct *vma)
@@ -681,7 +687,7 @@ static inline void vma_assert_write_locked(struct vm_area_struct *vma)
 
 static inline void vma_assert_no_reader(struct vm_area_struct *vma)
 {
-	VM_BUG_ON_VMA(rwsem_is_locked(&vma->lock) &&
+	VM_BUG_ON_VMA(rwsem_is_locked(vma->lock) &&
 		      vma->vm_lock_seq != READ_ONCE(vma->vm_mm->mm_lock_seq),
 		      vma);
 }
@@ -691,7 +697,8 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
 
 #else /* CONFIG_PER_VMA_LOCK */
 
-static inline void vma_init_lock(struct vm_area_struct *vma) {}
+static inline void vma_lock_init(struct vm_area_struct *vma) {}
+static inline void vma_lock_free(struct vm_area_struct *vma) {}
 static inline void vma_write_lock(struct vm_area_struct *vma) {}
 static inline bool vma_read_trylock(struct vm_area_struct *vma)
 		{ return false; }
@@ -710,7 +717,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
 	vma->vm_mm = mm;
 	vma->vm_ops = &dummy_vm_ops;
 	INIT_LIST_HEAD(&vma->anon_vma_chain);
-	vma_init_lock(vma);
+	vma_lock_init(vma);
 }
 
 /* Use when VMA is not part of the VMA tree and needs no locking */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e497f8613c26..1e58233f552a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -485,7 +485,7 @@ struct vm_area_struct {
 	 * So please be careful with adding new fields before lock, which can
 	 * push the 2 fields into one cacheline.
 	 */
-	struct rw_semaphore lock;
+	struct rw_semaphore *lock;
 #endif
 	/*
 	 * For areas with an address space and backing store,
diff --git a/kernel/fork.c b/kernel/fork.c
index c942825b802e..8440a156cd00 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -474,7 +474,7 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
 		 */
 		*new = data_race(*orig);
 		INIT_LIST_HEAD(&new->anon_vma_chain);
-		vma_init_lock(new);
+		vma_lock_init(new);
 		dup_anon_vma_name(orig, new);
 	}
 	return new;
@@ -485,6 +485,7 @@ static inline void __vm_area_free(struct vm_area_struct *vma)
 {
 	/* The vma should either have no lock holders or be write-locked. */
 	vma_assert_no_reader(vma);
+	vma_lock_free(vma);
 	kmem_cache_free(vm_area_cachep, vma);
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index b8c2bdf10151..b90f72db83d2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5248,6 +5248,19 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 EXPORT_SYMBOL_GPL(handle_mm_fault);
 
 #ifdef CONFIG_PER_VMA_LOCK
+
+void vma_lock_init(struct vm_area_struct *vma)
+{
+	vma->lock = kmalloc(sizeof(struct rw_semaphore), GFP_KERNEL);
+	init_rwsem(vma->lock);
+	vma->vm_lock_seq = -1;
+}
+
+void vma_lock_free(struct vm_area_struct *vma)
+{
+	kfree(vma->lock);
+}
+
 /*
  * Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be
  * stable and not isolated. If the VMA is not found or is being modified the