From: Suren Baghdasaryan
Date: Sun, 4 Dec 2022 23:47:03 +0000 (-0800)
Subject: fixup: separate vma->lock from vm_area_struct
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=6638055b8f70dff1bb64ef8030020ae7fb96cf9f;p=users%2Fjedix%2Flinux-maple.git

fixup: separate vma->lock from vm_area_struct

vma->lock being part of vm_area_struct causes a performance regression
during page faults: under contention the rwsem's count and owner fields
are constantly updated, and having other vm_area_struct fields that are
used during page fault handling next to them causes constant cache line
bouncing. Fix that by moving the lock outside of vm_area_struct.

Signed-off-by: Suren Baghdasaryan
---

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 17b6aa12c47a..747edd74e7b5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -614,11 +614,9 @@ struct vm_operations_struct {
 };
 
 #ifdef CONFIG_PER_VMA_LOCK
-static inline void vma_init_lock(struct vm_area_struct *vma)
-{
-	init_rwsem(&vma->lock);
-	vma->vm_lock_seq = -1;
-}
+
+void vma_lock_init(struct vm_area_struct *vma);
+void vma_lock_free(struct vm_area_struct *vma);
 
 static inline void vma_write_lock(struct vm_area_struct *vma)
 {
@@ -634,25 +632,33 @@ static inline void vma_write_lock(struct vm_area_struct *vma)
 	if (vma->vm_lock_seq == mm_lock_seq)
 		return;
 
-	down_write(&vma->lock);
+	down_write(vma->lock);
 	vma->vm_lock_seq = mm_lock_seq;
-	up_write(&vma->lock);
+	up_write(vma->lock);
 }
 
+/*
+ * Try to read-lock a vma. The function is allowed to occasionally yield false
+ * locked result to avoid performance overhead, in which case we fall back to
+ * using mmap_lock. The function should never yield false unlocked result.
+ */
 static inline bool vma_read_trylock(struct vm_area_struct *vma)
 {
-	if (unlikely(down_read_trylock(&vma->lock) == 0))
+	/* Check before locking. A race might cause false locked result. */
+	if (vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))
+		return false;
+
+	if (unlikely(down_read_trylock(vma->lock) == 0))
 		return false;
 
 	/*
-	 * Overflow might produce false locked result but it's not critical
-	 * because we just fall back to using mmap_lock in such case.
-	 * False unlocked result is critical but is impossible because we
-	 * modify and check vma->vm_lock_seq under vma->lock protection and
-	 * mm->mm_lock_seq modification invalidates all existing locks.
+	 * Overflow might produce false locked result.
+	 * False unlocked result is impossible because we modify and check
+	 * vma->vm_lock_seq under vma->lock protection and mm->mm_lock_seq
+	 * modification invalidates all existing locks.
 	 */
-	if (vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq)) {
-		up_read(&vma->lock);
+	if (unlikely(vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))) {
+		up_read(vma->lock);
 		return false;
 	}
 	return true;
@@ -660,13 +666,13 @@ static inline bool vma_read_trylock(struct vm_area_struct *vma)
 
 static inline void vma_read_unlock(struct vm_area_struct *vma)
 {
-	up_read(&vma->lock);
+	up_read(vma->lock);
 }
 
 static inline void vma_assert_locked(struct vm_area_struct *vma)
 {
-	lockdep_assert_held(&vma->lock);
-	VM_BUG_ON_VMA(!rwsem_is_locked(&vma->lock), vma);
+	lockdep_assert_held(vma->lock);
+	VM_BUG_ON_VMA(!rwsem_is_locked(vma->lock), vma);
 }
 
 static inline void vma_assert_write_locked(struct vm_area_struct *vma)
@@ -681,7 +687,7 @@ static inline void vma_assert_write_locked(struct vm_area_struct *vma)
 
 static inline void vma_assert_no_reader(struct vm_area_struct *vma)
 {
-	VM_BUG_ON_VMA(rwsem_is_locked(&vma->lock) &&
+	VM_BUG_ON_VMA(rwsem_is_locked(vma->lock) &&
 		      vma->vm_lock_seq != READ_ONCE(vma->vm_mm->mm_lock_seq),
 		      vma);
 }
@@ -691,7 +697,8 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
 
 #else /* CONFIG_PER_VMA_LOCK */
 
-static inline void vma_init_lock(struct vm_area_struct *vma) {}
+static inline void vma_lock_init(struct vm_area_struct *vma) {}
+static inline void vma_lock_free(struct vm_area_struct *vma) {}
 static inline void vma_write_lock(struct vm_area_struct *vma) {}
 static inline bool vma_read_trylock(struct vm_area_struct *vma)
 		{ return false; }
@@ -710,7 +717,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
 	vma->vm_mm = mm;
 	vma->vm_ops = &dummy_vm_ops;
 	INIT_LIST_HEAD(&vma->anon_vma_chain);
-	vma_init_lock(vma);
+	vma_lock_init(vma);
 }
 
 /* Use when VMA is not part of the VMA tree and needs no locking */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e497f8613c26..1e58233f552a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -485,7 +485,7 @@ struct vm_area_struct {
 	 * So please be careful with adding new fields before lock, which can
 	 * push the 2 fields into one cacheline.
 	 */
-	struct rw_semaphore lock;
+	struct rw_semaphore *lock;
 #endif
 	/*
 	 * For areas with an address space and backing store,
diff --git a/kernel/fork.c b/kernel/fork.c
index c942825b802e..8440a156cd00 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -474,7 +474,7 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
 		 */
 		*new = data_race(*orig);
 		INIT_LIST_HEAD(&new->anon_vma_chain);
-		vma_init_lock(new);
+		vma_lock_init(new);
 		dup_anon_vma_name(orig, new);
 	}
 	return new;
@@ -485,6 +485,7 @@ static inline void __vm_area_free(struct vm_area_struct *vma)
 {
 	/* The vma should either have no lock holders or be write-locked. */
 	vma_assert_no_reader(vma);
+	vma_lock_free(vma);
 	kmem_cache_free(vm_area_cachep, vma);
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index b8c2bdf10151..b90f72db83d2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5248,6 +5248,19 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 EXPORT_SYMBOL_GPL(handle_mm_fault);
 
 #ifdef CONFIG_PER_VMA_LOCK
+
+void vma_lock_init(struct vm_area_struct *vma)
+{
+	vma->lock = kmalloc(sizeof(struct rw_semaphore), GFP_KERNEL);
+	init_rwsem(vma->lock);
+	vma->vm_lock_seq = -1;
+}
+
+void vma_lock_free(struct vm_area_struct *vma)
+{
+	kfree(vma->lock);
+}
+
 /*
  * Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be
  * stable and not isolated. If the VMA is not found or is being modified the