kernel/fork: throttle call_rcu() calls in vm_area_free
author     Suren Baghdasaryan <surenb@google.com>
           Sun, 7 Aug 2022 08:25:53 +0000 (01:25 -0700)
committer  Liam R. Howlett <Liam.Howlett@oracle.com>
           Wed, 4 Jan 2023 20:59:26 +0000 (15:59 -0500)
call_rcu() can take a long time when callback offloading is enabled.
Its use in vm_area_free() can cause regressions in the exit path when
multiple VMAs are being freed. To minimize that impact, place VMAs into
a list and free them in groups using one call_rcu() call per group.

Signed-off-by: Suren Baghdasaryan <surenb@google.com>
include/linux/mm.h
include/linux/mm_types.h
kernel/fork.c
mm/init-mm.c
mm/mmap.c
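
The pattern the patch implements is simple: each freed VMA is queued on a per-mm list under a spinlock, and once the list grows past VM_AREA_FREE_LIST_MAX the whole batch is detached and handed to a single call_rcu() invocation, with exit_mmap() performing one final drain. Below is a minimal userspace sketch of that batching idea, not kernel code: a pthread mutex stands in for the spinlock, the hypothetical defer_free_batch() stands in for call_rcu(), and the node, pool, pool_free() and drain_pool() names are invented for illustration only.

#include <pthread.h>
#include <stdlib.h>

#define FREE_LIST_MAX 32	/* mirrors VM_AREA_FREE_LIST_MAX in the patch */

struct node {
	struct node *next;
	/* payload would live here */
};

struct pool {
	pthread_mutex_t lock;
	struct node *head;
	int size;
};

/*
 * Stand-in for call_rcu(): here we simply free the detached chain at once;
 * the patch instead defers the whole batch behind one RCU grace period.
 */
static void defer_free_batch(struct node *batch)
{
	while (batch) {
		struct node *next = batch->next;
		free(batch);
		batch = next;
	}
}

/*
 * Counterpart of drain_free_vmas(): detach the list under the lock, then
 * hand the whole batch to a single deferred-free call.
 */
static void drain_pool(struct pool *p)
{
	struct node *batch;

	pthread_mutex_lock(&p->lock);
	batch = p->head;
	p->head = NULL;
	p->size = 0;
	pthread_mutex_unlock(&p->lock);

	if (batch)
		defer_free_batch(batch);
}

/*
 * Counterpart of vm_area_free(): queue the node and only drain once the
 * batch grows past the threshold, so one deferred call covers many frees.
 */
static void pool_free(struct pool *p, struct node *n)
{
	int drain;

	pthread_mutex_lock(&p->lock);
	n->next = p->head;
	p->head = n;
	drain = ++p->size > FREE_LIST_MAX;
	pthread_mutex_unlock(&p->lock);

	if (drain)
		drain_pool(p);
}

int main(void)
{
	struct pool p = { .lock = PTHREAD_MUTEX_INITIALIZER };

	for (int i = 0; i < 100; i++)
		pool_free(&p, calloc(1, sizeof(struct node)));
	drain_pool(&p);		/* final drain, as exit_mmap() does in the patch */
	return 0;
}

Batching this way trades one grace-period-deferred callback per VMA for one per VM_AREA_FREE_LIST_MAX frees, which is what keeps the exit path from regressing when callback offloading makes each call_rcu() expensive.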

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3d11db9f2c3293cf0aa07d7e7e269693d981272e..31abff298fe0bec521c98d04202b9ee83ee51181 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -249,6 +249,7 @@ void setup_initial_init_mm(void *start_code, void *end_code,
 struct vm_area_struct *vm_area_alloc(struct mm_struct *);
 struct vm_area_struct *vm_area_dup(struct vm_area_struct *);
 void vm_area_free(struct vm_area_struct *);
+void drain_free_vmas(struct mm_struct *mm);
 
 #ifndef CONFIG_MMU
 extern struct rb_root nommu_region_tree;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0d1e7c7b0acfa7f4d5547159b0f3d150d39e2d63..1c95cd889cd1fd525188c8eae9643d440e5f0479 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -533,6 +533,9 @@ struct vm_area_struct {
 #endif
 #ifdef CONFIG_NUMA
        struct mempolicy *vm_policy;    /* NUMA policy for the VMA */
+#endif
+#ifdef CONFIG_PER_VMA_LOCK
+       struct list_head vm_free_list;
 #endif
        struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
 } __randomize_layout;
@@ -615,6 +618,11 @@ struct mm_struct {
                                          */
 #ifdef CONFIG_PER_VMA_LOCK
                int mm_lock_seq;
+               struct {
+                       struct list_head head;
+                       spinlock_t lock;
+                       int size;
+               } vma_free_list;
 #endif
 
 
diff --git a/kernel/fork.c b/kernel/fork.c
index b3a7ffc2c0a4a9b4fb8ab2197ca4e51faf9aa3c0..c942825b802ea069a58fd423e6c9709e8f03b6d1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -481,26 +481,75 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
 }
 
 #ifdef CONFIG_PER_VMA_LOCK
-static void __vm_area_free(struct rcu_head *head)
+static inline void __vm_area_free(struct vm_area_struct *vma)
 {
-       struct vm_area_struct *vma = container_of(head, struct vm_area_struct,
-                                                 vm_rcu);
        /* The vma should either have no lock holders or be write-locked. */
        vma_assert_no_reader(vma);
        kmem_cache_free(vm_area_cachep, vma);
 }
-#endif
+
+static void vma_free_rcu_callback(struct rcu_head *head)
+{
+       struct vm_area_struct *first_vma;
+       struct vm_area_struct *vma, *vma2;
+
+       first_vma = container_of(head, struct vm_area_struct, vm_rcu);
+       list_for_each_entry_safe(vma, vma2, &first_vma->vm_free_list, vm_free_list)
+               __vm_area_free(vma);
+       __vm_area_free(first_vma);
+}
+
+void drain_free_vmas(struct mm_struct *mm)
+{
+       struct vm_area_struct *first_vma;
+       LIST_HEAD(to_destroy);
+
+       spin_lock(&mm->vma_free_list.lock);
+       list_splice_init(&mm->vma_free_list.head, &to_destroy);
+       mm->vma_free_list.size = 0;
+       spin_unlock(&mm->vma_free_list.lock);
+
+       if (list_empty(&to_destroy))
+               return;
+
+       first_vma = list_first_entry(&to_destroy, struct vm_area_struct, vm_free_list);
+       /* Remove the head which is allocated on the stack */
+       list_del(&to_destroy);
+
+       call_rcu(&first_vma->vm_rcu, vma_free_rcu_callback);
+}
+
+#define VM_AREA_FREE_LIST_MAX  32
+
+void vm_area_free(struct vm_area_struct *vma)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       bool drain;
+
+       free_anon_vma_name(vma);
+
+       spin_lock(&mm->vma_free_list.lock);
+       list_add(&vma->vm_free_list, &mm->vma_free_list.head);
+       mm->vma_free_list.size++;
+       drain = mm->vma_free_list.size > VM_AREA_FREE_LIST_MAX;
+       spin_unlock(&mm->vma_free_list.lock);
+
+       if (drain)
+               drain_free_vmas(mm);
+}
+
+#else /* CONFIG_PER_VMA_LOCK */
+
+void drain_free_vmas(struct mm_struct *mm) {}
 
 void vm_area_free(struct vm_area_struct *vma)
 {
        free_anon_vma_name(vma);
-#ifdef CONFIG_PER_VMA_LOCK
-       call_rcu(&vma->vm_rcu, __vm_area_free);
-#else
        kmem_cache_free(vm_area_cachep, vma);
-#endif
 }
 
+#endif /* CONFIG_PER_VMA_LOCK */
+
 static void account_kernel_stack(struct task_struct *tsk, int account)
 {
        if (IS_ENABLED(CONFIG_VMAP_STACK)) {
@@ -1135,6 +1184,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
        INIT_LIST_HEAD(&mm->mmlist);
 #ifdef CONFIG_PER_VMA_LOCK
        WRITE_ONCE(mm->mm_lock_seq, 0);
+       INIT_LIST_HEAD(&mm->vma_free_list.head);
+       spin_lock_init(&mm->vma_free_list.lock);
+       mm->vma_free_list.size = 0;
 #endif
        mm_pgtables_bytes_init(mm);
        mm->map_count = 0;
diff --git a/mm/init-mm.c b/mm/init-mm.c
index 33269314e06017509dd39255ea910118bdc7e0a2..b53d23c2d7a34b8c9340b3afb1ffb724fddf73c9 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -39,6 +39,9 @@ struct mm_struct init_mm = {
        .mmlist         = LIST_HEAD_INIT(init_mm.mmlist),
 #ifdef CONFIG_PER_VMA_LOCK
        .mm_lock_seq    = 0,
+       .vma_free_list.head = LIST_HEAD_INIT(init_mm.vma_free_list.head),
+       .vma_free_list.lock =  __SPIN_LOCK_UNLOCKED(init_mm.vma_free_list.lock),
+       .vma_free_list.size = 0,
 #endif
        .user_ns        = &init_user_ns,
        .cpu_bitmap     = CPU_BITS_NONE,
diff --git a/mm/mmap.c b/mm/mmap.c
index bd73441548d77a9f320e9b94d585050b809c3542..60412fe3114d36c7e7e8102aefb19522cd00d9af 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3174,6 +3174,7 @@ void exit_mmap(struct mm_struct *mm)
        trace_exit_mmap(mm);
        __mt_destroy(&mm->mm_mt);
        mmap_write_unlock(mm);
+       drain_free_vmas(mm);
        vm_unacct_memory(nr_accounted);
 }