www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
patch.diff
author Liam R. Howlett <Liam.Howlett@oracle.com>
Fri, 16 Jun 2023 13:44:54 +0000 (09:44 -0400)
committer Liam R. Howlett <Liam.Howlett@oracle.com>
Fri, 16 Jun 2023 15:38:01 +0000 (11:38 -0400)
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
arch/x86/Kconfig
arch/x86/mm/fault.c
include/linux/mm.h
mm/Kconfig
mm/memory.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 53bab123a8ee40a2484ae63b29cac4e8f545ee32..cb1031018afa5fb4d90dc61069fd2e586c26ef14 100644
@@ -276,6 +276,7 @@ config X86
        select HAVE_GENERIC_VDSO
        select HOTPLUG_SMT                      if SMP
        select IRQ_FORCED_THREADING
+       select LOCK_MM_AND_FIND_VMA
        select NEED_PER_CPU_EMBED_FIRST_CHUNK
        select NEED_PER_CPU_PAGE_FIRST_CHUNK
        select NEED_SG_DMA_LENGTH
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e4399983c50c05aa1e2906ce9df15874304f9b31..a6199100444e9b31d0abbdfe4df72a4ae7bcd0b9 100644
@@ -880,12 +880,6 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
        __bad_area_nosemaphore(regs, error_code, address, pkey, si_code);
 }
 
-static noinline void
-bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
-{
-       __bad_area(regs, error_code, address, 0, SEGV_MAPERR);
-}
-
 static inline bool bad_area_access_from_pkeys(unsigned long error_code,
                struct vm_area_struct *vma)
 {
@@ -1336,15 +1330,15 @@ void do_user_addr_fault(struct pt_regs *regs,
 
 #ifdef CONFIG_PER_VMA_LOCK
        if (!(flags & FAULT_FLAG_USER))
-               goto lock_mmap;
+               goto retry;
 
        vma = lock_vma_under_rcu(mm, address);
        if (!vma)
-               goto lock_mmap;
+               goto retry;
 
        if (unlikely(access_error(error_code, vma))) {
                vma_end_read(vma);
-               goto lock_mmap;
+               goto retry;
        }
        fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
        vma_end_read(vma);
@@ -1363,54 +1357,12 @@ void do_user_addr_fault(struct pt_regs *regs,
                                                 ARCH_DEFAULT_PKEY);
                return;
        }
-lock_mmap:
 #endif /* CONFIG_PER_VMA_LOCK */
 
-       /*
-        * Kernel-mode access to the user address space should only occur
-        * on well-defined single instructions listed in the exception
-        * tables.  But, an erroneous kernel fault occurring outside one of
-        * those areas which also holds mmap_lock might deadlock attempting
-        * to validate the fault against the address space.
-        *
-        * Only do the expensive exception table search when we might be at
-        * risk of a deadlock.  This happens if we
-        * 1. Failed to acquire mmap_lock, and
-        * 2. The access did not originate in userspace.
-        */
-       if (unlikely(!mmap_read_trylock(mm))) {
-               if (!user_mode(regs) && !search_exception_tables(regs->ip)) {
-                       /*
-                        * Fault from code in kernel from
-                        * which we do not expect faults.
-                        */
-                       bad_area_nosemaphore(regs, error_code, address);
-                       return;
-               }
 retry:
-               mmap_read_lock(mm);
-       } else {
-               /*
-                * The above down_read_trylock() might have succeeded in
-                * which case we'll have missed the might_sleep() from
-                * down_read():
-                */
-               might_sleep();
-       }
-
-       vma = find_vma(mm, address);
+       vma = lock_mm_and_find_vma(mm, address, regs);
        if (unlikely(!vma)) {
-               bad_area(regs, error_code, address);
-               return;
-       }
-       if (likely(vma->vm_start <= address))
-               goto good_area;
-       if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
-               bad_area(regs, error_code, address);
-               return;
-       }
-       if (unlikely(expand_stack(vma, address))) {
-               bad_area(regs, error_code, address);
+               bad_area_nosemaphore(regs, error_code, address);
                return;
        }
 
@@ -1418,7 +1370,6 @@ retry:
         * Ok, we have a good vm_area for this memory access, so
         * we can handle it..
         */
-good_area:
        if (unlikely(access_error(error_code, vma))) {
                bad_area_access_error(regs, error_code, address, vma);
                return;
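
For architectures that select LOCK_MM_AND_FIND_VMA, the conversion follows the pattern of the fault.c hunks above: the open-coded trylock / exception-table / find_vma() / expand_stack() sequence collapses into one call, and the helper drops mmap_lock itself on failure. A minimal sketch of a converted handler follows; do_page_fault(), fault_error() and the flags setup are placeholders standing in for an architecture's own fault path, not code from this patch.

/* Sketch only: do_page_fault() and fault_error() are hypothetical names. */
static void do_page_fault(struct pt_regs *regs, unsigned long address,
                          unsigned int flags)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        vm_fault_t fault;

        /*
         * Takes mmap_lock for reading and handles stack expansion;
         * returns NULL with no lock held on failure.
         */
        vma = lock_mm_and_find_vma(mm, address, regs);
        if (unlikely(!vma)) {
                fault_error(regs, address);
                return;
        }

        fault = handle_mm_fault(vma, address, flags, regs);
        /* VM_FAULT_RETRY and VM_FAULT_COMPLETED drop mmap_lock themselves */
        if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
                mmap_read_unlock(mm);
        /* ... error reporting and the retry loop stay as before ... */
}
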
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 27ce77080c79c7a026e641e491246fcf6f7e26c0..570cf906fbcc1e00de40f1e1106f357cc4a9bb0f 100644
@@ -2325,6 +2325,8 @@ void unmap_mapping_pages(struct address_space *mapping,
                pgoff_t start, pgoff_t nr, bool even_cows);
 void unmap_mapping_range(struct address_space *mapping,
                loff_t const holebegin, loff_t const holelen, int even_cows);
+struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm,
+               unsigned long address, struct pt_regs *regs);
 #else
 static inline vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
                                         unsigned long address, unsigned int flags,
diff --git a/mm/Kconfig b/mm/Kconfig
index 7672a22647b4a2434c22bda7b92b3897efc84783..e3454087fd31ae8307a4272b55a031c40356d3ed 100644
@@ -1206,6 +1206,10 @@ config PER_VMA_LOCK
          This feature allows locking each virtual memory area separately when
          handling page faults instead of taking mmap_lock.
 
+config LOCK_MM_AND_FIND_VMA
+       bool
+       depends on !STACK_GROWSUP
+
 source "mm/damon/Kconfig"
 
 endmenu
diff --git a/mm/memory.c b/mm/memory.c
index f69fbc2511984e224ab31f38a6315404b5d902b1..c291511c3155d0d37812937054d9d306e3c87bef 100644
@@ -5262,6 +5262,105 @@ out:
 }
 EXPORT_SYMBOL_GPL(handle_mm_fault);
 
+#ifdef CONFIG_LOCK_MM_AND_FIND_VMA
+#include <linux/extable.h>
+
+static inline bool get_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs)
+{
+       /* Even if this succeeds, make it clear we *might* have slept */
+       if (likely(mmap_read_trylock(mm))) {
+               might_sleep();
+               return true;
+       }
+
+       if (regs && !user_mode(regs)) {
+               unsigned long ip = instruction_pointer(regs);
+               if (!search_exception_tables(ip))
+                       return false;
+       }
+
+       mmap_read_lock(mm);
+       return true;
+}
+
+static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs)
+{
+#if 0
+       /*
+        * We don't have this operation yet.
+        *
+        * It should be easy enough to do: it's basically a
+        * It should be easy enough to do: it's basically an
+        * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but
+        * it also needs the proper lockdep magic etc.
+        */
+       if (mmap_upgrade_trylock(mm))
+               return true;
+#endif
+
+       mmap_read_unlock(mm);
+       if (regs && !user_mode(regs)) {
+               unsigned long ip = instruction_pointer(regs);
+               if (!search_exception_tables(ip))
+                       return false;
+       }
+       mmap_write_lock(mm);
+       return true;
+}
+
+/*
+ * Helper for page fault handling.
+ *
+ * This is kind of equivalent to "mmap_read_lock()" followed
+ * by "find_extend_vma()", except it's a lot more careful about
+ * the locking (and will drop the lock on failure).
+ *
+ * For example, if we have a kernel bug that causes a page
+ * fault, we don't want to just use mmap_read_lock() to get
+ * the mm lock, because that would deadlock if the bug were
+ * to happen while we're holding the mm lock for writing.
+ *
+ * So this checks the exception tables on kernel faults in
+ * order to only do all this for instructions that are actually
+ * expected to fault.
+ *
+ * We can also actually take the mm lock for writing if we
+ * need to extend the vma, which helps the VM layer a lot.
+ */
+struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm,
+                       unsigned long addr, struct pt_regs *regs)
+{
+       struct vm_area_struct *vma;
+
+       if (!get_mmap_lock_carefully(mm, regs))
+               return NULL;
+
+       vma = find_vma(mm, addr);
+       if (likely(vma && (vma->vm_start <= addr)))
+               return vma;
+
+       /*
+        * Well, dang. We might still be successful, but only
+        * if we can extend a vma to do so.
+        */
+       if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) {
+               mmap_read_unlock(mm);
+               return NULL;
+       }
+
+       if (!upgrade_mmap_lock_carefully(mm, regs))
+               return NULL;
+
+       vma = find_extend_vma(mm, addr);
+       if (!vma) {
+               mmap_write_unlock(mm);
+               return NULL;
+       }
+       mmap_write_downgrade(mm);
+       return vma;
+}
+#endif
+
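
The #if 0 block above describes the one primitive that does not exist yet. Purely as an illustration of that comment's idea, and not buildable as-is (RWSEM_READER_BIAS, RWSEM_WRITER_LOCKED, owner and dep_map handling are internal to kernel/locking/rwsem.c, and waiter/handoff details are glossed over), an mmap_upgrade_trylock() along those lines might look roughly like:

/* Hypothetical sketch only: relies on rwsem internals that are private. */
static inline bool mmap_upgrade_trylock(struct mm_struct *mm)
{
        struct rw_semaphore *sem = &mm->mmap_lock;
        long count = RWSEM_READER_BIAS; /* we are the only reader, no waiters */

        /* Upgrade only succeeds if nothing else holds or waits for the lock */
        if (!atomic_long_try_cmpxchg_acquire(&sem->count, &count,
                                             RWSEM_WRITER_LOCKED))
                return false;

        /* "the proper lockdep magic": swap the read hold for a write hold */
        rwsem_release(&sem->dep_map, _RET_IP_);
        rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
        atomic_long_set(&sem->owner, (long)current);
        return true;
}

With such a primitive, upgrade_mmap_lock_carefully() could keep the read-side critical section intact instead of unlocking, relocking for write and re-looking up the VMA via find_extend_vma().
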
 #ifdef CONFIG_PER_VMA_LOCK
 /*
  * Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be