x86/ldt: Make modify_ldt synchronous

author Andy Lutomirski <luto@kernel.org>

Thu, 30 Jul 2015 21:31:32 +0000 (14:31 -0700)

committer Ingo Molnar <mingo@kernel.org>

Fri, 31 Jul 2015 08:23:23 +0000 (10:23 +0200)
author Andy Lutomirski <luto@kernel.org>
Thu, 30 Jul 2015 21:31:32 +0000 (14:31 -0700)
committer Ingo Molnar <mingo@kernel.org>
Fri, 31 Jul 2015 08:23:23 +0000 (10:23 +0200)
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h

index a0bf89fd26470102f6f38031808437f01afe8e1f..4e10d73cf01844edc201ed23f51d28a72a537521 100644 (file)
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -280,21 +280,6 @@ static inline void clear_LDT(void)
         set_ldt(NULL, 0);
  }
  
-/*
- * load one particular LDT into the current CPU
- */
-static inline void load_LDT_nolock(mm_context_t *pc)
-{
-       set_ldt(pc->ldt, pc->size);
-}
-
-static inline void load_LDT(mm_context_t *pc)
-{
-       preempt_disable();
-       load_LDT_nolock(pc);
-       preempt_enable();
-}
-
  static inline unsigned long get_desc_base(const struct desc_struct *desc)
  {
         return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h

index 09b9620a73b4841893fc2bd01bfd614d43e7e214..364d27481a52a34f5031933ecc577af0ebf3c129 100644 (file)
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -9,8 +9,7 @@
   * we put the segment information here.
   */
  typedef struct {
-       void *ldt;
-       int size;
+       struct ldt_struct *ldt;
  
  #ifdef CONFIG_X86_64
         /* True if mm supports a task running in 32 bit compatibility mode. */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h

index 804a3a6030ca046500e7a092cfa0a854be77796f..984abfe47edc85db7b1a1809d5e0b2d22a7c0449 100644 (file)
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -33,6 +33,50 @@ static inline void load_mm_cr4(struct mm_struct *mm)
  static inline void load_mm_cr4(struct mm_struct *mm) {}
  #endif
  
+/*
+ * ldt_structs can be allocated, used, and freed, but they are never
+ * modified while live.
+ */
+struct ldt_struct {
+       /*
+        * Xen requires page-aligned LDTs with special permissions.  This is
+        * needed to prevent us from installing evil descriptors such as
+        * call gates.  On native, we could merge the ldt_struct and LDT
+        * allocations, but it's not worth trying to optimize.
+        */
+       struct desc_struct *entries;
+       int size;
+};
+
+static inline void load_mm_ldt(struct mm_struct *mm)
+{
+       struct ldt_struct *ldt;
+
+       /* lockless_dereference synchronizes with smp_store_release */
+       ldt = lockless_dereference(mm->context.ldt);
+
+       /*
+        * Any change to mm->context.ldt is followed by an IPI to all
+        * CPUs with the mm active.  The LDT will not be freed until
+        * after the IPI is handled by all such CPUs.  This means that,
+        * if the ldt_struct changes before we return, the values we see
+        * will be safe, and the new values will be loaded before we run
+        * any user code.
+        *
+        * NB: don't try to convert this to use RCU without extreme care.
+        * We would still need IRQs off, because we don't want to change
+        * the local LDT after an IPI loaded a newer value than the one
+        * that we can see.
+        */
+
+       if (unlikely(ldt))
+               set_ldt(ldt->entries, ldt->size);
+       else
+               clear_LDT();
+
+       DEBUG_LOCKS_WARN_ON(preemptible());
+}
+
  /*
   * Used for LDT copy/destruction.
   */
@@ -78,12 +122,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
                  * was called and then modify_ldt changed
                  * prev->context.ldt but suppressed an IPI to this CPU.
                  * In this case, prev->context.ldt != NULL, because we
-                * never free an LDT while the mm still exists.  That
-                * means that next->context.ldt != prev->context.ldt,
-                * because mms never share an LDT.
+                * never set context.ldt to NULL while the mm still
+                * exists.  That means that next->context.ldt !=
+                * prev->context.ldt, because mms never share an LDT.
                  */
                 if (unlikely(prev->context.ldt != next->context.ldt))
-                       load_LDT_nolock(&next->context);
+                       load_mm_ldt(next);
         }
  #ifdef CONFIG_SMP
           else {
@@ -106,7 +150,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
                         load_cr3(next->pgd);
                         trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
                         load_mm_cr4(next);
-                       load_LDT_nolock(&next->context);
+                       load_mm_ldt(next);
                 }
         }
  #endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c

index 922c5e0cea4c961b1aa6e7a266ce588de7f7a300..cb9e5df42dd2464f7249eea78cc28753eb858ec0 100644 (file)
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1410,7 +1410,7 @@ void cpu_init(void)
         load_sp0(t, &current->thread);
         set_tss_desc(cpu, t);
         load_TR_desc();
-       load_LDT(&init_mm.context);
+       load_mm_ldt(&init_mm);
  
         clear_all_debug_regs();
         dbg_restore_debug_regs();
@@ -1459,7 +1459,7 @@ void cpu_init(void)
         load_sp0(t, thread);
         set_tss_desc(cpu, t);
         load_TR_desc();
-       load_LDT(&init_mm.context);
+       load_mm_ldt(&init_mm);
  
         t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
  
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c

index 3658de47900f9a921a0f8373db962e061d42db3a..9469dfa556074db4dff41212a4873b31d5907772 100644 (file)
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -2179,21 +2179,25 @@ static unsigned long get_segment_base(unsigned int segment)
         int idx = segment >> 3;
  
         if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
+               struct ldt_struct *ldt;
+
                 if (idx > LDT_ENTRIES)
                         return 0;
  
-               if (idx > current->active_mm->context.size)
+               /* IRQs are off, so this synchronizes with smp_store_release */
+               ldt = lockless_dereference(current->active_mm->context.ldt);
+               if (!ldt || idx > ldt->size)
                         return 0;
  
-               desc = current->active_mm->context.ldt;
+               desc = &ldt->entries[idx];
         } else {
                 if (idx > GDT_ENTRIES)
                         return 0;
  
-               desc = raw_cpu_ptr(gdt_page.gdt);
+               desc = raw_cpu_ptr(gdt_page.gdt) + idx;
         }
  
-       return get_desc_base(desc + idx);
+       return get_desc_base(desc);
  }
  
  #ifdef CONFIG_COMPAT
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c

index c37886d759ccac2736c36b357cc0f399786f2fb3..2bcc0525f1c10e80b3db33df075b3189c4a68239 100644 (file)
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -12,6 +12,7 @@
  #include <linux/string.h>
  #include <linux/mm.h>
  #include <linux/smp.h>
+#include <linux/slab.h>
  #include <linux/vmalloc.h>
  #include <linux/uaccess.h>
  
@@ -20,82 +21,82 @@
  #include <asm/mmu_context.h>
  #include <asm/syscalls.h>
  
-#ifdef CONFIG_SMP
+/* context.lock is held for us, so we don't need any locking. */
  static void flush_ldt(void *current_mm)
  {
-       if (current->active_mm == current_mm)
-               load_LDT(&current->active_mm->context);
+       mm_context_t *pc;
+
+       if (current->active_mm != current_mm)
+               return;
+
+       pc = &current->active_mm->context;
+       set_ldt(pc->ldt->entries, pc->ldt->size);
  }
-#endif
  
-static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
+/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
+static struct ldt_struct *alloc_ldt_struct(int size)
  {
-       void *oldldt, *newldt;
-       int oldsize;
-
-       if (mincount <= pc->size)
-               return 0;
-       oldsize = pc->size;
-       mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) &
-                       (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1));
-       if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
-               newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
+       struct ldt_struct *new_ldt;
+       int alloc_size;
+
+       if (size > LDT_ENTRIES)
+               return NULL;
+
+       new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
+       if (!new_ldt)
+               return NULL;
+
+       BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
+       alloc_size = size * LDT_ENTRY_SIZE;
+
+       /*
+        * Xen is very picky: it requires a page-aligned LDT that has no
+        * trailing nonzero bytes in any page that contains LDT descriptors.
+        * Keep it simple: zero the whole allocation and never allocate less
+        * than PAGE_SIZE.
+        */
+       if (alloc_size > PAGE_SIZE)
+               new_ldt->entries = vzalloc(alloc_size);
         else
-               newldt = (void *)__get_free_page(GFP_KERNEL);
-
-       if (!newldt)
-               return -ENOMEM;
+               new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL);
  
-       if (oldsize)
-               memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE);
-       oldldt = pc->ldt;
-       memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
-              (mincount - oldsize) * LDT_ENTRY_SIZE);
+       if (!new_ldt->entries) {
+               kfree(new_ldt);
+               return NULL;
+       }
  
-       paravirt_alloc_ldt(newldt, mincount);
+       new_ldt->size = size;
+       return new_ldt;
+}
  
-#ifdef CONFIG_X86_64
-       /* CHECKME: Do we really need this ? */
-       wmb();
-#endif
-       pc->ldt = newldt;
-       wmb();
-       pc->size = mincount;
-       wmb();
-
-       if (reload) {
-#ifdef CONFIG_SMP
-               preempt_disable();
-               load_LDT(pc);
-               if (!cpumask_equal(mm_cpumask(current->mm),
-                                  cpumask_of(smp_processor_id())))
-                       smp_call_function(flush_ldt, current->mm, 1);
-               preempt_enable();
-#else
-               load_LDT(pc);
-#endif
-       }
-       if (oldsize) {
-               paravirt_free_ldt(oldldt, oldsize);
-               if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
-                       vfree(oldldt);
-               else
-                       put_page(virt_to_page(oldldt));
-       }
-       return 0;
+/* After calling this, the LDT is immutable. */
+static void finalize_ldt_struct(struct ldt_struct *ldt)
+{
+       paravirt_alloc_ldt(ldt->entries, ldt->size);
  }
  
-static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
+/* context.lock is held */
+static void install_ldt(struct mm_struct *current_mm,
+                       struct ldt_struct *ldt)
  {
-       int err = alloc_ldt(new, old->size, 0);
-       int i;
+       /* Synchronizes with lockless_dereference in load_mm_ldt. */
+       smp_store_release(&current_mm->context.ldt, ldt);
+
+       /* Activate the LDT for all CPUs using current_mm. */
+       on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
+}
  
-       if (err < 0)
-               return err;
+static void free_ldt_struct(struct ldt_struct *ldt)
+{
+       if (likely(!ldt))
+               return;
  
-       for (i = 0; i < old->size; i++)
-               write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
-       return 0;
+       paravirt_free_ldt(ldt->entries, ldt->size);
+       if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
+               vfree(ldt->entries);
+       else
+               kfree(ldt->entries);
+       kfree(ldt);
  }
  
  /*
@@ -104,17 +105,37 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
   */
  int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
  {
+       struct ldt_struct *new_ldt;
         struct mm_struct *old_mm;
         int retval = 0;
  
         mutex_init(&mm->context.lock);
-       mm->context.size = 0;
         old_mm = current->mm;
-       if (old_mm && old_mm->context.size > 0) {
-               mutex_lock(&old_mm->context.lock);
-               retval = copy_ldt(&mm->context, &old_mm->context);
-               mutex_unlock(&old_mm->context.lock);
+       if (!old_mm) {
+               mm->context.ldt = NULL;
+               return 0;
         }
+
+       mutex_lock(&old_mm->context.lock);
+       if (!old_mm->context.ldt) {
+               mm->context.ldt = NULL;
+               goto out_unlock;
+       }
+
+       new_ldt = alloc_ldt_struct(old_mm->context.ldt->size);
+       if (!new_ldt) {
+               retval = -ENOMEM;
+               goto out_unlock;
+       }
+
+       memcpy(new_ldt->entries, old_mm->context.ldt->entries,
+              new_ldt->size * LDT_ENTRY_SIZE);
+       finalize_ldt_struct(new_ldt);
+
+       mm->context.ldt = new_ldt;
+
+out_unlock:
+       mutex_unlock(&old_mm->context.lock);
         return retval;
  }
  
@@ -125,53 +146,47 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
   */
  void destroy_context(struct mm_struct *mm)
  {
-       if (mm->context.size) {
-#ifdef CONFIG_X86_32
-               /* CHECKME: Can this ever happen ? */
-               if (mm == current->active_mm)
-                       clear_LDT();
-#endif
-               paravirt_free_ldt(mm->context.ldt, mm->context.size);
-               if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
-                       vfree(mm->context.ldt);
-               else
-                       put_page(virt_to_page(mm->context.ldt));
-               mm->context.size = 0;
-       }
+       free_ldt_struct(mm->context.ldt);
+       mm->context.ldt = NULL;
  }
  
  static int read_ldt(void __user *ptr, unsigned long bytecount)
  {
-       int err;
+       int retval;
         unsigned long size;
         struct mm_struct *mm = current->mm;
  
-       if (!mm->context.size)
-               return 0;
+       mutex_lock(&mm->context.lock);
+
+       if (!mm->context.ldt) {
+               retval = 0;
+               goto out_unlock;
+       }
+
         if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
                 bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
  
-       mutex_lock(&mm->context.lock);
-       size = mm->context.size * LDT_ENTRY_SIZE;
+       size = mm->context.ldt->size * LDT_ENTRY_SIZE;
         if (size > bytecount)
                 size = bytecount;
  
-       err = 0;
-       if (copy_to_user(ptr, mm->context.ldt, size))
-               err = -EFAULT;
-       mutex_unlock(&mm->context.lock);
-       if (err < 0)
-               goto error_return;
+       if (copy_to_user(ptr, mm->context.ldt->entries, size)) {
+               retval = -EFAULT;
+               goto out_unlock;
+       }
+
         if (size != bytecount) {
-               /* zero-fill the rest */
-               if (clear_user(ptr + size, bytecount - size) != 0) {
-                       err = -EFAULT;
-                       goto error_return;
+               /* Zero-fill the rest and pretend we read bytecount bytes. */
+               if (clear_user(ptr + size, bytecount - size)) {
+                       retval = -EFAULT;
+                       goto out_unlock;
                 }
         }
-       return bytecount;
-error_return:
-       return err;
+       retval = bytecount;
+
+out_unlock:
+       mutex_unlock(&mm->context.lock);
+       return retval;
  }
  
  static int read_default_ldt(void __user *ptr, unsigned long bytecount)
@@ -195,6 +210,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
         struct desc_struct ldt;
         int error;
         struct user_desc ldt_info;
+       int oldsize, newsize;
+       struct ldt_struct *new_ldt, *old_ldt;
  
         error = -EINVAL;
         if (bytecount != sizeof(ldt_info))
@@ -213,34 +230,39 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
                         goto out;
         }
  
-       mutex_lock(&mm->context.lock);
-       if (ldt_info.entry_number >= mm->context.size) {
-               error = alloc_ldt(&current->mm->context,
-                                 ldt_info.entry_number + 1, 1);
-               if (error < 0)
-                       goto out_unlock;
-       }
-
-       /* Allow LDTs to be cleared by the user. */
-       if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
-               if (oldmode || LDT_empty(&ldt_info)) {
-                       memset(&ldt, 0, sizeof(ldt));
-                       goto install;
+       if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||
+           LDT_empty(&ldt_info)) {
+               /* The user wants to clear the entry. */
+               memset(&ldt, 0, sizeof(ldt));
+       } else {
+               if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
+                       error = -EINVAL;
+                       goto out;
                 }
+
+               fill_ldt(&ldt, &ldt_info);
+               if (oldmode)
+                       ldt.avl = 0;
         }
  
-       if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
-               error = -EINVAL;
+       mutex_lock(&mm->context.lock);
+
+       old_ldt = mm->context.ldt;
+       oldsize = old_ldt ? old_ldt->size : 0;
+       newsize = max((int)(ldt_info.entry_number + 1), oldsize);
+
+       error = -ENOMEM;
+       new_ldt = alloc_ldt_struct(newsize);
+       if (!new_ldt)
                 goto out_unlock;
-       }
  
-       fill_ldt(&ldt, &ldt_info);
-       if (oldmode)
-               ldt.avl = 0;
+       if (old_ldt)
+               memcpy(new_ldt->entries, old_ldt->entries, oldsize * LDT_ENTRY_SIZE);
+       new_ldt->entries[ldt_info.entry_number] = ldt;
+       finalize_ldt_struct(new_ldt);
  
-       /* Install the new entry ...  */
-install:
-       write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt);
+       install_ldt(mm, new_ldt);
+       free_ldt_struct(old_ldt);
         error = 0;
  
  out_unlock:
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c

index 71d7849a07f7ca93b126bf081464012c7a91c9a6..f6b916387590158706b8ef4ec68bfef64702ff0f 100644 (file)
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -121,11 +121,11 @@ void __show_regs(struct pt_regs *regs, int all)
  void release_thread(struct task_struct *dead_task)
  {
         if (dead_task->mm) {
-               if (dead_task->mm->context.size) {
+               if (dead_task->mm->context.ldt) {
                         pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
                                 dead_task->comm,
                                 dead_task->mm->context.ldt,
-                               dead_task->mm->context.size);
+                               dead_task->mm->context.ldt->size);
                         BUG();
                 }
         }
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c

index 9b4d51d0c0d013274f7ba46c2e58319f0d1d9145..6273324186ac5ca7adba69be5ded69f23d8882f7 100644 (file)
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -5,6 +5,7 @@
  #include <linux/mm.h>
  #include <linux/ptrace.h>
  #include <asm/desc.h>
+#include <asm/mmu_context.h>
  
  unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
  {
@@ -30,10 +31,11 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
                 seg &= ~7UL;
  
                 mutex_lock(&child->mm->context.lock);
-               if (unlikely((seg >> 3) >= child->mm->context.size))
+               if (unlikely(!child->mm->context.ldt ||
+                            (seg >> 3) >= child->mm->context.ldt->size))
                         addr = -1L; /* bogus selector, access would fault */
                 else {
-                       desc = child->mm->context.ldt + seg;
+                       desc = &child->mm->context.ldt->entries[seg];
                         base = get_desc_base(desc);
  
                         /* 16-bit code segment? */
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c

index 0d7dd1f5ac36fa6814c18522dd28561566c570eb..9ab52791fed59e3ab2e531611037c4ed9dc7bde3 100644 (file)
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -22,6 +22,7 @@
  #include <asm/fpu/internal.h>
  #include <asm/debugreg.h>
  #include <asm/cpu.h>
+#include <asm/mmu_context.h>
  
  #ifdef CONFIG_X86_32
  __visible unsigned long saved_context_ebx;
@@ -153,7 +154,7 @@ static void fix_processor_context(void)
         syscall_init();                         /* This sets MSR_*STAR and related */
  #endif
         load_TR_desc();                         /* This does ltr */
-       load_LDT(&current->active_mm->context); /* This does lldt */
+       load_mm_ldt(current->active_mm);        /* This does lldt */
  
         fpu__resume_cpu();
  }
author	Andy Lutomirski <luto@kernel.org>
	Thu, 30 Jul 2015 21:31:32 +0000 (14:31 -0700)
committer	Ingo Molnar <mingo@kernel.org>
	Fri, 31 Jul 2015 08:23:23 +0000 (10:23 +0200)
arch/x86/include/asm/desc.h		patch \| blob \| history
arch/x86/include/asm/mmu.h		patch \| blob \| history
arch/x86/include/asm/mmu_context.h		patch \| blob \| history
arch/x86/kernel/cpu/common.c		patch \| blob \| history
arch/x86/kernel/cpu/perf_event.c		patch \| blob \| history
arch/x86/kernel/ldt.c		patch \| blob \| history
arch/x86/kernel/process_64.c		patch \| blob \| history
arch/x86/kernel/step.c		patch \| blob \| history
arch/x86/power/cpu.c		patch \| blob \| history