www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
Add support for pv hugepages and support for huge balloon pages.
author Dave McCracken <dave.mccracken@oracle.com>
Fri, 20 Jan 2012 15:34:37 +0000 (09:34 -0600)
committer Guru Anbalagane <guru.anbalagane@oracle.com>
Fri, 27 Jan 2012 01:21:23 +0000 (17:21 -0800)
Signed-off-by: Dave McCracken <dave.mccracken@oracle.com>
arch/x86/include/asm/hugetlb.h
arch/x86/mm/hugetlbpage.c
arch/x86/xen/mmu.c
drivers/xen/balloon.c
drivers/xen/xen-balloon.c
include/xen/interface/xen.h

index 439a9acc132d10f77b469fc24b250f3acc2f51d9..28b0de95375339e38c852bd94c00a336c35f41d7 100644 (file)
@@ -36,16 +36,24 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
        free_pgd_range(tlb, addr, end, floor, ceiling);
 }
 
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+       return *ptep;
+}
+
 static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
                                   pte_t *ptep, pte_t pte)
 {
-       set_pte_at(mm, addr, ptep, pte);
+       set_pmd((pmd_t *)ptep, native_make_pmd(native_pte_val(pte)));
 }
 
 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
                                            unsigned long addr, pte_t *ptep)
 {
-       return ptep_get_and_clear(mm, addr, ptep);
+       pte_t pte = huge_ptep_get(ptep);
+
+       set_huge_pte_at(mm, addr, ptep, __pte(0));
+       return pte;
 }
 
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
@@ -66,28 +74,45 @@ static inline pte_t huge_pte_wrprotect(pte_t pte)
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
                                           unsigned long addr, pte_t *ptep)
 {
-       ptep_set_wrprotect(mm, addr, ptep);
+       pte_t pte = huge_ptep_get(ptep);
+
+       pte = pte_wrprotect(pte);
+       set_huge_pte_at(mm, addr, ptep, pte);
 }
 
 static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
                                             unsigned long addr, pte_t *ptep,
                                             pte_t pte, int dirty)
 {
-       return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
-}
+       pte_t oldpte = huge_ptep_get(ptep);
+       int changed = !pte_same(oldpte, pte);
 
-static inline pte_t huge_ptep_get(pte_t *ptep)
-{
-       return *ptep;
+       if (changed && dirty) {
+               set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+               flush_tlb_page(vma, addr);
+       }
+
+       return changed;
 }
 
+#ifdef CONFIG_XEN
+int xen_prepare_hugepage(struct page *page);
+void xen_release_hugepage(struct page *page);
+#endif
 static inline int arch_prepare_hugepage(struct page *page)
 {
+#ifdef CONFIG_XEN
+       return xen_prepare_hugepage(page);
+#else
        return 0;
+#endif
 }
 
 static inline void arch_release_hugepage(struct page *page)
 {
+#ifdef CONFIG_XEN
+       return xen_release_hugepage(page);
+#endif
 }
 
 #endif /* _ASM_X86_HUGETLB_H */
index f581a18c0d4d7d07aac5cdfa8fb443106e3d5e92..d8fdb1af100c52d175bed09115343e6aa92a15f6 100644 (file)
@@ -117,6 +117,9 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
        pgd_t *pgd = pgd_offset(mm, *addr);
        pud_t *pud = pud_offset(pgd, *addr);
 
+       if (xen_pv_domain())
+               return 0;
+
        BUG_ON(page_count(virt_to_page(ptep)) == 0);
        if (page_count(virt_to_page(ptep)) == 1)
                return 0;
@@ -141,7 +144,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
                        pte = (pte_t *)pud;
                } else {
                        BUG_ON(sz != PMD_SIZE);
-                       if (pud_none(*pud))
+                       if (!xen_pv_domain() && pud_none(*pud))
                                huge_pmd_share(mm, addr, pud);
                        pte = (pte_t *) pmd_alloc(mm, pud, addr);
                }
index 1d9858e07288914179184d82b48dd25f292d6cff..c85780a43f5b75f9ceee8765634b93723cc83e27 100644 (file)
@@ -2364,3 +2364,48 @@ out:
        return err;
 }
 EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
+
+int xen_prepare_hugepage(struct page *page)
+{
+       struct mmuext_op op;
+       unsigned long pfn, mfn, m;
+       int i;
+       int rc;
+
+       if (!xen_pv_domain())
+               return 0;
+
+       pfn = page_to_pfn(page);
+       mfn = pfn_to_mfn(pfn);
+       if (mfn & ((HPAGE_SIZE/PAGE_SIZE)-1)) {
+               printk("Guest pages are not properly aligned to use hugepages\n");
+               return 1;
+       }
+       for (i = 0, m = mfn; i < HPAGE_SIZE/PAGE_SIZE; i++, pfn++, m++) {
+               if (pfn_to_mfn(pfn) != m) {
+                       printk("Guest pages are not properly aligned to use hugepages\n");
+                       return 1;
+               }
+       }
+
+       op.cmd = MMUEXT_MARK_SUPER;
+       op.arg1.mfn = mfn;
+       rc = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
+       if (rc) {
+               printk("Xen hypervisor is not configured to allow hugepages\n");
+               return 1;
+       }
+       return 0;
+}
+
+void xen_release_hugepage(struct page *page)
+{
+       struct mmuext_op op;
+
+       if (!xen_pv_domain())
+               return;
+
+       op.cmd = MMUEXT_UNMARK_SUPER;
+       op.arg1.mfn = pfn_to_mfn(page_to_pfn(page));
+       HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
+}
index 1a23033a1ac5e8d040f878305dcc1d8ea6756b55..9ddd8323bc5aec41f4340a6c41961a6933dd94be 100644 (file)
@@ -86,6 +86,14 @@ static DEFINE_MUTEX(balloon_mutex);
 struct balloon_stats balloon_stats;
 EXPORT_SYMBOL_GPL(balloon_stats);
 
+/*
+ * Work in pages of this order.  Can be either 0 for normal pages
+ * or 9 for hugepages.
+ */
+int balloon_order;
+static unsigned long balloon_npages;
+static unsigned long discontig_frame_list[PAGE_SIZE / sizeof(unsigned long)];
+
 /* We increase/decrease in batches which fit in a page */
 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
 
@@ -112,10 +120,41 @@ static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
 static void scrub_page(struct page *page)
 {
 #ifdef CONFIG_XEN_SCRUB_PAGES
-       clear_highpage(page);
+       int i;
+
+       for (i = 0; i < balloon_npages; i++)
+               clear_highpage(page++);
 #endif
 }
 
+static void free_discontig_frame(void)
+{
+       int rc;
+       struct xen_memory_reservation reservation = {
+               .address_bits = 0,
+               .domid        = DOMID_SELF,
+               .nr_extents   = balloon_npages,
+               .extent_order = 0
+       };
+
+       set_xen_guest_handle(reservation.extent_start, discontig_frame_list);
+       rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+       BUG_ON(rc != balloon_npages);
+}
+
+static unsigned long shrink_frame(unsigned long nr_pages)
+{
+       unsigned long i, j;
+
+       for (i = 0, j = 0; i < nr_pages; i++, j++) {
+               if (frame_list[i] == 0)
+                       j++;
+               if (i != j)
+                       frame_list[i] = frame_list[j];
+       }
+       return i;
+}
+
 /* balloon_append: add the given page to the balloon. */
 static void __balloon_append(struct page *page)
 {
@@ -134,7 +173,7 @@ static void balloon_append(struct page *page)
        __balloon_append(page);
        if (PageHighMem(page))
                dec_totalhigh_pages();
-       totalram_pages--;
+       totalram_pages -= balloon_npages;
 }
 
 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
@@ -157,7 +196,7 @@ static struct page *balloon_retrieve(bool prefer_highmem)
        } else
                balloon_stats.balloon_low--;
 
-       totalram_pages++;
+       totalram_pages += balloon_npages;
 
        return page;
 }
@@ -313,11 +352,10 @@ static enum bp_state reserve_additional_memory(long credit)
 static enum bp_state increase_reservation(unsigned long nr_pages)
 {
        int rc;
-       unsigned long  pfn, i;
+       unsigned long  pfn, mfn, i, j;
        struct page   *page;
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
-               .extent_order = 0,
                .domid        = DOMID_SELF
        };
 
@@ -345,6 +383,8 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
 
        set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents = nr_pages;
+       reservation.extent_order = balloon_order;
+
        rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
        if (rc <= 0)
                return BP_EAGAIN;
@@ -354,19 +394,22 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
                BUG_ON(page == NULL);
 
                pfn = page_to_pfn(page);
+               mfn = frame_list[i];
                BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
                       phys_to_machine_mapping_valid(pfn));
 
-               set_phys_to_machine(pfn, frame_list[i]);
-
-               /* Link back into the page tables if not highmem. */
-               if (xen_pv_domain() && !PageHighMem(page)) {
-                       int ret;
-                       ret = HYPERVISOR_update_va_mapping(
-                               (unsigned long)__va(pfn << PAGE_SHIFT),
-                               mfn_pte(frame_list[i], PAGE_KERNEL),
-                               0);
-                       BUG_ON(ret);
+               for (j = 0; j < balloon_npages; j++, pfn++, mfn++) {
+                       set_phys_to_machine(pfn, mfn);
+
+                       /* Link back into the page tables if not highmem. */
+                       if (xen_pv_domain() && !PageHighMem(page)) {
+                               int ret;
+                               ret = HYPERVISOR_update_va_mapping(
+                                       (unsigned long)__va(pfn << PAGE_SHIFT),
+                                       mfn_pte(mfn, PAGE_KERNEL),
+                                       0);
+                               BUG_ON(ret);
+                       }
                }
 
                /* Relinquish the page back to the allocator. */
@@ -383,12 +426,12 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
 static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 {
        enum bp_state state = BP_DONE;
-       unsigned long  pfn, i;
+       unsigned long  pfn, lpfn, mfn, i, j;
+       int discontig, discontig_free;
        struct page   *page;
        int ret;
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
-               .extent_order = 0,
                .domid        = DOMID_SELF
        };
 
@@ -405,7 +448,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
                nr_pages = ARRAY_SIZE(frame_list);
 
        for (i = 0; i < nr_pages; i++) {
-               if ((page = alloc_page(gfp)) == NULL) {
+               if ((page = alloc_pages(gfp, balloon_order)) == NULL) {
                        nr_pages = i;
                        state = BP_EAGAIN;
                        break;
@@ -430,19 +473,36 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
        flush_tlb_all();
 
        /* No more mappings: invalidate P2M and add to balloon. */
+       discontig = 0;
        for (i = 0; i < nr_pages; i++) {
-               pfn = mfn_to_pfn(frame_list[i]);
-               __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+               mfn = frame_list[i];
+               lpfn = pfn = mfn_to_pfn(mfn);
                balloon_append(pfn_to_page(pfn));
+               discontig_free = 0;
+               for (j = 0; j < balloon_npages; j++, lpfn++, mfn++) {
+                       if ((discontig_frame_list[j] = pfn_to_mfn(lpfn))
+                           != mfn)
+                               discontig_free = 1;
+
+                       set_phys_to_machine(lpfn, INVALID_P2M_ENTRY);
+               }
+               if (discontig_free) {
+                       free_discontig_frame();
+                       frame_list[i] = 0;
+                       discontig = 1;
+               }
        }
+       balloon_stats.current_pages -= nr_pages;
+
+       if (discontig)
+               nr_pages = shrink_frame(nr_pages);
 
        set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents   = nr_pages;
+       reservation.extent_order = balloon_order;
        ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
        BUG_ON(ret != nr_pages);
 
-       balloon_stats.current_pages -= nr_pages;
-
        return state;
 }
 
@@ -571,7 +631,7 @@ static void __init balloon_add_region(unsigned long start_pfn,
         */
        extra_pfn_end = min(max_pfn, start_pfn + pages);
 
-       for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) {
+       for (pfn = start_pfn; pfn < extra_pfn_end; pfn += balloon_npages) {
                page = pfn_to_page(pfn);
                /* totalram_pages and totalhigh_pages do not
                   include the boot-time balloon extension, so
@@ -587,11 +647,14 @@ static int __init balloon_init(void)
        if (!xen_domain())
                return -ENODEV;
 
-       pr_info("xen/balloon: Initialising balloon driver.\n");
+       pr_info("xen_balloon: Initialising balloon driver with page order %d.\n",
+               balloon_order);
+
+       balloon_npages = 1 << balloon_order;
 
-       balloon_stats.current_pages = xen_pv_domain()
+       balloon_stats.current_pages = (xen_pv_domain()
                ? min(xen_start_info->nr_pages - xen_released_pages, max_pfn)
-               : max_pfn;
+               : max_pfn) >> balloon_order;
        balloon_stats.target_pages  = balloon_stats.current_pages;
        balloon_stats.balloon_low   = 0;
        balloon_stats.balloon_high  = 0;
@@ -623,4 +686,12 @@ static int __init balloon_init(void)
 
 subsys_initcall(balloon_init);
 
+static int __init balloon_parse_huge(char *s)
+{
+       balloon_order = 9;
+       return 1;
+}
+
+__setup("balloon_hugepages", balloon_parse_huge);
+
 MODULE_LICENSE("GPL");
index 9343db21af0100514b877340922d85cb456c9205..cdd6064a31f17a36449e0ae8070adcf7645aae1b 100644 (file)
@@ -42,7 +42,9 @@
 #include <xen/features.h>
 #include <xen/page.h>
 
-#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+extern int balloon_order;
+
+#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT+balloon_order-10))
 
 #define BALLOON_CLASS_NAME "xen_memory"
 
@@ -66,7 +68,8 @@ static void watch_target(struct xenbus_watch *watch,
        /* The given memory/target value is in KiB, so it needs converting to
         * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
         */
-       balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
+       balloon_set_new_target(new_target >>
+               ((PAGE_SHIFT - 10) + balloon_order));
 }
 static struct xenbus_watch target_watch = {
        .node = "memory/target",
@@ -154,7 +157,8 @@ static ssize_t store_target_kb(struct sys_device *dev,
 
        target_bytes = simple_strtoull(buf, &endchar, 0) * 1024;
 
-       balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+       balloon_set_new_target(target_bytes >>
+               (PAGE_SHIFT + balloon_order));
 
        return count;
 }
@@ -168,7 +172,7 @@ static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr
 {
        return sprintf(buf, "%llu\n",
                       (unsigned long long)balloon_stats.target_pages
-                      << PAGE_SHIFT);
+                      << (PAGE_SHIFT + balloon_order));
 }
 
 static ssize_t store_target(struct sys_device *dev,
@@ -184,7 +188,8 @@ static ssize_t store_target(struct sys_device *dev,
 
        target_bytes = memparse(buf, &endchar);
 
-       balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+       balloon_set_new_target(target_bytes >>
+                              (PAGE_SHIFT + balloon_order));
 
        return count;
 }
index 6a6e91449347078a3ac2c3d66a10578e19e465fd..04a440ffeda1b1c527caa62f69ea4813d9d8cc0f 100644 (file)
  * cmd: MMUEXT_SET_LDT
  * linear_addr: Linear address of LDT base (NB. must be page-aligned).
  * nr_ents: Number of entries in LDT.
+ *
+ * cmd: MMUEXT_CLEAR_PAGE
+ * mfn: Machine frame number to be cleared.
+ *
+ * cmd: MMUEXT_COPY_PAGE
+ * mfn: Machine frame number of the destination page.
+ * src_mfn: Machine frame number of the source page.
+ *
+ * cmd: MMUEXT_MARK_SUPER
+ * mfn: Machine frame number of head of superpage to be marked.
+ *
+ * cmd: MMUEXT_UNMARK_SUPER
+ * mfn: Machine frame number of head of superpage to be cleared.
  */
 #define MMUEXT_PIN_L1_TABLE      0
 #define MMUEXT_PIN_L2_TABLE      1
 #define MMUEXT_FLUSH_CACHE      12
 #define MMUEXT_SET_LDT          13
 #define MMUEXT_NEW_USER_BASEPTR 15
+#define MMUEXT_CLEAR_PAGE       16
+#define MMUEXT_COPY_PAGE        17
+#define MMUEXT_MARK_SUPER       19
+#define MMUEXT_UNMARK_SUPER     20
 
 #ifndef __ASSEMBLY__
 struct mmuext_op {