From 72418f69cc0383e036ace9d54f846ae37fd36bf6 Mon Sep 17 00:00:00 2001
From: Dave McCracken
Date: Fri, 20 Jan 2012 09:34:37 -0600
Subject: [PATCH] Add support for PV hugepages and huge balloon pages.

A PV guest can use a 2MB hugepage only if the backing machine frames
are 2MB-aligned, machine-contiguous, and marked as a superpage by the
hypervisor.  Teach the x86 hugetlb helpers to write huge PTEs as PMD
entries, add the MMUEXT_MARK_SUPER/MMUEXT_UNMARK_SUPER hypercalls, and
optionally run the balloon driver in 2MB extents (boot parameter
"balloon_hugepages") so that ballooning preserves machine contiguity.

Signed-off-by: Dave McCracken
---
 arch/x86/include/asm/hugetlb.h |  41 ++++++++---
 arch/x86/mm/hugetlbpage.c      |   5 +-
 arch/x86/xen/mmu.c             |  45 ++++++++++++
 drivers/xen/balloon.c          | 123 ++++++++++++++++++++++++++-------
 drivers/xen/xen-balloon.c      |  15 ++--
 include/xen/interface/xen.h    |  17 +++++
 6 files changed, 206 insertions(+), 40 deletions(-)

diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index 439a9acc132d..28b0de953753 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -36,16 +36,24 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 	free_pgd_range(tlb, addr, end, floor, ceiling);
 }
 
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+	return *ptep;
+}
+
 static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 				   pte_t *ptep, pte_t pte)
 {
-	set_pte_at(mm, addr, ptep, pte);
+	set_pmd((pmd_t *)ptep, native_make_pmd(native_pte_val(pte)));
 }
 
 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 					    unsigned long addr, pte_t *ptep)
 {
-	return ptep_get_and_clear(mm, addr, ptep);
+	pte_t pte = huge_ptep_get(ptep);
+
+	set_huge_pte_at(mm, addr, ptep, __pte(0));
+	return pte;
 }
 
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
@@ -66,28 +74,45 @@ static inline pte_t huge_pte_wrprotect(pte_t pte)
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 					   unsigned long addr, pte_t *ptep)
 {
-	ptep_set_wrprotect(mm, addr, ptep);
+	pte_t pte = huge_ptep_get(ptep);
+
+	pte = pte_wrprotect(pte);
+	set_huge_pte_at(mm, addr, ptep, pte);
 }
 
 static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 					     unsigned long addr, pte_t *ptep,
 					     pte_t pte, int dirty)
 {
-	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
-}
+	pte_t oldpte = huge_ptep_get(ptep);
+	int changed = !pte_same(oldpte, pte);
 
-static inline pte_t huge_ptep_get(pte_t *ptep)
-{
-	return *ptep;
+	if (changed && dirty) {
+		set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+		flush_tlb_page(vma, addr);
+	}
+
+	return changed;
 }
 
+#ifdef CONFIG_XEN
+int xen_prepare_hugepage(struct page *page);
+void xen_release_hugepage(struct page *page);
+#endif
 static inline int arch_prepare_hugepage(struct page *page)
 {
+#ifdef CONFIG_XEN
+	return xen_prepare_hugepage(page);
+#else
 	return 0;
+#endif
 }
 
 static inline void arch_release_hugepage(struct page *page)
 {
+#ifdef CONFIG_XEN
+	return xen_release_hugepage(page);
+#endif
 }
 
 #endif /* _ASM_X86_HUGETLB_H */
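Note: the hugetlb.h hunks above stop routing huge PTE updates through
the 4K pte helpers because an x86 huge "pte" actually lives in a PMD
(or PUD) entry.  Writing it with set_pmd() keeps the update on the
paravirt path, which a Xen PV guest presumably needs so the hypervisor
can validate the large mapping.  A minimal sketch of the idea, with a
hypothetical example_ name (illustrative only, not part of the patch):

	/* A huge "pte" is stored in a pmd slot, so write it via the
	 * paravirt-aware set_pmd() rather than set_pte_at(). */
	static inline void example_write_huge_pte(pte_t *ptep, pte_t pte)
	{
		pmd_t pmd = native_make_pmd(native_pte_val(pte));

		set_pmd((pmd_t *)ptep, pmd);	/* pv_mmu_ops hook on Xen */
	}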
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index f581a18c0d4d..d8fdb1af100c 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -117,6 +117,9 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 	pgd_t *pgd = pgd_offset(mm, *addr);
 	pud_t *pud = pud_offset(pgd, *addr);
 
+	if (xen_pv_domain())
+		return 0;
+
 	BUG_ON(page_count(virt_to_page(ptep)) == 0);
 	if (page_count(virt_to_page(ptep)) == 1)
 		return 0;
@@ -141,7 +144,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 			pte = (pte_t *)pud;
 		} else {
 			BUG_ON(sz != PMD_SIZE);
-			if (pud_none(*pud))
+			if (!xen_pv_domain() && pud_none(*pud))
 				huge_pmd_share(mm, addr, pud);
 			pte = (pte_t *) pmd_alloc(mm, pud, addr);
 		}
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 1d9858e07288..c85780a43f5b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2364,3 +2364,48 @@ out:
 	return err;
 }
 EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
+
+int xen_prepare_hugepage(struct page *page)
+{
+	struct mmuext_op op;
+	unsigned long pfn, mfn, m;
+	int i;
+	int rc;
+
+	if (!xen_pv_domain())
+		return 0;
+
+	pfn = page_to_pfn(page);
+	mfn = pfn_to_mfn(pfn);
+	if (mfn & ((HPAGE_SIZE/PAGE_SIZE)-1)) {
+		printk("Guest pages are not properly aligned to use hugepages\n");
+		return 1;
+	}
+	for (i = 0, m = mfn; i < HPAGE_SIZE/PAGE_SIZE; i++, pfn++, m++) {
+		if (pfn_to_mfn(pfn) != m) {
+			printk("Guest pages are not properly aligned to use hugepages\n");
+			return 1;
+		}
+	}
+
+	op.cmd = MMUEXT_MARK_SUPER;
+	op.arg1.mfn = mfn;
+	rc = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
+	if (rc) {
+		printk("Xen hypervisor is not configured to allow hugepages\n");
+		return 1;
+	}
+	return 0;
+}
+
+void xen_release_hugepage(struct page *page)
+{
+	struct mmuext_op op;
+
+	if (!xen_pv_domain())
+		return;
+
+	op.cmd = MMUEXT_UNMARK_SUPER;
+	op.arg1.mfn = pfn_to_mfn(page_to_pfn(page));
+	HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
+}
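Note: xen_prepare_hugepage() above succeeds only when the guest page is
backed by machine frames that are 2MB-aligned and contiguous; the
MMUEXT_MARK_SUPER hypercall then asks Xen to treat that machine range
as a superpage.  A hedged sketch of just the precondition, using a
hypothetical example_ helper with the same logic as the loop above:

	/* True if the HPAGE_SIZE/PAGE_SIZE machine frames behind pfn are
	 * aligned and contiguous (the MMUEXT_MARK_SUPER precondition). */
	static bool example_hugepage_machine_contiguous(unsigned long pfn)
	{
		unsigned long mfn = pfn_to_mfn(pfn);
		unsigned long i;

		if (mfn & ((HPAGE_SIZE / PAGE_SIZE) - 1))
			return false;		/* head frame not 2MB-aligned */

		for (i = 0; i < HPAGE_SIZE / PAGE_SIZE; i++)
			if (pfn_to_mfn(pfn + i) != mfn + i)
				return false;	/* hole in the machine range */

		return true;
	}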
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 1a23033a1ac5..9ddd8323bc5a 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -86,6 +86,14 @@ static DEFINE_MUTEX(balloon_mutex);
 struct balloon_stats balloon_stats;
 EXPORT_SYMBOL_GPL(balloon_stats);
 
+/*
+ * Work in pages of this order.  Can be either 0 for normal pages
+ * or 9 for hugepages.
+ */
+int balloon_order;
+static unsigned long balloon_npages;
+static unsigned long discontig_frame_list[PAGE_SIZE / sizeof(unsigned long)];
+
 /* We increase/decrease in batches which fit in a page */
 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
 
@@ -112,10 +120,41 @@ static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
 static void scrub_page(struct page *page)
 {
 #ifdef CONFIG_XEN_SCRUB_PAGES
-	clear_highpage(page);
+	int i;
+
+	for (i = 0; i < balloon_npages; i++)
+		clear_highpage(page++);
 #endif
 }
 
+static void free_discontig_frame(void)
+{
+	int rc;
+	struct xen_memory_reservation reservation = {
+		.address_bits = 0,
+		.domid        = DOMID_SELF,
+		.nr_extents   = balloon_npages,
+		.extent_order = 0
+	};
+
+	set_xen_guest_handle(reservation.extent_start, discontig_frame_list);
+	rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+	BUG_ON(rc != balloon_npages);
+}
+
+static unsigned long shrink_frame(unsigned long nr_pages)
+{
+	unsigned long i, j;
+
+	for (i = 0, j = 0; i < nr_pages; i++, j++) {
+		if (frame_list[i] == 0)
+			j++;
+		if (i != j)
+			frame_list[i] = frame_list[j];
+	}
+	return i;
+}
+
 /* balloon_append: add the given page to the balloon. */
 static void __balloon_append(struct page *page)
 {
@@ -134,7 +173,7 @@ static void balloon_append(struct page *page)
 	__balloon_append(page);
 	if (PageHighMem(page))
 		dec_totalhigh_pages();
-	totalram_pages--;
+	totalram_pages -= balloon_npages;
 }
 
 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
@@ -157,7 +196,7 @@ static struct page *balloon_retrieve(bool prefer_highmem)
 	} else
 		balloon_stats.balloon_low--;
 
-	totalram_pages++;
+	totalram_pages += balloon_npages;
 
 	return page;
 }
@@ -313,11 +352,10 @@ static enum bp_state reserve_additional_memory(long credit)
 static enum bp_state increase_reservation(unsigned long nr_pages)
 {
 	int rc;
-	unsigned long pfn, i;
+	unsigned long pfn, mfn, i, j;
 	struct page *page;
 	struct xen_memory_reservation reservation = {
 		.address_bits = 0,
-		.extent_order = 0,
 		.domid        = DOMID_SELF
 	};
 
@@ -345,6 +383,8 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
 	set_xen_guest_handle(reservation.extent_start, frame_list);
 	reservation.nr_extents = nr_pages;
+	reservation.extent_order = balloon_order;
+
 	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
 	if (rc <= 0)
 		return BP_EAGAIN;
 
@@ -354,19 +394,22 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
 		BUG_ON(page == NULL);
 
 		pfn = page_to_pfn(page);
+		mfn = frame_list[i];
 		BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
 		       phys_to_machine_mapping_valid(pfn));
 
-		set_phys_to_machine(pfn, frame_list[i]);
-
-		/* Link back into the page tables if not highmem. */
-		if (xen_pv_domain() && !PageHighMem(page)) {
-			int ret;
-			ret = HYPERVISOR_update_va_mapping(
-				(unsigned long)__va(pfn << PAGE_SHIFT),
-				mfn_pte(frame_list[i], PAGE_KERNEL),
-				0);
-			BUG_ON(ret);
+		for (j = 0; j < balloon_npages; j++, pfn++, mfn++) {
+			set_phys_to_machine(pfn, mfn);
+
+			/* Link back into the page tables if not highmem. */
+			if (xen_pv_domain() && !PageHighMem(page)) {
+				int ret;
+				ret = HYPERVISOR_update_va_mapping(
+					(unsigned long)__va(pfn << PAGE_SHIFT),
+					mfn_pte(mfn, PAGE_KERNEL),
+					0);
+				BUG_ON(ret);
+			}
 		}
 
 		/* Relinquish the page back to the allocator. */
@@ -383,12 +426,12 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 {
 	enum bp_state state = BP_DONE;
-	unsigned long pfn, i;
+	unsigned long pfn, lpfn, mfn, i, j;
+	int discontig, discontig_free;
 	struct page *page;
 	int ret;
 	struct xen_memory_reservation reservation = {
 		.address_bits = 0,
-		.extent_order = 0,
 		.domid        = DOMID_SELF
 	};
 
@@ -405,7 +448,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 		nr_pages = ARRAY_SIZE(frame_list);
 	for (i = 0; i < nr_pages; i++) {
-		if ((page = alloc_page(gfp)) == NULL) {
+		if ((page = alloc_pages(gfp, balloon_order)) == NULL) {
 			nr_pages = i;
 			state = BP_EAGAIN;
 			break;
 		}
@@ -430,19 +473,36 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 	flush_tlb_all();
 
 	/* No more mappings: invalidate P2M and add to balloon. */
+	discontig = 0;
 	for (i = 0; i < nr_pages; i++) {
-		pfn = mfn_to_pfn(frame_list[i]);
-		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+		mfn = frame_list[i];
+		lpfn = pfn = mfn_to_pfn(mfn);
 		balloon_append(pfn_to_page(pfn));
+		discontig_free = 0;
+		for (j = 0; j < balloon_npages; j++, lpfn++, mfn++) {
+			if ((discontig_frame_list[j] = pfn_to_mfn(lpfn))
+			    != mfn)
+				discontig_free = 1;
+
+			set_phys_to_machine(lpfn, INVALID_P2M_ENTRY);
+		}
+		if (discontig_free) {
+			free_discontig_frame();
+			frame_list[i] = 0;
+			discontig = 1;
+		}
 	}
+	balloon_stats.current_pages -= nr_pages;
+
+	if (discontig)
+		nr_pages = shrink_frame(nr_pages);
 
 	set_xen_guest_handle(reservation.extent_start, frame_list);
 	reservation.nr_extents = nr_pages;
+	reservation.extent_order = balloon_order;
 	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
 	BUG_ON(ret != nr_pages);
 
-	balloon_stats.current_pages -= nr_pages;
-
 	return state;
 }
 
@@ -571,7 +631,7 @@ static void __init balloon_add_region(unsigned long start_pfn,
 	 */
 	extra_pfn_end = min(max_pfn, start_pfn + pages);
 
-	for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) {
+	for (pfn = start_pfn; pfn < extra_pfn_end; pfn += balloon_npages) {
 		page = pfn_to_page(pfn);
 		/* totalram_pages and totalhigh_pages do not
 		   include the boot-time balloon extension, so
@@ -587,11 +647,14 @@ static int __init balloon_init(void)
 	if (!xen_domain())
 		return -ENODEV;
 
-	pr_info("xen/balloon: Initialising balloon driver.\n");
+	pr_info("xen_balloon: Initialising balloon driver with page order %d.\n",
+		balloon_order);
+
+	balloon_npages = 1 << balloon_order;
 
-	balloon_stats.current_pages = xen_pv_domain()
+	balloon_stats.current_pages = (xen_pv_domain()
 		? min(xen_start_info->nr_pages - xen_released_pages, max_pfn)
-		: max_pfn;
+		: max_pfn) >> balloon_order;
 	balloon_stats.target_pages  = balloon_stats.current_pages;
 	balloon_stats.balloon_low   = 0;
 	balloon_stats.balloon_high  = 0;
@@ -623,4 +686,12 @@
 
 subsys_initcall(balloon_init);
 
+static int __init balloon_parse_huge(char *s)
+{
+	balloon_order = 9;
+	return 1;
+}
+
+__setup("balloon_hugepages", balloon_parse_huge);
+
 MODULE_LICENSE("GPL");
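Note: booting the guest with "balloon_hugepages" flips balloon_order
from 0 to 9, so the driver above inflates and deflates in 2MB extents.
When a ballooned-out extent comes back machine-discontiguous,
decrease_reservation() releases its frames one page at a time via
free_discontig_frame(), zeroes that slot in frame_list, and
shrink_frame() compacts the list.  The intended compaction can be
sketched as follows (hypothetical example_ helper, equivalent in
spirit to shrink_frame() above):

	/* Drop zeroed slots from a frame list and return the new length. */
	static unsigned long example_compact_frame_list(unsigned long *frames,
							unsigned long nr)
	{
		unsigned long src, dst = 0;

		for (src = 0; src < nr; src++)
			if (frames[src] != 0)
				frames[dst++] = frames[src];

		return dst;
	}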
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
index 9343db21af01..cdd6064a31f1 100644
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -42,7 +42,9 @@
 #include <xen/features.h>
 #include <xen/page.h>
 
-#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+extern int balloon_order;
+
+#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT+balloon_order-10))
 
 #define BALLOON_CLASS_NAME "xen_memory"
 
@@ -66,7 +68,8 @@ static void watch_target(struct xenbus_watch *watch,
 	/* The given memory/target value is in KiB, so it needs converting to
 	 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
 	 */
-	balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
+	balloon_set_new_target(new_target >>
+			       ((PAGE_SHIFT - 10) + balloon_order));
 }
 static struct xenbus_watch target_watch = {
 	.node = "memory/target",
@@ -154,7 +157,8 @@ static ssize_t store_target_kb(struct sys_device *dev,
 
 	target_bytes = simple_strtoull(buf, &endchar, 0) * 1024;
 
-	balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+	balloon_set_new_target(target_bytes >>
+			       (PAGE_SHIFT + balloon_order));
 
 	return count;
 }
@@ -168,7 +172,7 @@ static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr
 {
 	return sprintf(buf, "%llu\n",
 		       (unsigned long long)balloon_stats.target_pages
-		       << PAGE_SHIFT);
+		       << (PAGE_SHIFT + balloon_order));
 }
 
 static ssize_t store_target(struct sys_device *dev,
@@ -184,7 +188,8 @@ static ssize_t store_target(struct sys_device *dev,
 
 	target_bytes = memparse(buf, &endchar);
 
-	balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+	balloon_set_new_target(target_bytes >>
+			       (PAGE_SHIFT + balloon_order));
 
 	return count;
 }
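Note: the xenstore memory/target value is in KiB while the driver now
counts in extents of 2^balloon_order pages, hence the extra
balloon_order in every shift above.  A worked sketch, assuming 4KB
base pages (PAGE_SHIFT == 12):

	/* balloon_order == 0: shift by  2, one extent ==    4 KiB
	 * balloon_order == 9: shift by 11, one extent == 2048 KiB (2MB)
	 * e.g. a memory/target of 1048576 KiB (1GB) -> 512 huge extents. */
	static inline unsigned long example_kib_to_extents(unsigned long kib)
	{
		return kib >> ((PAGE_SHIFT - 10) + balloon_order);
	}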
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 6a6e91449347..04a440ffeda1 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -167,6 +167,19 @@
  * cmd: MMUEXT_SET_LDT
  * linear_addr: Linear address of LDT base (NB. must be page-aligned).
  * nr_ents: Number of entries in LDT.
+ *
+ * cmd: MMUEXT_CLEAR_PAGE
+ * mfn: Machine frame number to be cleared.
+ *
+ * cmd: MMUEXT_COPY_PAGE
+ * mfn: Machine frame number of the destination page.
+ * src_mfn: Machine frame number of the source page.
+ *
+ * cmd: MMUEXT_MARK_SUPER
+ * mfn: Machine frame number of head of superpage to be marked.
+ *
+ * cmd: MMUEXT_UNMARK_SUPER
+ * mfn: Machine frame number of head of superpage to be cleared.
  */
 #define MMUEXT_PIN_L1_TABLE      0
 #define MMUEXT_PIN_L2_TABLE      1
@@ -183,6 +196,10 @@
 #define MMUEXT_FLUSH_CACHE      12
 #define MMUEXT_SET_LDT          13
 #define MMUEXT_NEW_USER_BASEPTR 15
+#define MMUEXT_CLEAR_PAGE       16
+#define MMUEXT_COPY_PAGE        17
+#define MMUEXT_MARK_SUPER       19
+#define MMUEXT_UNMARK_SUPER     20
 
 #ifndef __ASSEMBLY__
 struct mmuext_op {
-- 
2.50.1