]> www.infradead.org Git - linux-platform-drivers-x86.git/commitdiff
mm: use correct numa policy node for transparent hugepages
authorAndi Kleen <ak@linux.intel.com>
Sat, 5 Mar 2011 01:36:32 +0000 (17:36 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sat, 5 Mar 2011 01:53:39 +0000 (17:53 -0800)
Pass down the correct node for a transparent hugepage allocation.  Most
callers continue to use the current node, however the hugepaged daemon
now uses the previous node of the first to be collapsed page instead.
This ensures that khugepaged does not mess up local memory for an
existing process which uses local policy.

The choice of node is somewhat primitive currently: it just uses the
node of the first page in the pmd range.  An alternative would be to
look at multiple pages and use the most popular node.  I used the
simplest variant for now which should work well enough for the case of
all pages being on the same node.

[akpm@linux-foundation.org: coding-style fixes]
Acked-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/huge_memory.c
mm/mempolicy.c

index 1802db819e282ada2e54f765488a93bd62183bb1..dbe99a5f2073927741442a13f99123de127973c7 100644 (file)
@@ -650,10 +650,10 @@ static inline gfp_t alloc_hugepage_gfpmask(int defrag)
 
 static inline struct page *alloc_hugepage_vma(int defrag,
                                              struct vm_area_struct *vma,
-                                             unsigned long haddr)
+                                             unsigned long haddr, int nd)
 {
        return alloc_pages_vma(alloc_hugepage_gfpmask(defrag),
-                              HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
+                              HPAGE_PMD_ORDER, vma, haddr, nd);
 }
 
 #ifndef CONFIG_NUMA
@@ -678,7 +678,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                if (unlikely(khugepaged_enter(vma)))
                        return VM_FAULT_OOM;
                page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
-                                         vma, haddr);
+                                         vma, haddr, numa_node_id());
                if (unlikely(!page))
                        goto out;
                if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
@@ -902,7 +902,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
        if (transparent_hugepage_enabled(vma) &&
            !transparent_hugepage_debug_cow())
                new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
-                                             vma, haddr);
+                                             vma, haddr, numa_node_id());
        else
                new_page = NULL;
 
@@ -1745,7 +1745,8 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 static void collapse_huge_page(struct mm_struct *mm,
                               unsigned long address,
                               struct page **hpage,
-                              struct vm_area_struct *vma)
+                              struct vm_area_struct *vma,
+                              int node)
 {
        pgd_t *pgd;
        pud_t *pud;
@@ -1773,7 +1774,8 @@ static void collapse_huge_page(struct mm_struct *mm,
         * mmap_sem in read mode is good idea also to allow greater
         * scalability.
         */
-       new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address);
+       new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address,
+                                     node);
        if (unlikely(!new_page)) {
                up_read(&mm->mmap_sem);
                *hpage = ERR_PTR(-ENOMEM);
@@ -1919,6 +1921,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
        struct page *page;
        unsigned long _address;
        spinlock_t *ptl;
+       int node = -1;
 
        VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
@@ -1949,6 +1952,13 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
                page = vm_normal_page(vma, _address, pteval);
                if (unlikely(!page))
                        goto out_unmap;
+               /*
+                * Chose the node of the first page. This could
+                * be more sophisticated and look at more pages,
+                * but isn't for now.
+                */
+               if (node == -1)
+                       node = page_to_nid(page);
                VM_BUG_ON(PageCompound(page));
                if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
                        goto out_unmap;
@@ -1965,7 +1975,7 @@ out_unmap:
        pte_unmap_unlock(pte, ptl);
        if (ret)
                /* collapse_huge_page will return with the mmap_sem released */
-               collapse_huge_page(mm, address, hpage, vma);
+               collapse_huge_page(mm, address, hpage, vma, node);
 out:
        return ret;
 }
index 25a5a91466195da645cb757cce34b91f85a9fd45..b53ec99f142897a30d1513af8bcfe375dc392102 100644 (file)
@@ -1891,7 +1891,8 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
                page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
        else
                page = __alloc_pages_nodemask(gfp, order,
-                       policy_zonelist(gfp, pol), policy_nodemask(gfp, pol));
+                               policy_zonelist(gfp, pol, numa_node_id()),
+                               policy_nodemask(gfp, pol));
        put_mems_allowed();
        return page;
 }