 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
 #ifdef CONFIG_NUMA_BALANCING
-extern void task_numa_fault(int node, int pages, bool migrated);
+extern void task_numa_fault(int last_nid, int node, int pages, bool migrated);
 extern void set_numabalancing_state(bool enabled);
 #else
-static inline void task_numa_fault(int node, int pages, bool migrated)
+static inline void task_numa_fault(int last_nid, int node, int pages,
+                                  bool migrated)
 {
 }
 static inline void set_numabalancing_state(bool enabled)
 
  */
 unsigned int sysctl_numa_balancing_settle_count __read_mostly = 3;
 
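+/*
+ * Fault statistics are kept as a pair of counters per node: slot 0
+ * accumulates faults classified as shared, slot 1 faults classified
+ * as private.
+ */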
+static inline int task_faults_idx(int nid, int priv)
+{
+       return 2 * nid + priv;
+}
+
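+/* Sum of the private and shared faults @p has recorded against node @nid */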
+static inline unsigned long task_faults(struct task_struct *p, int nid)
+{
+       if (!p->numa_faults)
+               return 0;
+
+       return p->numa_faults[task_faults_idx(nid, 0)] +
+               p->numa_faults[task_faults_idx(nid, 1)];
+}
+
 static unsigned long weighted_cpuload(const int cpu);
 
 
        /* Find the node with the highest number of faults */
        for_each_online_node(nid) {
                unsigned long faults;
+               int priv, i;
 
-               /* Decay existing window and copy faults since last scan */
-               p->numa_faults[nid] >>= 1;
-               p->numa_faults[nid] += p->numa_faults_buffer[nid];
-               p->numa_faults_buffer[nid] = 0;
+               for (priv = 0; priv < 2; priv++) {
+                       i = task_faults_idx(nid, priv);
 
-               faults = p->numa_faults[nid];
+                       /* Decay existing window, copy faults since last scan */
+                       p->numa_faults[i] >>= 1;
+                       p->numa_faults[i] += p->numa_faults_buffer[i];
+                       p->numa_faults_buffer[i] = 0;
+               }
+
+               /* Find maximum private faults */
+               faults = p->numa_faults[task_faults_idx(nid, 1)];
                if (faults > max_faults) {
                        max_faults = faults;
                        max_nid = nid;
 /*
  * Got a PROT_NONE fault for a page on @node.
  */
-void task_numa_fault(int node, int pages, bool migrated)
+void task_numa_fault(int last_nid, int node, int pages, bool migrated)
 {
        struct task_struct *p = current;
+       int priv;
 
        if (!numabalancing_enabled)
                return;
 
+       /* For now, do not attempt to detect private/shared accesses */
+       priv = 1;
+
        /* Allocate buffer to track faults on a per-node basis */
        if (unlikely(!p->numa_faults)) {
-               int size = sizeof(*p->numa_faults) * nr_node_ids;
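+               /* Two counters per node: one shared, one private */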
+               int size = sizeof(*p->numa_faults) * 2 * nr_node_ids;
 
                /* numa_faults and numa_faults_buffer share the allocation */
                p->numa_faults = kzalloc(size * 2, GFP_KERNEL|__GFP_NOWARN);
                        return;
 
                BUG_ON(p->numa_faults_buffer);
-               p->numa_faults_buffer = p->numa_faults + nr_node_ids;
+               p->numa_faults_buffer = p->numa_faults + (2 * nr_node_ids);
        }
 
        /*
 
        task_numa_placement(p);
 
-       p->numa_faults_buffer[node] += pages;
+       p->numa_faults_buffer[task_faults_idx(node, priv)] += pages;
 }
 
 static void reset_ptenuma_scan(struct task_struct *p)
                return false;
 
        if (dst_nid == p->numa_preferred_nid ||
-           p->numa_faults[dst_nid] > p->numa_faults[src_nid])
+           task_faults(p, dst_nid) > task_faults(p, src_nid))
                return true;
 
        return false;
            p->numa_migrate_seq >= sysctl_numa_balancing_settle_count)
                return false;
 
-       if (p->numa_faults[dst_nid] < p->numa_faults[src_nid])
+       if (task_faults(p, dst_nid) < task_faults(p, src_nid))
                return true;
 
        return false;
 
        struct page *page;
        unsigned long haddr = addr & HPAGE_PMD_MASK;
        int page_nid = -1, this_nid = numa_node_id();
-       int target_nid;
+       int target_nid, last_nid = -1;
        bool page_locked;
        bool migrated = false;
 
        page = pmd_page(pmd);
        BUG_ON(is_huge_zero_page(page));
        page_nid = page_to_nid(page);
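+       /*
+        * Pass the page's previously recorded node to task_numa_fault();
+        * it is not yet used to distinguish private from shared accesses.
+        */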
+       last_nid = page_nid_last(page);
        count_vm_numa_event(NUMA_HINT_FAULTS);
        if (page_nid == this_nid)
                count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
                page_unlock_anon_vma_read(anon_vma);
 
        if (page_nid != -1)
-               task_numa_fault(page_nid, HPAGE_PMD_NR, migrated);
+               task_numa_fault(last_nid, page_nid, HPAGE_PMD_NR, migrated);
 
        return 0;
 }
 
        struct page *page = NULL;
        spinlock_t *ptl;
        int page_nid = -1;
+       int last_nid;
        int target_nid;
        bool migrated = false;
 
        }
        BUG_ON(is_zero_pfn(page_to_pfn(page)));
 
+       last_nid = page_nid_last(page);
        page_nid = page_to_nid(page);
        target_nid = numa_migrate_prep(page, vma, addr, page_nid);
        pte_unmap_unlock(ptep, ptl);
 
 out:
        if (page_nid != -1)
-               task_numa_fault(page_nid, 1, migrated);
+               task_numa_fault(last_nid, page_nid, 1, migrated);
        return 0;
 }
 
        unsigned long offset;
        spinlock_t *ptl;
        bool numa = false;
+       int last_nid;
 
        spin_lock(&mm->page_table_lock);
        pmd = *pmdp;
                if (unlikely(page_mapcount(page) != 1))
                        continue;
 
+               last_nid = page_nid_last(page);
                page_nid = page_to_nid(page);
                target_nid = numa_migrate_prep(page, vma, addr, page_nid);
                pte_unmap_unlock(pte, ptl);
                }
 
                if (page_nid != -1)
-                       task_numa_fault(page_nid, 1, migrated);
+                       task_numa_fault(last_nid, page_nid, 1, migrated);
 
                pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
        }