"interleave_hit %lu\n"
                       "local_node %lu\n"
                       "other_node %lu\n",
-                      sum_zone_node_page_state(dev->id, NUMA_HIT),
-                      sum_zone_node_page_state(dev->id, NUMA_MISS),
-                      sum_zone_node_page_state(dev->id, NUMA_FOREIGN),
-                      sum_zone_node_page_state(dev->id, NUMA_INTERLEAVE_HIT),
-                      sum_zone_node_page_state(dev->id, NUMA_LOCAL),
-                      sum_zone_node_page_state(dev->id, NUMA_OTHER));
+                      sum_zone_numa_state(dev->id, NUMA_HIT),
+                      sum_zone_numa_state(dev->id, NUMA_MISS),
+                      sum_zone_numa_state(dev->id, NUMA_FOREIGN),
+                      sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT),
+                      sum_zone_numa_state(dev->id, NUMA_LOCAL),
+                      sum_zone_numa_state(dev->id, NUMA_OTHER));
 }
 static DEVICE_ATTR(numastat, S_IRUGO, node_read_numastat, NULL);
 
                n += sprintf(buf+n, "%s %lu\n", vmstat_text[i],
                             sum_zone_node_page_state(nid, i));
 
-       for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+#ifdef CONFIG_NUMA
+       for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
                n += sprintf(buf+n, "%s %lu\n",
                             vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
+                            sum_zone_numa_state(nid, i));
+#endif
+
+       for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+               n += sprintf(buf+n, "%s %lu\n",
+                            vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
+                            NR_VM_NUMA_STAT_ITEMS],
                             node_page_state(pgdat, i));
 
        return n;
 
  * vm_stat contains the global counters
  */
 atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
+atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
 atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
 EXPORT_SYMBOL(vm_zone_stat);
+EXPORT_SYMBOL(vm_numa_stat); /* global totals of the NUMA stat items, kept apart from zone/node stats */
 EXPORT_SYMBOL(vm_node_stat);
 
 #ifdef CONFIG_SMP
 
                        per_cpu_ptr(zone->pageset, cpu)->stat_threshold
                                                        = threshold;
-
+#ifdef CONFIG_NUMA
+                       per_cpu_ptr(zone->pageset, cpu)->numa_stat_threshold
+                                                       = threshold;
+#endif
                        /* Base nodestat threshold on the largest populated zone. */
                        pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
                        per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
                        continue;
 
                threshold = (*calculate_pressure)(zone);
-               for_each_online_cpu(cpu)
+               for_each_online_cpu(cpu) {
                        per_cpu_ptr(zone->pageset, cpu)->stat_threshold
                                                        = threshold;
+#ifdef CONFIG_NUMA
+                       per_cpu_ptr(zone->pageset, cpu)->numa_stat_threshold
+                                                       = threshold;
+#endif
+               }
        }
 }
 
  * Fold a differential into the global counters.
  * Returns the number of counters updated.
  */
+#ifdef CONFIG_NUMA
+static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
+{ /* CONFIG_NUMA variant: folds zone, NUMA and node deltas into the global arrays */
+       int i;
+       int changes = 0; /* count of non-zero deltas that were folded */
+
+       for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+               if (zone_diff[i]) { /* zero deltas are skipped and not counted */
+                       atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
+                       changes++;
+       }
+
+       for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) /* NUMA items go to their own global array */
+               if (numa_diff[i]) {
+                       atomic_long_add(numa_diff[i], &vm_numa_stat[i]);
+                       changes++;
+       }
+
+       for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+               if (node_diff[i]) {
+                       atomic_long_add(node_diff[i], &vm_node_stat[i]);
+                       changes++;
+       }
+       return changes; /* total across all three arrays */
+}
+#else
 static int fold_diff(int *zone_diff, int *node_diff)
 {
        int i;
        }
        return changes;
 }
+#endif /* CONFIG_NUMA */
 
 /*
  * Update the zone counters for the current cpu.
        struct zone *zone;
        int i;
        int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
+#ifdef CONFIG_NUMA
+       int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
+#endif
        int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
        int changes = 0;
 
                        }
                }
 #ifdef CONFIG_NUMA
+               for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
+                       int v;
+
+                       v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0);
+                       if (v) {
+
+                               atomic_long_add(v, &zone->vm_numa_stat[i]);
+                               global_numa_diff[i] += v;
+                               __this_cpu_write(p->expire, 3);
+                       }
+               }
+
                if (do_pagesets) {
                        cond_resched();
                        /*
                }
        }
 
+#ifdef CONFIG_NUMA
+       changes += fold_diff(global_zone_diff, global_numa_diff,
+                            global_node_diff);
+#else
        changes += fold_diff(global_zone_diff, global_node_diff);
+#endif
        return changes;
 }
 
        struct zone *zone;
        int i;
        int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
+#ifdef CONFIG_NUMA
+       int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
+#endif
        int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
 
        for_each_populated_zone(zone) {
                                atomic_long_add(v, &zone->vm_stat[i]);
                                global_zone_diff[i] += v;
                        }
+
+#ifdef CONFIG_NUMA
+               for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
+                       if (p->vm_numa_stat_diff[i]) {
+                               int v;
+
+                               v = p->vm_numa_stat_diff[i];
+                               p->vm_numa_stat_diff[i] = 0;
+                               atomic_long_add(v, &zone->vm_numa_stat[i]);
+                               global_numa_diff[i] += v;
+                       }
+#endif
        }
 
        for_each_online_pgdat(pgdat) {
                        }
        }
 
+#ifdef CONFIG_NUMA
+       fold_diff(global_zone_diff, global_numa_diff, global_node_diff);
+#else
        fold_diff(global_zone_diff, global_node_diff);
+#endif
 }
 
 /*
                        atomic_long_add(v, &zone->vm_stat[i]);
                        atomic_long_add(v, &vm_zone_stat[i]);
                }
+
+#ifdef CONFIG_NUMA
+       for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
+               if (pset->vm_numa_stat_diff[i]) {
+                       int v = pset->vm_numa_stat_diff[i];
+
+                       pset->vm_numa_stat_diff[i] = 0;
+                       atomic_long_add(v, &zone->vm_numa_stat[i]);
+                       atomic_long_add(v, &vm_numa_stat[i]);
+               }
+#endif
 }
 #endif
 
 #ifdef CONFIG_NUMA
+void __inc_numa_state(struct zone *zone,
+                                enum numa_stat_item item)
+{ /* Increment this CPU's diff for @item; pass it to zone_numa_state_add() once it exceeds the threshold. */
+       struct per_cpu_pageset __percpu *pcp = zone->pageset;
+       s8 __percpu *p = pcp->vm_numa_stat_diff + item; /* per-cpu diff slot for @item */
+       s8 v, t; /* v: diff after the increment, t: per-cpu threshold */
+
+       v = __this_cpu_inc_return(*p);
+       t = __this_cpu_read(pcp->numa_stat_threshold); /* NOTE(review): read into an s8; assumes the threshold fits - confirm */
+       if (unlikely(v > t)) {
+               s8 overstep = t >> 1; /* half of the threshold */
+
+               zone_numa_state_add(v + overstep, zone, item); /* hand over the diff plus the overstep */
+               __this_cpu_write(*p, -overstep); /* restart at -overstep, offsetting the extra amount added above */
+       }
+}
+
 /*
  * Determine the per node value of a stat item. This function
  * is called frequently in a NUMA machine, so try to be as
        return count;
 }
 
+unsigned long sum_zone_numa_state(int node,
+                                enum numa_stat_item item)
+{ /* Returns the sum of zone_numa_state() for @item over every zone slot of @node. */
+       struct zone *zones = NODE_DATA(node)->node_zones; /* zone array of the given node */
+       int i;
+       unsigned long count = 0; /* running total returned to the caller */
+
+       for (i = 0; i < MAX_NR_ZONES; i++) /* visits all MAX_NR_ZONES entries, with no per-zone filtering here */
+               count += zone_numa_state(zones + i, item);
+
+       return count;
+}
+
 /*
  * Determine the per node value of a stat item.
  */
 #if IS_ENABLED(CONFIG_ZSMALLOC)
        "nr_zspages",
 #endif
+       "nr_free_cma",
+
+       /* enum numa_stat_item counters */
 #ifdef CONFIG_NUMA
        "numa_hit",
        "numa_miss",
        "numa_local",
        "numa_other",
 #endif
-       "nr_free_cma",
 
        /* Node-based counters */
        "nr_inactive_anon",
 };
 #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */
 
-
 #if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
      defined(CONFIG_PROC_FS)
 static void *frag_start(struct seq_file *m, loff_t *pos)
                seq_printf(m, "\n  per-node stats");
                for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
                        seq_printf(m, "\n      %-12s %lu",
-                               vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
+                               vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
+                               NR_VM_NUMA_STAT_ITEMS],
                                node_page_state(pgdat, i));
                }
        }
                seq_printf(m, "\n      %-12s %lu", vmstat_text[i],
                                zone_page_state(zone, i));
 
+#ifdef CONFIG_NUMA
+       for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
+               seq_printf(m, "\n      %-12s %lu",
+                               vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
+                               zone_numa_state(zone, i));
+#endif
+
        seq_printf(m, "\n  pagesets");
        for_each_online_cpu(i) {
                struct per_cpu_pageset *pageset;
        if (*pos >= ARRAY_SIZE(vmstat_text))
                return NULL;
        stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
+                         NR_VM_NUMA_STAT_ITEMS * sizeof(unsigned long) +
                          NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) +
                          NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);
 
                v[i] = global_zone_page_state(i);
        v += NR_VM_ZONE_STAT_ITEMS;
 
+#ifdef CONFIG_NUMA
+       for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
+               v[i] = global_numa_state(i);
+       v += NR_VM_NUMA_STAT_ITEMS;
+#endif
+
        for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
                v[i] = global_node_page_state(i);
        v += NR_VM_NODE_STAT_ITEMS;
                        err = -EINVAL;
                }
        }
+#ifdef CONFIG_NUMA
+       for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
+               val = atomic_long_read(&vm_numa_stat[i]);
+               if (val < 0) {
+                       pr_warn("%s: %s %ld\n",
+                               __func__, vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], val);
+                       err = -EINVAL;
+               }
+       }
+#endif
        if (err)
                return err;
        if (write)
                struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
 
                BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
+#ifdef CONFIG_NUMA
+               BUILD_BUG_ON(sizeof(p->vm_numa_stat_diff[0]) != 1);
+#endif
                /*
                 * The fast way of checking if there are any vmstat diffs.
                 * This works because the diffs are byte sized items.
                 */
                if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
                        return true;
-
+#ifdef CONFIG_NUMA
+               if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS))
+                       return true;
+#endif
        }
        return false;
 }