* Zone and node-based page accounting with per cpu differentials.
  */
 extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS];
-extern atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS];
 extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS];
+extern atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS]; /* global NUMA event totals */
 
 #ifdef CONFIG_NUMA
-static inline void zone_numa_state_add(long x, struct zone *zone,
-                                enum numa_stat_item item)
+static inline void zone_numa_event_add(long x, struct zone *zone,
+                               enum numa_stat_item item)
 {
-       atomic_long_add(x, &zone->vm_numa_stat[item]);
-       atomic_long_add(x, &vm_numa_stat[item]);
+       atomic_long_add(x, &zone->vm_numa_event[item]); /* add x to this zone's total for item */
+       atomic_long_add(x, &vm_numa_event[item]); /* and mirror it in the global total */
 }
 
-static inline unsigned long global_numa_state(enum numa_stat_item item)
+static inline unsigned long zone_numa_event_state(struct zone *zone,
+                                       enum numa_stat_item item)
 {
-       long x = atomic_long_read(&vm_numa_stat[item]);
-
-       return x;
+       return atomic_long_read(&zone->vm_numa_event[item]); /* zone total only; per-cpu counts are not added in */
 }
 
-static inline unsigned long zone_numa_state_snapshot(struct zone *zone,
-                                       enum numa_stat_item item)
+static inline unsigned long
+global_numa_event_state(enum numa_stat_item item)
 {
-       long x = atomic_long_read(&zone->vm_numa_stat[item]);
-       int cpu;
-
-       for_each_online_cpu(cpu)
-               x += per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_stat_diff[item];
-
-       return x;
+       return atomic_long_read(&vm_numa_event[item]); /* global total only; per-cpu counts are not added in */
 }
 #endif /* CONFIG_NUMA */
 
 }
 
 #ifdef CONFIG_NUMA
-extern void __inc_numa_state(struct zone *zone, enum numa_stat_item item);
+extern void __count_numa_event(struct zone *zone, enum numa_stat_item item);
 extern unsigned long sum_zone_node_page_state(int node,
                                               enum zone_stat_item item);
-extern unsigned long sum_zone_numa_state(int node, enum numa_stat_item item);
+extern unsigned long sum_zone_numa_event_state(int node, enum numa_stat_item item);
 extern unsigned long node_page_state(struct pglist_data *pgdat,
                                                 enum node_stat_item item);
 extern unsigned long node_page_state_pages(struct pglist_data *pgdat,
                                            enum node_stat_item item);
+extern void fold_vm_numa_events(void); /* fold per-cpu NUMA events into the zone and global totals */
 #else
 #define sum_zone_node_page_state(node, item) global_zone_page_state(item)
 #define node_page_state(node, item) global_node_page_state(item)
 #define node_page_state_pages(node, item) global_node_page_state_pages(item)
+static inline void fold_vm_numa_events(void)
+{ /* !CONFIG_NUMA stub: intentionally empty so callers need no #ifdef */
+}
 #endif /* CONFIG_NUMA */
 
 #ifdef CONFIG_SMP
 static inline const char *node_stat_name(enum node_stat_item item)
 {
        return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
-                          NR_VM_NUMA_STAT_ITEMS +
+                          NR_VM_NUMA_EVENT_ITEMS + /* skip the NUMA event name slots */
                           item];
 }
 
 static inline const char *writeback_stat_name(enum writeback_stat_item item)
 {
        return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
-                          NR_VM_NUMA_STAT_ITEMS +
+                          NR_VM_NUMA_EVENT_ITEMS + /* skip the NUMA event name slots */
                           NR_VM_NODE_STAT_ITEMS +
                           item];
 }
 static inline const char *vm_event_name(enum vm_event_item item)
 {
        return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
-                          NR_VM_NUMA_STAT_ITEMS +
+                          NR_VM_NUMA_EVENT_ITEMS +
                           NR_VM_NODE_STAT_ITEMS +
                           NR_VM_WRITEBACK_STAT_ITEMS +
                           item];
 
 
 #include "internal.h"
 
-#define NUMA_STATS_THRESHOLD (U16_MAX - 2)
-
 #ifdef CONFIG_NUMA
 int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;
 
 {
        int item, cpu;
 
-       for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) {
-               atomic_long_set(&zone->vm_numa_stat[item], 0);
-               for_each_online_cpu(cpu)
-                       per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_stat_diff[item]
+       for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) {
+               atomic_long_set(&zone->vm_numa_event[item], 0);
+               for_each_online_cpu(cpu) {
+                       per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_event[item]
                                                = 0;
+               }
        }
 }
 
 {
        int item;
 
-       for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++)
-               atomic_long_set(&vm_numa_stat[item], 0);
+       for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
+               atomic_long_set(&vm_numa_event[item], 0);
 }
 
 static void invalid_numa_statistics(void)
  * vm_stat contains the global counters
  */
 atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
-atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
 atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
+atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp; /* NOTE(review): no EXPORT_SYMBOL in view, unlike its neighbours - confirm no modular user */
 EXPORT_SYMBOL(vm_zone_stat);
-EXPORT_SYMBOL(vm_numa_stat);
 EXPORT_SYMBOL(vm_node_stat);
 
 #ifdef CONFIG_SMP
  * Fold a differential into the global counters.
  * Returns the number of counters updated.
  */
-#ifdef CONFIG_NUMA
-static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
+static int fold_diff(int *zone_diff, int *node_diff)
 {
        int i;
        int changes = 0;
                        changes++;
        }
 
-       for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
-               if (numa_diff[i]) {
-                       atomic_long_add(numa_diff[i], &vm_numa_stat[i]);
-                       changes++;
-       }
-
        for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
                if (node_diff[i]) {
                        atomic_long_add(node_diff[i], &vm_node_stat[i]);
        }
        return changes;
 }
-#else
-static int fold_diff(int *zone_diff, int *node_diff)
+
+#ifdef CONFIG_NUMA
+static void fold_vm_zone_numa_events(struct zone *zone)
 {
-       int i;
-       int changes = 0;
+       unsigned long zone_numa_events[NR_VM_NUMA_EVENT_ITEMS] = { 0, }; /* per-item sums over all online CPUs */
+       int cpu;
+       enum numa_stat_item item;
 
-       for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-               if (zone_diff[i]) {
-                       atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
-                       changes++;
-       }
+       for_each_online_cpu(cpu) {
+               struct per_cpu_zonestat *pzstats;
 
-       for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
-               if (node_diff[i]) {
-                       atomic_long_add(node_diff[i], &vm_node_stat[i]);
-                       changes++;
+               pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
+               for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) /* xchg() hands back the old count and leaves 0 behind */
+                       zone_numa_events[item] += xchg(&pzstats->vm_numa_event[item], 0);
         }
-       return changes;
+
+       for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) /* publish the sums to the zone and global totals */
+               zone_numa_event_add(zone_numa_events[item], zone, item);
 }
-#endif /* CONFIG_NUMA */
+
+void fold_vm_numa_events(void)
+{
+       struct zone *zone;
+
+       for_each_populated_zone(zone) /* fold the per-cpu NUMA events of every populated zone */
+               fold_vm_zone_numa_events(zone);
+}
+#endif /* CONFIG_NUMA */
 
 /*
  * Update the zone counters for the current cpu.
        struct zone *zone;
        int i;
        int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
-#ifdef CONFIG_NUMA
-       int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
-#endif
        int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
        int changes = 0;
 
                        }
                }
 #ifdef CONFIG_NUMA
-               for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
-                       int v;
-
-                       v = this_cpu_xchg(pzstats->vm_numa_stat_diff[i], 0);
-                       if (v) {
-
-                               atomic_long_add(v, &zone->vm_numa_stat[i]);
-                               global_numa_diff[i] += v;
-                               __this_cpu_write(pcp->expire, 3);
-                       }
-               }
 
                if (do_pagesets) {
                        cond_resched();
                }
        }
 
-#ifdef CONFIG_NUMA
-       changes += fold_diff(global_zone_diff, global_numa_diff,
-                            global_node_diff);
-#else
        changes += fold_diff(global_zone_diff, global_node_diff);
-#endif
        return changes;
 }
 
        struct zone *zone;
        int i;
        int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
-#ifdef CONFIG_NUMA
-       int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
-#endif
        int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
 
        for_each_populated_zone(zone) {
 
                pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
 
-               for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+               for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
                        if (pzstats->vm_stat_diff[i]) {
                                int v;
 
                                atomic_long_add(v, &zone->vm_stat[i]);
                                global_zone_diff[i] += v;
                        }
-
+               }
 #ifdef CONFIG_NUMA
-               for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
-                       if (pzstats->vm_numa_stat_diff[i]) {
-                               int v;
+               for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
+                       if (pzstats->vm_numa_event[i]) {
+                               unsigned long v;
 
-                               v = pzstats->vm_numa_stat_diff[i];
-                               pzstats->vm_numa_stat_diff[i] = 0;
-                               atomic_long_add(v, &zone->vm_numa_stat[i]);
-                               global_numa_diff[i] += v;
+                               v = pzstats->vm_numa_event[i];
+                               pzstats->vm_numa_event[i] = 0;
+                               zone_numa_event_add(v, zone, i);
                        }
+               }
 #endif
        }
 
                        }
        }
 
-#ifdef CONFIG_NUMA
-       fold_diff(global_zone_diff, global_numa_diff, global_node_diff);
-#else
        fold_diff(global_zone_diff, global_node_diff);
-#endif
 }
 
 /*
  */
 void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats)
 {
+       unsigned long v;        /*
+                                * Scratch for the value being moved; shared by both
+                                * loops. NOTE(review): this used to be a per-loop int.
+                                * vm_stat_diff[] entries are presumably signed - confirm
+                                * a negative diff still reaches zone_page_state_add()
+                                * as a negative delta.
+                                */
        int i;
 
-       for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+       for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
                if (pzstats->vm_stat_diff[i]) {
-                       int v = pzstats->vm_stat_diff[i];
+                       v = pzstats->vm_stat_diff[i];
                        pzstats->vm_stat_diff[i] = 0;
-                       atomic_long_add(v, &zone->vm_stat[i]);
-                       atomic_long_add(v, &vm_zone_stat[i]);
+                       zone_page_state_add(v, zone, i);
                }
+       }
 
 #ifdef CONFIG_NUMA
-       for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
-               if (pzstats->vm_numa_stat_diff[i]) {
-                       int v = pzstats->vm_numa_stat_diff[i];
-
-                       pzstats->vm_numa_stat_diff[i] = 0;
-                       atomic_long_add(v, &zone->vm_numa_stat[i]);
-                       atomic_long_add(v, &vm_numa_stat[i]);
+       for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
+               if (pzstats->vm_numa_event[i]) {
+                       v = pzstats->vm_numa_event[i];
+                       pzstats->vm_numa_event[i] = 0;
+                       zone_numa_event_add(v, zone, i); /* move the count into the zone and global totals */
                }
+       }
 #endif
 }
 #endif
 
 #ifdef CONFIG_NUMA
-void __inc_numa_state(struct zone *zone,
+/*
+ * Count one NUMA event of type @item against @zone by incrementing this
+ * CPU's per-cpu counter. Nothing is added to the zone or global totals
+ * here; that happens when the per-cpu counters are folded or drained.
+ * See __count_vm_event comment on why raw_cpu_inc is used.
+ */
+void __count_numa_event(struct zone *zone,
                                  enum numa_stat_item item)
 {
        struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;
-       u16 __percpu *p = pzstats->vm_numa_stat_diff + item;
-       u16 v;
-
-       v = __this_cpu_inc_return(*p);
 
-       if (unlikely(v > NUMA_STATS_THRESHOLD)) {
-               zone_numa_state_add(v, zone, item);
-               __this_cpu_write(*p, 0);
-       }
+       raw_cpu_inc(pzstats->vm_numa_event[item]);
 }
 
 /*
        return count;
 }
 
-/*
- * Determine the per node value of a numa stat item. To avoid deviation,
- * the per cpu stat number in vm_numa_stat_diff[] is also included.
- */
-unsigned long sum_zone_numa_state(int node,
+/*
+ * Determine the per node value of a numa stat item by summing the per-zone
+ * totals of all MAX_NR_ZONES zones of @node. Only the per-zone totals are
+ * read; per-cpu counts that have not been folded yet are not included.
+ */
+unsigned long sum_zone_numa_event_state(int node,
                                  enum numa_stat_item item)
 {
        struct zone *zones = NODE_DATA(node)->node_zones;
-       int i;
        unsigned long count = 0;
+       int i;
 
        for (i = 0; i < MAX_NR_ZONES; i++)
-               count += zone_numa_state_snapshot(zones + i, item);
+               count += zone_numa_event_state(zones + i, item);
 
        return count;
 }
                           zone_page_state(zone, i));
 
 #ifdef CONFIG_NUMA
-       for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
+       for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
                seq_printf(m, "\n      %-12s %lu", numa_stat_name(i),
-                          zone_numa_state_snapshot(zone, i));
+                          zone_numa_event_state(zone, i));
 #endif
 
        seq_printf(m, "\n  pagesets");
 };
 
 #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
-                        NR_VM_NUMA_STAT_ITEMS + \
+                        NR_VM_NUMA_EVENT_ITEMS + \
                         NR_VM_NODE_STAT_ITEMS + \
                         NR_VM_WRITEBACK_STAT_ITEMS + \
                         (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
                return NULL;
 
        BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);
+       fold_vm_numa_events();
        v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);
        m->private = v;
        if (!v)
        v += NR_VM_ZONE_STAT_ITEMS;
 
 #ifdef CONFIG_NUMA
-       for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
-               v[i] = global_numa_state(i);
-       v += NR_VM_NUMA_STAT_ITEMS;
+       for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
+               v[i] = global_numa_event_state(i);
+       v += NR_VM_NUMA_EVENT_ITEMS;
 #endif
 
        for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
                if (memchr_inv(pzstats->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS *
                               sizeof(pzstats->vm_stat_diff[0])))
                        return true;
-#ifdef CONFIG_NUMA
-               if (memchr_inv(pzstats->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS *
-                              sizeof(pzstats->vm_numa_stat_diff[0])))
-                       return true;
-#endif
+
                if (last_pgdat == zone->zone_pgdat)
                        continue;
                last_pgdat = zone->zone_pgdat;