struct mem_cgroup_stat_cpu {
        s64 count[MEM_CGROUP_STAT_NSTATS];
-} ____cacheline_aligned_in_smp;
-
-struct mem_cgroup_stat {
-       struct mem_cgroup_stat_cpu cpustat[0];
 };
 
-static inline void
-__mem_cgroup_stat_set_safe(struct mem_cgroup_stat_cpu *stat,
-                               enum mem_cgroup_stat_index idx, s64 val)
-{
-       stat->count[idx] = val;
-}
-
-static inline s64
-__mem_cgroup_stat_read_local(struct mem_cgroup_stat_cpu *stat,
-                               enum mem_cgroup_stat_index idx)
-{
-       return stat->count[idx];
-}
-
-/*
- * For accounting under irq disable, no need for increment preempt count.
- */
-static inline void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat_cpu *stat,
-               enum mem_cgroup_stat_index idx, int val)
-{
-       stat->count[idx] += val;
-}
-
-static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
-               enum mem_cgroup_stat_index idx)
-{
-       int cpu;
-       s64 ret = 0;
-       for_each_possible_cpu(cpu)
-               ret += stat->cpustat[cpu].count[idx];
-       return ret;
-}
-
-static s64 mem_cgroup_local_usage(struct mem_cgroup_stat *stat)
-{
-       s64 ret;
-
-       ret = mem_cgroup_read_stat(stat, MEM_CGROUP_STAT_CACHE);
-       ret += mem_cgroup_read_stat(stat, MEM_CGROUP_STAT_RSS);
-       return ret;
-}
-
 /*
  * per-zone information in memory controller.
  */
        unsigned long   move_charge_at_immigrate;
 
        /*
-        * statistics. This must be placed at the end of memcg.
+        * percpu counter.
         */
-       struct mem_cgroup_stat stat;
+       struct mem_cgroup_stat_cpu *stat;
 };
 
 /* Stuffs for move charges at task migration. */
 static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem)
 {
        bool ret = false;
-       int cpu;
        s64 val;
-       struct mem_cgroup_stat_cpu *cpustat;
 
-       cpu = get_cpu();
-       cpustat = &mem->stat.cpustat[cpu];
-       val = __mem_cgroup_stat_read_local(cpustat, MEM_CGROUP_STAT_SOFTLIMIT);
+       val = this_cpu_read(mem->stat->count[MEM_CGROUP_STAT_SOFTLIMIT]);
        if (unlikely(val < 0)) {
-               __mem_cgroup_stat_set_safe(cpustat, MEM_CGROUP_STAT_SOFTLIMIT,
+               this_cpu_write(mem->stat->count[MEM_CGROUP_STAT_SOFTLIMIT],
                                SOFTLIMIT_EVENTS_THRESH);
                ret = true;
        }
-       put_cpu();
        return ret;
 }
 
        return mz;
 }
 
+static s64 mem_cgroup_read_stat(struct mem_cgroup *mem,
+               enum mem_cgroup_stat_index idx)
+{
+       int cpu;
+       s64 val = 0;
+
+       for_each_possible_cpu(cpu)
+               val += per_cpu(mem->stat->count[idx], cpu);
+       return val;
+}
+
+static s64 mem_cgroup_local_usage(struct mem_cgroup *mem)
+{
+       s64 ret;
+
+       ret = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS);
+       ret += mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE);
+       return ret;
+}
+
 static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
                                         bool charge)
 {
        int val = (charge) ? 1 : -1;
-       struct mem_cgroup_stat *stat = &mem->stat;
-       struct mem_cgroup_stat_cpu *cpustat;
-       int cpu = get_cpu();
-
-       cpustat = &stat->cpustat[cpu];
-       __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_SWAPOUT, val);
-       put_cpu();
+       this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_SWAPOUT], val);
 }
 
 static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
                                         bool charge)
 {
        int val = (charge) ? 1 : -1;
-       struct mem_cgroup_stat *stat = &mem->stat;
-       struct mem_cgroup_stat_cpu *cpustat;
-       int cpu = get_cpu();
 
-       cpustat = &stat->cpustat[cpu];
+       preempt_disable();
+
        if (PageCgroupCache(pc))
-               __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_CACHE, val);
+               __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], val);
        else
-               __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_RSS, val);
+               __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], val);
 
        if (charge)
-               __mem_cgroup_stat_add_safe(cpustat,
-                               MEM_CGROUP_STAT_PGPGIN_COUNT, 1);
+               __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGIN_COUNT]);
        else
-               __mem_cgroup_stat_add_safe(cpustat,
-                               MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
-       __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_SOFTLIMIT, -1);
-       __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_THRESHOLDS, -1);
+               __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGOUT_COUNT]);
+       __this_cpu_dec(mem->stat->count[MEM_CGROUP_STAT_SOFTLIMIT]);
+       __this_cpu_dec(mem->stat->count[MEM_CGROUP_STAT_THRESHOLDS]);
 
-       put_cpu();
+       preempt_enable();
 }
 
 static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem,
                                }
                        }
                }
-               if (!mem_cgroup_local_usage(&victim->stat)) {
+               if (!mem_cgroup_local_usage(victim)) {
                        /* this cgroup's local usage == 0 */
                        css_put(&victim->css);
                        continue;
 void mem_cgroup_update_file_mapped(struct page *page, int val)
 {
        struct mem_cgroup *mem;
-       struct mem_cgroup_stat *stat;
-       struct mem_cgroup_stat_cpu *cpustat;
-       int cpu;
        struct page_cgroup *pc;
 
        pc = lookup_page_cgroup(page);
                goto done;
 
        /*
-        * Preemption is already disabled, we don't need get_cpu()
+        * Preemption is already disabled. We can use __this_cpu_xxx
         */
-       cpu = smp_processor_id();
-       stat = &mem->stat;
-       cpustat = &stat->cpustat[cpu];
+       __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], val);
 
-       __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED, val);
 done:
        unlock_page_cgroup(pc);
 }
        struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
 {
        struct page *page;
-       int cpu;
-       struct mem_cgroup_stat *stat;
-       struct mem_cgroup_stat_cpu *cpustat;
 
        VM_BUG_ON(from == to);
        VM_BUG_ON(PageLRU(pc->page));
 
        page = pc->page;
        if (page_mapped(page) && !PageAnon(page)) {
-               cpu = smp_processor_id();
-               /* Update mapped_file data for mem_cgroup "from" */
-               stat = &from->stat;
-               cpustat = &stat->cpustat[cpu];
-               __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED,
-                                               -1);
-
-               /* Update mapped_file data for mem_cgroup "to" */
-               stat = &to->stat;
-               cpustat = &stat->cpustat[cpu];
-               __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED,
-                                               1);
+               /* Update mapped_file data for mem_cgroup */
+               preempt_disable();
+               __this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
+               __this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
+               preempt_enable();
        }
        mem_cgroup_charge_statistics(from, pc, false);
        if (uncharge)
 mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data)
 {
        struct mem_cgroup_idx_data *d = data;
-       d->val += mem_cgroup_read_stat(&mem->stat, d->idx);
+       d->val += mem_cgroup_read_stat(mem, d->idx);
        return 0;
 }
 
        s64 val;
 
        /* per cpu stat */
-       val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_CACHE);
+       val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE);
        s->stat[MCS_CACHE] += val * PAGE_SIZE;
-       val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS);
+       val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS);
        s->stat[MCS_RSS] += val * PAGE_SIZE;
-       val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_FILE_MAPPED);
+       val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_FILE_MAPPED);
        s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE;
-       val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGIN_COUNT);
+       val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_PGPGIN_COUNT);
        s->stat[MCS_PGPGIN] += val;
-       val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT);
+       val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_PGPGOUT_COUNT);
        s->stat[MCS_PGPGOUT] += val;
        if (do_swap_account) {
-               val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_SWAPOUT);
+               val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_SWAPOUT);
                s->stat[MCS_SWAP] += val * PAGE_SIZE;
        }
 
 static bool mem_cgroup_threshold_check(struct mem_cgroup *mem)
 {
        bool ret = false;
-       int cpu;
        s64 val;
-       struct mem_cgroup_stat_cpu *cpustat;
 
-       cpu = get_cpu();
-       cpustat = &mem->stat.cpustat[cpu];
-       val = __mem_cgroup_stat_read_local(cpustat, MEM_CGROUP_STAT_THRESHOLDS);
+       val = this_cpu_read(mem->stat->count[MEM_CGROUP_STAT_THRESHOLDS]);
        if (unlikely(val < 0)) {
-               __mem_cgroup_stat_set_safe(cpustat, MEM_CGROUP_STAT_THRESHOLDS,
+               this_cpu_write(mem->stat->count[MEM_CGROUP_STAT_THRESHOLDS],
                                THRESHOLDS_EVENTS_THRESH);
                ret = true;
        }
-       put_cpu();
        return ret;
 }
 
        kfree(mem->info.nodeinfo[node]);
 }
 
-static int mem_cgroup_size(void)
-{
-       int cpustat_size = nr_cpu_ids * sizeof(struct mem_cgroup_stat_cpu);
-       return sizeof(struct mem_cgroup) + cpustat_size;
-}
-
 static struct mem_cgroup *mem_cgroup_alloc(void)
 {
        struct mem_cgroup *mem;
-       int size = mem_cgroup_size();
+       int size = sizeof(struct mem_cgroup);
 
+       /* Can be very big if MAX_NUMNODES is very big */
        if (size < PAGE_SIZE)
                mem = kmalloc(size, GFP_KERNEL);
        else
 
        if (mem)
                memset(mem, 0, size);
+       mem->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
+       if (!mem->stat) {
+               if (size < PAGE_SIZE)
+                       kfree(mem);
+               else
+                       vfree(mem);
+               mem = NULL;
+       }
        return mem;
 }
 
        for_each_node_state(node, N_POSSIBLE)
                free_mem_cgroup_per_zone_info(mem, node);
 
-       if (mem_cgroup_size() < PAGE_SIZE)
+       free_percpu(mem->stat);
+       if (sizeof(struct mem_cgroup) < PAGE_SIZE)
                kfree(mem);
        else
                vfree(mem);