                if (blkg->pd[i])
                        blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
 
-       blkg_rwstat_exit(&blkg->stat_ios);
-       blkg_rwstat_exit(&blkg->stat_bytes);
+       free_percpu(blkg->iostat_cpu);
        percpu_ref_exit(&blkg->refcnt);
        kfree(blkg);
 }
                                   gfp_t gfp_mask)
 {
        struct blkcg_gq *blkg;
-       int i;
+       int i, cpu;
 
        /* alloc and init base part */
        blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node);
        if (percpu_ref_init(&blkg->refcnt, blkg_release, 0, gfp_mask))
                goto err_free;
 
-       if (blkg_rwstat_init(&blkg->stat_bytes, gfp_mask) ||
-           blkg_rwstat_init(&blkg->stat_ios, gfp_mask))
+       blkg->iostat_cpu = alloc_percpu_gfp(struct blkg_iostat_set, gfp_mask);
+       if (!blkg->iostat_cpu)
                goto err_free;
 
        blkg->q = q;
        INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
        blkg->blkcg = blkcg;
 
+       u64_stats_init(&blkg->iostat.sync);
+       for_each_possible_cpu(cpu)
+               u64_stats_init(&per_cpu_ptr(blkg->iostat_cpu, cpu)->sync);
+
        for (i = 0; i < BLKCG_MAX_POLS; i++) {
                struct blkcg_policy *pol = blkcg_policy[i];
                struct blkg_policy_data *pd;
 static void blkg_destroy(struct blkcg_gq *blkg)
 {
        struct blkcg *blkcg = blkg->blkcg;
-       struct blkcg_gq *parent = blkg->parent;
        int i;
 
        lockdep_assert_held(&blkg->q->queue_lock);
                        pol->pd_offline_fn(blkg->pd[i]);
        }
 
-       if (parent) {
-               blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes);
-               blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios);
-       }
-
        blkg->online = false;
 
        radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
 {
        struct blkcg *blkcg = css_to_blkcg(css);
        struct blkcg_gq *blkg;
-       int i;
+       int i, cpu;
 
        mutex_lock(&blkcg_pol_mutex);
        spin_lock_irq(&blkcg->lock);
         * anyway.  If you get hit by a race, retry.
         */
        hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
-               blkg_rwstat_reset(&blkg->stat_bytes);
-               blkg_rwstat_reset(&blkg->stat_ios);
+               for_each_possible_cpu(cpu) {
+                       struct blkg_iostat_set *bis =
+                               per_cpu_ptr(blkg->iostat_cpu, cpu);
+                       memset(bis, 0, sizeof(*bis));
+               }
+               memset(&blkg->iostat, 0, sizeof(blkg->iostat));
 
                for (i = 0; i < BLKCG_MAX_POLS; i++) {
                        struct blkcg_policy *pol = blkcg_policy[i];
        struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
        struct blkcg_gq *blkg;
 
+       cgroup_rstat_flush(blkcg->css.cgroup);
        rcu_read_lock();
 
        hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
+               struct blkg_iostat_set *bis = &blkg->iostat;
                const char *dname;
                char *buf;
-               struct blkg_rwstat_sample rwstat;
                u64 rbytes, wbytes, rios, wios, dbytes, dios;
                size_t size = seq_get_buf(sf, &buf), off = 0;
                int i;
                bool has_stats = false;
+               unsigned seq;
 
                spin_lock_irq(&blkg->q->queue_lock);
 
                 */
                off += scnprintf(buf+off, size-off, "%s ", dname);
 
-               blkg_rwstat_recursive_sum(blkg, NULL,
-                               offsetof(struct blkcg_gq, stat_bytes), &rwstat);
-               rbytes = rwstat.cnt[BLKG_RWSTAT_READ];
-               wbytes = rwstat.cnt[BLKG_RWSTAT_WRITE];
-               dbytes = rwstat.cnt[BLKG_RWSTAT_DISCARD];
+               do {
+                       seq = u64_stats_fetch_begin(&bis->sync);
 
-               blkg_rwstat_recursive_sum(blkg, NULL,
-                                       offsetof(struct blkcg_gq, stat_ios), &rwstat);
-               rios = rwstat.cnt[BLKG_RWSTAT_READ];
-               wios = rwstat.cnt[BLKG_RWSTAT_WRITE];
-               dios = rwstat.cnt[BLKG_RWSTAT_DISCARD];
+                       rbytes = bis->cur.bytes[BLKG_IOSTAT_READ];
+                       wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE];
+                       dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD];
+                       rios = bis->cur.ios[BLKG_IOSTAT_READ];
+                       wios = bis->cur.ios[BLKG_IOSTAT_WRITE];
+                       dios = bis->cur.ios[BLKG_IOSTAT_DISCARD];
+               } while (u64_stats_fetch_retry(&bis->sync, seq));
 
                if (rbytes || wbytes || rios || wios) {
                        has_stats = true;
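
For reference, the do/while loop above is the standard u64_stats_fetch_begin()/u64_stats_fetch_retry() pattern: it compiles to nothing on 64-bit kernels and becomes a sequence-counter read loop on 32-bit SMP, where 64-bit counters cannot be loaded atomically. A minimal userspace model of that retry protocol (all names invented for the sketch, memory barriers omitted, control flow only):

#include <stdint.h>
#include <stdio.h>

/* one counter set guarded by an even/odd sequence word */
struct sample {
	unsigned int seq;	/* odd while a writer is mid-update */
	uint64_t bytes;
	uint64_t ios;
};

static void sample_update(struct sample *s, uint64_t b)
{
	s->seq++;		/* "update_begin": seq goes odd */
	s->bytes += b;
	s->ios++;
	s->seq++;		/* "update_end": seq even again */
}

static void sample_read(const struct sample *s, uint64_t *b, uint64_t *i)
{
	unsigned int start;

	do {
		start = s->seq;	/* "fetch_begin" */
		*b = s->bytes;
		*i = s->ios;
		/* retry if a writer was active or finished in between */
	} while ((start & 1) || start != s->seq);
}

int main(void)
{
	struct sample s = { 0 };
	uint64_t b, i;

	sample_update(&s, 4096);
	sample_read(&s, &b, &i);
	printf("%llu bytes over %llu ios\n",
	       (unsigned long long)b, (unsigned long long)i);
	return 0;
}
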
        return ret;
 }
 
+static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
+{
+       int i;
+
+       for (i = 0; i < BLKG_IOSTAT_NR; i++) {
+               dst->bytes[i] = src->bytes[i];
+               dst->ios[i] = src->ios[i];
+       }
+}
+
+static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
+{
+       int i;
+
+       for (i = 0; i < BLKG_IOSTAT_NR; i++) {
+               dst->bytes[i] += src->bytes[i];
+               dst->ios[i] += src->ios[i];
+       }
+}
+
+static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src)
+{
+       int i;
+
+       for (i = 0; i < BLKG_IOSTAT_NR; i++) {
+               dst->bytes[i] -= src->bytes[i];
+               dst->ios[i] -= src->ios[i];
+       }
+}
+
+static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+{
+       struct blkcg *blkcg = css_to_blkcg(css);
+       struct blkcg_gq *blkg;
+
+       rcu_read_lock();
+
+       hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
+               struct blkcg_gq *parent = blkg->parent;
+               struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
+               struct blkg_iostat cur, delta;
+               unsigned seq;
+
+               /* fetch the current per-cpu values */
+               do {
+                       seq = u64_stats_fetch_begin(&bisc->sync);
+                       blkg_iostat_set(&cur, &bisc->cur);
+               } while (u64_stats_fetch_retry(&bisc->sync, seq));
+
+               /* propagate percpu delta to global */
+               u64_stats_update_begin(&blkg->iostat.sync);
+               blkg_iostat_set(&delta, &cur);
+               blkg_iostat_sub(&delta, &bisc->last);
+               blkg_iostat_add(&blkg->iostat.cur, &delta);
+               blkg_iostat_add(&bisc->last, &delta);
+               u64_stats_update_end(&blkg->iostat.sync);
+
+               /* propagate global delta to parent */
+               if (parent) {
+                       u64_stats_update_begin(&parent->iostat.sync);
+                       blkg_iostat_set(&delta, &blkg->iostat.cur);
+                       blkg_iostat_sub(&delta, &blkg->iostat.last);
+                       blkg_iostat_add(&parent->iostat.cur, &delta);
+                       blkg_iostat_add(&blkg->iostat.last, &delta);
+                       u64_stats_update_end(&parent->iostat.sync);
+               }
+       }
+
+       rcu_read_unlock();
+}
+
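
For reference, the cur/last pairing used by blkcg_rstat_flush() is what lets a flush run any number of times without double counting: each step forwards only the delta accumulated since the previous flush, once from the per-CPU set into blkg->iostat and once from blkg->iostat into the parent. A self-contained sketch of one such step (names invented for the sketch, u64_stats synchronization omitted):

#include <stdint.h>
#include <stdio.h>

struct iostat { uint64_t bytes; };

/* each level keeps a running total and how much was already forwarded */
struct node {
	struct iostat cur, last;
	struct node *parent;
};

/* forward only what src gained since the previous flush */
static void flush_step(struct node *src, struct iostat *dst)
{
	uint64_t delta = src->cur.bytes - src->last.bytes;

	dst->bytes += delta;
	src->last.bytes += delta;
}

int main(void)
{
	struct node parent = { 0 };
	struct node child = { .parent = &parent };

	child.cur.bytes += 4096;			/* hot-path update */
	flush_step(&child, &child.parent->cur);		/* forwards 4096 */
	flush_step(&child, &child.parent->cur);		/* forwards 0 */
	child.cur.bytes += 512;
	flush_step(&child, &child.parent->cur);		/* forwards only 512 */

	printf("child=%llu parent=%llu\n",
	       (unsigned long long)child.cur.bytes,
	       (unsigned long long)parent.cur.bytes);	/* both print 4608 */
	return 0;
}
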
 static void blkcg_bind(struct cgroup_subsys_state *root_css)
 {
        int i;
        .css_offline = blkcg_css_offline,
        .css_free = blkcg_css_free,
        .can_attach = blkcg_can_attach,
+       .css_rstat_flush = blkcg_rstat_flush,
        .bind = blkcg_bind,
        .dfl_cftypes = blkcg_files,
        .legacy_cftypes = blkcg_legacy_files,
 
  */
 
 #include <linux/cgroup.h>
+#include <linux/percpu.h>
 #include <linux/percpu_counter.h>
+#include <linux/u64_stats_sync.h>
 #include <linux/seq_file.h>
 #include <linux/radix-tree.h>
 #include <linux/blkdev.h>
 
 #ifdef CONFIG_BLK_CGROUP
 
+enum blkg_iostat_type {
+       BLKG_IOSTAT_READ,
+       BLKG_IOSTAT_WRITE,
+       BLKG_IOSTAT_DISCARD,
+
+       BLKG_IOSTAT_NR,
+};
+
 enum blkg_rwstat_type {
        BLKG_RWSTAT_READ,
        BLKG_RWSTAT_WRITE,
 #endif
 };
 
+struct blkg_iostat {
+       u64                             bytes[BLKG_IOSTAT_NR];
+       u64                             ios[BLKG_IOSTAT_NR];
+};
+
+struct blkg_iostat_set {
+       struct u64_stats_sync           sync;
+       struct blkg_iostat              cur;
+       struct blkg_iostat              last;
+};
+
 /*
  * blkg_[rw]stat->aux_cnt is excluded for local stats but included for
  * recursive.  Used to carry stats of dead children.
        /* is this blkg online? protected by both blkcg and q locks */
        bool                            online;
 
-       struct blkg_rwstat              stat_bytes;
-       struct blkg_rwstat              stat_ios;
+       struct blkg_iostat_set __percpu *iostat_cpu;
+       struct blkg_iostat_set          iostat;
 
        struct blkg_policy_data         *pd[BLKCG_MAX_POLS];
 
        throtl = blk_throtl_bio(q, blkg, bio);
 
        if (!throtl) {
+               struct blkg_iostat_set *bis;
+               int rwd, cpu;
+
+               if (op_is_discard(bio->bi_opf))
+                       rwd = BLKG_IOSTAT_DISCARD;
+               else if (op_is_write(bio->bi_opf))
+                       rwd = BLKG_IOSTAT_WRITE;
+               else
+                       rwd = BLKG_IOSTAT_READ;
+
+               cpu = get_cpu();
+               bis = per_cpu_ptr(blkg->iostat_cpu, cpu);
+               u64_stats_update_begin(&bis->sync);
+
                /*
                 * If the bio is flagged with BIO_QUEUE_ENTERED it means this
                 * is a split bio and we would have already accounted for the
                 * size of the bio.
                 */
                if (!bio_flagged(bio, BIO_QUEUE_ENTERED))
-                       blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
-                                       bio->bi_iter.bi_size);
-               blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
+                       bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
+               bis->cur.ios[rwd]++;
+
+               u64_stats_update_end(&bis->sync);
+               cgroup_rstat_updated(blkg->blkcg->css.cgroup, cpu);
+               put_cpu();
        }
 
        blkcg_bio_issue_init(bio);
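
For reference, the fast path added above only classifies the bio as read/write/discard, bumps two plain counters in this CPU's blkg_iostat_set under u64_stats_update_begin()/end(), and calls cgroup_rstat_updated() so the rstat core knows this CPU has pending data to fold in later. A toy userspace model of that flow (names invented for the sketch, the u64_stats and get_cpu()/put_cpu() pairing omitted):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4
enum { IOSTAT_READ, IOSTAT_WRITE, IOSTAT_DISCARD, IOSTAT_NR };

struct iostat_set {
	uint64_t bytes[IOSTAT_NR];
	uint64_t ios[IOSTAT_NR];
};

static struct iostat_set pcpu_stat[NR_CPUS];	/* models blkg->iostat_cpu */
static bool rstat_pending[NR_CPUS];		/* models cgroup_rstat_updated() */

static void account_bio(int cpu, int dir, uint64_t size, bool already_charged)
{
	/* split bios already had their bytes charged, only count the io */
	if (!already_charged)
		pcpu_stat[cpu].bytes[dir] += size;
	pcpu_stat[cpu].ios[dir]++;
	rstat_pending[cpu] = true;	/* tell the flusher this CPU has news */
}

int main(void)
{
	account_bio(0, IOSTAT_WRITE, 4096, false);
	account_bio(1, IOSTAT_READ, 512, false);

	printf("cpu0: wbytes=%llu wios=%llu, cpu1: rbytes=%llu rios=%llu\n",
	       (unsigned long long)pcpu_stat[0].bytes[IOSTAT_WRITE],
	       (unsigned long long)pcpu_stat[0].ios[IOSTAT_WRITE],
	       (unsigned long long)pcpu_stat[1].bytes[IOSTAT_READ],
	       (unsigned long long)pcpu_stat[1].ios[IOSTAT_READ]);
	return 0;
}
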