enum { DD_PRIO_COUNT = 3 };
 
-/* I/O statistics per I/O priority. */
+/*
+ * I/O statistics per I/O priority. It is fine if these counters overflow.
+ * What matters is that these counters are at least as wide as
+ * log2(max_outstanding_requests).
+ */
 struct io_stats_per_prio {
-       local_t inserted;
-       local_t merged;
-       local_t dispatched;
-       local_t completed;
-};
-
-/* I/O statistics for all I/O priorities (enum dd_prio). */
-struct io_stats {
-       struct io_stats_per_prio stats[DD_PRIO_COUNT];
+       uint32_t inserted;
+       uint32_t merged;
+       uint32_t dispatched;
+       atomic_t completed;
 };
 
 /*
        struct list_head fifo_list[DD_DIR_COUNT];
        /* Next request in FIFO order. Read, write or both are NULL. */
        struct request *next_rq[DD_DIR_COUNT];
+       struct io_stats_per_prio stats;
 };
 
 struct deadline_data {
        unsigned int batching;          /* number of sequential requests made */
        unsigned int starved;           /* times reads have starved writes */
 
-       struct io_stats __percpu *stats;
-
        /*
         * settings that change how the i/o scheduler behaves
         */
        spinlock_t zone_lock;
 };
 
-/* Count one event of type 'event_type' and with I/O priority 'prio' */
-#define dd_count(dd, event_type, prio) do {                            \
-       struct io_stats *io_stats = get_cpu_ptr((dd)->stats);           \
-                                                                       \
-       BUILD_BUG_ON(!__same_type((dd), struct deadline_data *));       \
-       BUILD_BUG_ON(!__same_type((prio), enum dd_prio));               \
-       local_inc(&io_stats->stats[(prio)].event_type);                 \
-       put_cpu_ptr(io_stats);                                          \
-} while (0)
-
-/*
- * Returns the total number of dd_count(dd, event_type, prio) calls across all
- * CPUs. No locking or barriers since it is fine if the returned sum is slightly
- * outdated.
- */
-#define dd_sum(dd, event_type, prio) ({                                        \
-       unsigned int cpu;                                               \
-       u32 sum = 0;                                                    \
-                                                                       \
-       BUILD_BUG_ON(!__same_type((dd), struct deadline_data *));       \
-       BUILD_BUG_ON(!__same_type((prio), enum dd_prio));               \
-       for_each_present_cpu(cpu)                                       \
-               sum += local_read(&per_cpu_ptr((dd)->stats, cpu)->      \
-                                 stats[(prio)].event_type);            \
-       sum;                                                            \
-})
-
 /* Maps an I/O priority class to a deadline scheduler priority. */
 static const enum dd_prio ioprio_class_to_prio[] = {
        [IOPRIO_CLASS_NONE]     = DD_BE_PRIO,
        const u8 ioprio_class = dd_rq_ioclass(next);
        const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
 
-       dd_count(dd, merged, prio);
+       lockdep_assert_held(&dd->lock);
+
+       dd->per_prio[prio].stats.merged++;
 
        /*
         * if next expires before rq, assign its expire time to rq
 /* Number of requests queued for a given priority level. */
 static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio)
 {
-       return dd_sum(dd, inserted, prio) - dd_sum(dd, completed, prio);
+       const struct io_stats_per_prio *stats = &dd->per_prio[prio].stats;
+
+       lockdep_assert_held(&dd->lock);
+
+       return stats->inserted - atomic_read(&stats->completed);
 }
 
 /*
 done:
        ioprio_class = dd_rq_ioclass(rq);
        prio = ioprio_class_to_prio[ioprio_class];
-       dd_count(dd, dispatched, prio);
+       dd->per_prio[prio].stats.dispatched++;
        /*
         * If the request needs its target zone locked, do it.
         */
 
        for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
                struct dd_per_prio *per_prio = &dd->per_prio[prio];
+               const struct io_stats_per_prio *stats = &per_prio->stats;
+               uint32_t queued;
 
                WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_READ]));
                WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_WRITE]));
-               WARN_ONCE(dd_queued(dd, prio) != 0,
+
+               spin_lock(&dd->lock);
+               queued = dd_queued(dd, prio);
+               spin_unlock(&dd->lock);
+
+               WARN_ONCE(queued != 0,
                          "statistics for priority %d: i %u m %u d %u c %u\n",
-                         prio, dd_sum(dd, inserted, prio),
-                         dd_sum(dd, merged, prio),
-                         dd_sum(dd, dispatched, prio),
-                         dd_sum(dd, completed, prio));
+                         prio, stats->inserted, stats->merged,
+                         stats->dispatched, atomic_read(&stats->completed));
        }
 
-       free_percpu(dd->stats);
-
        kfree(dd);
 }
 
 
        eq->elevator_data = dd;
 
-       dd->stats = alloc_percpu_gfp(typeof(*dd->stats),
-                                    GFP_KERNEL | __GFP_ZERO);
-       if (!dd->stats)
-               goto free_dd;
-
        for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
                struct dd_per_prio *per_prio = &dd->per_prio[prio];
 
        q->elevator = eq;
        return 0;
 
-free_dd:
-       kfree(dd);
-
 put_eq:
        kobject_put(&eq->kobj);
        return ret;
        blk_req_zone_write_unlock(rq);
 
        prio = ioprio_class_to_prio[ioprio_class];
+       per_prio = &dd->per_prio[prio];
        if (!rq->elv.priv[0]) {
-               dd_count(dd, inserted, prio);
+               per_prio->stats.inserted++;
                rq->elv.priv[0] = (void *)(uintptr_t)1;
        }
 
 
        trace_block_rq_insert(rq);
 
-       per_prio = &dd->per_prio[prio];
        if (at_head) {
                list_add(&rq->queuelist, &per_prio->dispatch);
        } else {
        if (!rq->elv.priv[0])
                return;
 
-       dd_count(dd, completed, prio);
+       atomic_inc(&per_prio->stats.completed);
 
        if (blk_queue_is_zoned(q)) {
                unsigned long flags;
 {
        struct request_queue *q = data;
        struct deadline_data *dd = q->elevator->elevator_data;
+       u32 rt, be, idle;
+
+       spin_lock(&dd->lock);
+       rt = dd_queued(dd, DD_RT_PRIO);
+       be = dd_queued(dd, DD_BE_PRIO);
+       idle = dd_queued(dd, DD_IDLE_PRIO);
+       spin_unlock(&dd->lock);
+
+       seq_printf(m, "%u %u %u\n", rt, be, idle);
 
-       seq_printf(m, "%u %u %u\n", dd_queued(dd, DD_RT_PRIO),
-                  dd_queued(dd, DD_BE_PRIO),
-                  dd_queued(dd, DD_IDLE_PRIO));
        return 0;
 }
 
 /* Number of requests owned by the block driver for a given priority. */
 static u32 dd_owned_by_driver(struct deadline_data *dd, enum dd_prio prio)
 {
-       return dd_sum(dd, dispatched, prio) + dd_sum(dd, merged, prio)
-               - dd_sum(dd, completed, prio);
+       const struct io_stats_per_prio *stats = &dd->per_prio[prio].stats;
+
+       lockdep_assert_held(&dd->lock);
+
+       return stats->dispatched + stats->merged -
+               atomic_read(&stats->completed);
 }
 
 static int dd_owned_by_driver_show(void *data, struct seq_file *m)
 {
        struct request_queue *q = data;
        struct deadline_data *dd = q->elevator->elevator_data;
+       u32 rt, be, idle;
+
+       spin_lock(&dd->lock);
+       rt = dd_owned_by_driver(dd, DD_RT_PRIO);
+       be = dd_owned_by_driver(dd, DD_BE_PRIO);
+       idle = dd_owned_by_driver(dd, DD_IDLE_PRIO);
+       spin_unlock(&dd->lock);
+
+       seq_printf(m, "%u %u %u\n", rt, be, idle);
 
-       seq_printf(m, "%u %u %u\n", dd_owned_by_driver(dd, DD_RT_PRIO),
-                  dd_owned_by_driver(dd, DD_BE_PRIO),
-                  dd_owned_by_driver(dd, DD_IDLE_PRIO));
        return 0;
 }