atomic_t scale_cookie;
 };
 
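+/*
+ * SSD latencies are small and noisy enough that a mean is a weak signal,
+ * so for nonrotational devices we only count how many I/Os in a window
+ * completed over the latency target (missed) out of the total seen.
+ */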
+struct percentile_stats {
+       u64 total;
+       u64 missed;
+};
+
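+/*
+ * Per-cpu stat bucket: percentile counters for SSDs, full blk_rq_stat
+ * (mean latency) accounting for rotational devices. iolat->ssd selects
+ * which member of the union is live.
+ */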
+struct latency_stat {
+       union {
+               struct percentile_stats ps;
+               struct blk_rq_stat rqs;
+       };
+};
+
 struct iolatency_grp {
        struct blkg_policy_data pd;
-       struct blk_rq_stat __percpu *stats;
+       struct latency_stat __percpu *stats;
        struct blk_iolatency *blkiolat;
        struct rq_depth rq_depth;
        struct rq_wait rq_wait;
        /* Our current number of IO's for the last summation. */
        u64 nr_samples;
 
+       bool ssd;
        struct child_latency_info child_lat;
 };
 
        return pd_to_blkg(&iolat->pd);
 }
 
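+/* Reset a stat bucket according to the device's accounting mode. */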
+static inline void latency_stat_init(struct iolatency_grp *iolat,
+                                    struct latency_stat *stat)
+{
+       if (iolat->ssd) {
+               stat->ps.total = 0;
+               stat->ps.missed = 0;
+       } else {
+               blk_rq_stat_init(&stat->rqs);
+       }
+}
+
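+/* Fold @stat into @sum when collapsing the per-cpu buckets. */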
+static inline void latency_stat_sum(struct iolatency_grp *iolat,
+                                   struct latency_stat *sum,
+                                   struct latency_stat *stat)
+{
+       if (iolat->ssd) {
+               sum->ps.total += stat->ps.total;
+               sum->ps.missed += stat->ps.missed;
+       } else {
+               blk_rq_stat_sum(&sum->rqs, &stat->rqs);
+       }
+}
+
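+/*
+ * Record one completion on the local cpu. In SSD mode an I/O that took
+ * longer than the configured target simply counts as a miss.
+ */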
+static inline void latency_stat_record_time(struct iolatency_grp *iolat,
+                                           u64 req_time)
+{
+       struct latency_stat *stat = get_cpu_ptr(iolat->stats);
+
+       if (iolat->ssd) {
+               if (req_time >= iolat->min_lat_nsec)
+                       stat->ps.missed++;
+               stat->ps.total++;
+       } else {
+               blk_rq_stat_add(&stat->rqs, req_time);
+       }
+       put_cpu_ptr(stat);
+}
+
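+/*
+ * Decide whether the window was acceptable. SSDs tolerate up to 10% of
+ * the I/Os (but always at least one) missing the latency target, while
+ * rotational devices compare the mean completion time to the target.
+ */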
+static inline bool latency_sum_ok(struct iolatency_grp *iolat,
+                                 struct latency_stat *stat)
+{
+       if (iolat->ssd) {
+               u64 thresh = div64_u64(stat->ps.total, 10);
+               thresh = max(thresh, 1ULL);
+               return stat->ps.missed < thresh;
+       }
+       return stat->rqs.mean <= iolat->min_lat_nsec;
+}
+
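+/* Number of samples collected in this window, in either mode. */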
+static inline u64 latency_stat_samples(struct iolatency_grp *iolat,
+                                      struct latency_stat *stat)
+{
+       if (iolat->ssd)
+               return stat->ps.total;
+       return stat->rqs.nr_samples;
+}
+
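+/*
+ * Update the decaying latency average. This is only meaningful for
+ * rotational devices; SSD mode keeps no mean to feed into it.
+ */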
+static inline void iolat_update_total_lat_avg(struct iolatency_grp *iolat,
+                                             struct latency_stat *stat)
+{
+       int exp_idx;
+
+       if (iolat->ssd)
+               return;
+
+       /*
+        * CALC_LOAD takes in a number stored in fixed point representation.
+        * Because we are using this for IO time in ns, the values stored
+        * are significantly larger than the FIXED_1 denominator (2048).
+        * Therefore, rounding errors in the calculation are negligible and
+        * can be ignored.
+        */
+       exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1,
+                       div64_u64(iolat->cur_win_nsec,
+                                 BLKIOLATENCY_EXP_BUCKET_SIZE));
+       CALC_LOAD(iolat->lat_avg, iolatency_exp_factors[exp_idx],
+                 stat->rqs.mean);
+}
+
 static inline bool iolatency_may_queue(struct iolatency_grp *iolat,
                                       wait_queue_entry_t *wait,
                                       bool first_block)
                                  struct bio_issue *issue, u64 now,
                                  bool issue_as_root)
 {
-       struct blk_rq_stat *rq_stat;
        u64 start = bio_issue_time(issue);
        u64 req_time;
 
                return;
        }
 
-       rq_stat = get_cpu_ptr(iolat->stats);
-       blk_rq_stat_add(rq_stat, req_time);
-       put_cpu_ptr(rq_stat);
+       latency_stat_record_time(iolat, req_time);
 }
 
 #define BLKIOLATENCY_MIN_ADJUST_TIME (500 * NSEC_PER_MSEC)
        struct blkcg_gq *blkg = lat_to_blkg(iolat);
        struct iolatency_grp *parent;
        struct child_latency_info *lat_info;
-       struct blk_rq_stat stat;
+       struct latency_stat stat;
        unsigned long flags;
-       int cpu, exp_idx;
+       int cpu;
 
-       blk_rq_stat_init(&stat);
+       latency_stat_init(iolat, &stat);
        preempt_disable();
        for_each_online_cpu(cpu) {
-               struct blk_rq_stat *s;
+               struct latency_stat *s;
                s = per_cpu_ptr(iolat->stats, cpu);
-               blk_rq_stat_sum(&stat, s);
-               blk_rq_stat_init(s);
+               latency_stat_sum(iolat, &stat, s);
+               latency_stat_init(iolat, s);
        }
        preempt_enable();
 
 
        lat_info = &parent->child_lat;
 
-       /*
-        * CALC_LOAD takes in a number stored in fixed point representation.
-        * Because we are using this for IO time in ns, the values stored
-        * are significantly larger than the FIXED_1 denominator (2048).
-        * Therefore, rounding errors in the calculation are negligible and
-        * can be ignored.
-        */
-       exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1,
-                       div64_u64(iolat->cur_win_nsec,
-                                 BLKIOLATENCY_EXP_BUCKET_SIZE));
-       CALC_LOAD(iolat->lat_avg, iolatency_exp_factors[exp_idx], stat.mean);
+       iolat_update_total_lat_avg(iolat, &stat);
 
        /* Everything is ok and we don't need to adjust the scale. */
-       if (stat.mean <= iolat->min_lat_nsec &&
+       if (latency_sum_ok(iolat, &stat) &&
            atomic_read(&lat_info->scale_cookie) == DEFAULT_SCALE_COOKIE)
                return;
 
        /* Somebody beat us to the punch, just bail. */
        spin_lock_irqsave(&lat_info->lock, flags);
        lat_info->nr_samples -= iolat->nr_samples;
-       lat_info->nr_samples += stat.nr_samples;
-       iolat->nr_samples = stat.nr_samples;
+       lat_info->nr_samples += latency_stat_samples(iolat, &stat);
+       iolat->nr_samples = latency_stat_samples(iolat, &stat);
 
        if ((lat_info->last_scale_event >= now ||
            now - lat_info->last_scale_event < BLKIOLATENCY_MIN_ADJUST_TIME) &&
            lat_info->scale_lat <= iolat->min_lat_nsec)
                goto out;
 
-       if (stat.mean <= iolat->min_lat_nsec &&
-           stat.nr_samples >= BLKIOLATENCY_MIN_GOOD_SAMPLES) {
+       if (latency_sum_ok(iolat, &stat)) {
+               if (latency_stat_samples(iolat, &stat) <
+                   BLKIOLATENCY_MIN_GOOD_SAMPLES)
+                       goto out;
                if (lat_info->scale_grp == iolat) {
                        lat_info->last_scale_event = now;
                        scale_cookie_change(iolat->blkiolat, lat_info, true);
                }
-       } else if (stat.mean > iolat->min_lat_nsec) {
+       } else {
                lat_info->last_scale_event = now;
                if (!lat_info->scale_grp ||
                    lat_info->scale_lat > iolat->min_lat_nsec) {
        return 0;
 }
 
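+/*
+ * Emit the SSD-mode stats: the raw missed/total counters plus the queue
+ * depth this group is currently allowed.
+ */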
+static size_t iolatency_ssd_stat(struct iolatency_grp *iolat, char *buf,
+                                size_t size)
+{
+       struct latency_stat stat;
+       int cpu;
+
+       latency_stat_init(iolat, &stat);
+       preempt_disable();
+       for_each_online_cpu(cpu) {
+               struct latency_stat *s;
+               s = per_cpu_ptr(iolat->stats, cpu);
+               latency_stat_sum(iolat, &stat, s);
+       }
+       preempt_enable();
+
+       if (iolat->rq_depth.max_depth == UINT_MAX)
+               return scnprintf(buf, size, " missed=%llu total=%llu depth=max",
+                                (unsigned long long)stat.ps.missed,
+                                (unsigned long long)stat.ps.total);
+       return scnprintf(buf, size, " missed=%llu total=%llu depth=%u",
+                        (unsigned long long)stat.ps.missed,
+                        (unsigned long long)stat.ps.total,
+                        iolat->rq_depth.max_depth);
+}
+
 static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf,
                                size_t size)
 {
        struct iolatency_grp *iolat = pd_to_lat(pd);
-       unsigned long long avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC);
-       unsigned long long cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC);
+       unsigned long long avg_lat;
+       unsigned long long cur_win;
+
+       if (iolat->ssd)
+               return iolatency_ssd_stat(iolat, buf, size);
 
+       avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC);
+       cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC);
        if (iolat->rq_depth.max_depth == UINT_MAX)
                return scnprintf(buf, size, " depth=max avg_lat=%llu win=%llu",
                                 avg_lat, cur_win);
        iolat = kzalloc_node(sizeof(*iolat), gfp, node);
        if (!iolat)
                return NULL;
-       iolat->stats = __alloc_percpu_gfp(sizeof(struct blk_rq_stat),
-                                      __alignof__(struct blk_rq_stat), gfp);
+       iolat->stats = __alloc_percpu_gfp(sizeof(struct latency_stat),
+                                      __alignof__(struct latency_stat), gfp);
        if (!iolat->stats) {
                kfree(iolat);
                return NULL;
        u64 now = ktime_to_ns(ktime_get());
        int cpu;
 
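+       /* Nonrotational devices get the cheaper percentile accounting. */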
+       iolat->ssd = blk_queue_nonrot(blkg->q);
+
        for_each_possible_cpu(cpu) {
-               struct blk_rq_stat *stat;
+               struct latency_stat *stat;
                stat = per_cpu_ptr(iolat->stats, cpu);
-               blk_rq_stat_init(stat);
+               latency_stat_init(iolat, stat);
        }
 
        rq_wait_init(&iolat->rq_wait);