dm_cblock_t cblock;
 
        bool err:1;
+       bool discard:1;
        bool writeback:1;
        bool demote:1;
        bool promote:1;
 
 /*----------------------------------------------------------------*/
 
-static void build_key(dm_oblock_t oblock, struct dm_cell_key *key)
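+/*
+ * The key covers the half-open block range [begin, end), so one cell
+ * can now lock a run of many blocks.
+ */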
+static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key)
 {
        key->virtual = 0;
        key->dev = 0;
-       key->block_begin = from_oblock(oblock);
-       key->block_end = key->block_begin + 1ULL;
+       key->block_begin = from_oblock(begin);
+       key->block_end = from_oblock(end);
 }
 
 /*
  * The caller hands in a preallocated cell, and a free function for it.
  * The cell will be freed if there's an error, or if it wasn't used
  * because a cell with that key already exists.
  */
 typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);
 
-static int bio_detain(struct cache *cache, dm_oblock_t oblock,
-                     struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
-                     cell_free_fn free_fn, void *free_context,
-                     struct dm_bio_prison_cell **cell_result)
+static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end,
+                           struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
+                           cell_free_fn free_fn, void *free_context,
+                           struct dm_bio_prison_cell **cell_result)
 {
        int r;
        struct dm_cell_key key;
 
-       build_key(oblock, &key);
+       build_key(oblock_begin, oblock_end, &key);
        r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
        if (r)
                free_fn(free_context, cell_prealloc);
        return r;
 }
 
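+/*
+ * Convenience wrapper around bio_detain_range() for a single block.
+ */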
+static int bio_detain(struct cache *cache, dm_oblock_t oblock,
+                     struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
+                     cell_free_fn free_fn, void *free_context,
+                     struct dm_bio_prison_cell **cell_result)
+{
+       dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
+       return bio_detain_range(cache, oblock, end, bio,
+                               cell_prealloc, free_fn, free_context, cell_result);
+}
+
 static int get_cell(struct cache *cache,
                    dm_oblock_t oblock,
                    struct prealloc *structs,
                    struct dm_bio_prison_cell **cell_result)
{
        int r;
        struct dm_cell_key key;
        struct dm_bio_prison_cell *cell_prealloc;
 
        cell_prealloc = prealloc_get_cell(structs);
 
-       build_key(oblock, &key);
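+       /* A single-block key: the range [oblock, oblock + 1). */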
+       build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key);
        r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
        if (r)
                prealloc_put_cell(structs, cell_prealloc);
        return r;
 }
 
-static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
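+/*
+ * Returns how many origin blocks make up one discard block.
+ */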
+static dm_block_t oblocks_per_dblock(struct cache *cache)
 {
-       uint32_t discard_blocks = cache->discard_block_size;
-       dm_block_t b = from_oblock(oblock);
+       dm_block_t oblocks = cache->discard_block_size;
 
-       if (!block_size_is_power_of_two(cache))
-               discard_blocks = discard_blocks / cache->sectors_per_block;
+       if (block_size_is_power_of_two(cache))
+               oblocks >>= cache->sectors_per_block_shift;
        else
-               discard_blocks >>= cache->sectors_per_block_shift;
+               oblocks = block_div(oblocks, cache->sectors_per_block);
 
-       b = block_div(b, discard_blocks);
+       return oblocks;
+}
+
+static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
+{
+       return to_dblock(block_div(from_oblock(oblock),
+                                  oblocks_per_dblock(cache)));
+}
 
-       return to_dblock(b);
+static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock)
+{
+       return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache));
 }
 
 static void set_discard(struct cache *cache, dm_dblock_t b)
 {
        unsigned long flags;
 
+       BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
        atomic_inc(&cache->stats.discard_count);
 
        spin_lock_irqsave(&cache->lock, flags);
        wake_worker(cache);
 }
 
-static void issue_copy_real(struct dm_cache_migration *mg)
+static void issue_copy(struct dm_cache_migration *mg)
 {
        int r;
        struct dm_io_region o_region, c_region;
        migration_success_pre_commit(mg);
 }
 
-static void issue_copy(struct dm_cache_migration *mg)
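+/*
+ * Converts a bio's sector range into the half-open range [*b, *e) of
+ * discard blocks it completely covers.  A bio smaller than a discard
+ * block yields the empty range (*b == *e).
+ */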
+static void calc_discard_block_range(struct cache *cache, struct bio *bio,
+                                    dm_dblock_t *b, dm_dblock_t *e)
+{
+       sector_t sb = bio->bi_iter.bi_sector;
+       sector_t se = bio_end_sector(bio);
+
+       *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));
+
+       if (se - sb < cache->discard_block_size)
+               *e = *b;
+       else
+               *e = to_dblock(block_div(se, cache->discard_block_size));
+}
+
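+/*
+ * Marks every discard block completely covered by the bio, then ends
+ * the bio and releases the cell.
+ */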
+static void issue_discard(struct dm_cache_migration *mg)
+{
+       dm_dblock_t b, e;
+       struct bio *bio = mg->new_ocell->holder;
+
+       calc_discard_block_range(mg->cache, bio, &b, &e);
+       while (b != e) {
+               set_discard(mg->cache, b);
+               b = to_dblock(from_dblock(b) + 1);
+       }
+
+       bio_endio(bio, 0);
+       cell_defer(mg->cache, mg->new_ocell, false);
+       free_migration(mg);
+}
+
+static void issue_copy_or_discard(struct dm_cache_migration *mg)
 {
        bool avoid;
        struct cache *cache = mg->cache;
 
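+       /* Discard migrations only update the bitset; no data is copied. */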
+       if (mg->discard) {
+               issue_discard(mg);
+               return;
+       }
+
        if (mg->writeback || mg->demote)
                avoid = !is_dirty(cache, mg->cblock) ||
                        is_discarded_oblock(cache, mg->old_oblock);
                }
        }
 
-       avoid ? avoid_copy(mg) : issue_copy_real(mg);
+       avoid ? avoid_copy(mg) : issue_copy(mg);
 }
 
 static void complete_migration(struct dm_cache_migration *mg)
        struct dm_cache_migration *mg = prealloc_get_migration(structs);
 
        mg->err = false;
+       mg->discard = false;
        mg->writeback = false;
        mg->demote = false;
        mg->promote = true;
        struct dm_cache_migration *mg = prealloc_get_migration(structs);
 
        mg->err = false;
+       mg->discard = false;
        mg->writeback = true;
        mg->demote = false;
        mg->promote = false;
        struct dm_cache_migration *mg = prealloc_get_migration(structs);
 
        mg->err = false;
+       mg->discard = false;
        mg->writeback = false;
        mg->demote = true;
        mg->promote = true;
        struct dm_cache_migration *mg = prealloc_get_migration(structs);
 
        mg->err = false;
+       mg->discard = false;
        mg->writeback = false;
        mg->demote = true;
        mg->promote = false;
        quiesce_migration(mg);
 }
 
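+/*
+ * A discard is modelled as a migration so that it is quiesced against
+ * in-flight io on the same range before the bitset is updated.
+ */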
+static void discard(struct cache *cache, struct prealloc *structs,
+                   struct dm_bio_prison_cell *cell)
+{
+       struct dm_cache_migration *mg = prealloc_get_migration(structs);
+
+       mg->err = false;
+       mg->discard = true;
+       mg->writeback = false;
+       mg->demote = false;
+       mg->promote = false;
+       mg->requeue_holder = false;
+       mg->invalidate = false;
+       mg->cache = cache;
+       mg->old_ocell = NULL;
+       mg->new_ocell = cell;
+       mg->start_jiffies = jiffies;
+
+       quiesce_migration(mg);
+}
+
 /*----------------------------------------------------------------
  * bio processing
  *--------------------------------------------------------------*/
        issue(cache, bio);
 }
 
-/*
- * People generally discard large parts of a device, eg, the whole device
- * when formatting.  Splitting these large discards up into cache block
- * sized ios and then quiescing (always neccessary for discard) takes too
- * long.
- *
- * We keep it simple, and allow any size of discard to come in, and just
- * mark off blocks on the discard bitset.  No passdown occurs!
- *
- * To implement passdown we need to change the bio_prison such that a cell
- * can have a key that spans many blocks.
- */
-static void process_discard_bio(struct cache *cache, struct bio *bio)
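+/*
+ * Discard bios are now quiesced via the bio prison before the discard
+ * bitset is updated.  They are still not passed down to the origin
+ * device.
+ */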
+static void process_discard_bio(struct cache *cache, struct prealloc *structs,
+                               struct bio *bio)
 {
-       dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector,
-                                                 cache->discard_block_size);
-       dm_block_t end_block = bio_end_sector(bio);
-       dm_block_t b;
+       int r;
+       dm_dblock_t b, e;
+       struct dm_bio_prison_cell *cell_prealloc, *new_ocell;
 
-       end_block = block_div(end_block, cache->discard_block_size);
+       calc_discard_block_range(cache, bio, &b, &e);
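+       /* b == e means the bio doesn't span a complete discard block. */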
+       if (b == e) {
+               bio_endio(bio, 0);
+               return;
+       }
 
-       for (b = start_block; b < end_block; b++)
-               set_discard(cache, to_dblock(b));
+       cell_prealloc = prealloc_get_cell(structs);
+       r = bio_detain_range(cache, dblock_to_oblock(cache, b), dblock_to_oblock(cache, e), bio, cell_prealloc,
+                            (cell_free_fn) prealloc_put_cell,
+                            structs, &new_ocell);
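+       /* Someone else holds the range; the bio waits in their cell. */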
+       if (r > 0)
+               return;
 
-       bio_endio(bio, 0);
+       discard(cache, structs, new_ocell);
 }
 
 static bool spare_migration_bandwidth(struct cache *cache)
                if (bio->bi_rw & REQ_FLUSH)
                        process_flush_bio(cache, bio);
                else if (bio->bi_rw & REQ_DISCARD)
-                       process_discard_bio(cache, bio);
+                       process_discard_bio(cache, &structs, bio);
                else
                        process_bio(cache, &structs, bio);
        }
                        process_invalidation_requests(cache);
                }
 
-               process_migrations(cache, &cache->quiesced_migrations, issue_copy);
+               process_migrations(cache, &cache->quiesced_migrations, issue_copy_or_discard);
                process_migrations(cache, &cache->completed_migrations, complete_migration);
 
                if (commit_if_needed(cache)) {
        /*
         * FIXME: these limits may be incompatible with the cache device
         */
-       limits->max_discard_sectors = cache->discard_block_size * 1024;
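+       /* Don't advertise a discard larger than the origin device. */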
+       limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
+                                           cache->origin_sectors);
        limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
 }
 
 
 static struct target_type cache_target = {
        .name = "cache",
-       .version = {1, 5, 0},
+       .version = {1, 6, 0},
        .module = THIS_MODULE,
        .ctr = cache_ctr,
        .dtr = cache_dtr,