/*
  * FIXME: the cache is read/write for the time being.
  */
-enum cache_mode {
+enum cache_metadata_mode {
        CM_WRITE,               /* metadata may be changed */
        CM_READ_ONLY,           /* metadata may not be changed */
 };
 
+enum cache_io_mode {
+       /*
+        * Data is written to cached blocks only.  These blocks are marked
+        * dirty.  If you lose the cache device you will lose data.
+        * Potential performance increase for both reads and writes.
+        */
+       CM_IO_WRITEBACK,
+
+       /*
+        * Data is written to both cache and origin.  Blocks are never
+        * dirty.  Potential performance benefit for reads only.
+        */
+       CM_IO_WRITETHROUGH,
+
+       /*
+        * A degraded mode useful for various cache coherency situations
+        * (eg, rolling back snapshots).  Reads and writes always go to the
+        * origin.  If a write goes to a cached oblock, then the cache
+        * block is invalidated.
+        */
+       CM_IO_PASSTHROUGH
+};
+
 struct cache_features {
-       enum cache_mode mode;
-       bool write_through:1;
+       enum cache_metadata_mode mode;
+       enum cache_io_mode io_mode;
 };
 
 struct cache_stats {
 #define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
 #define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))
 
+static bool writethrough_mode(struct cache_features *f)
+{
+       return f->io_mode == CM_IO_WRITETHROUGH;
+}
+
+static bool writeback_mode(struct cache_features *f)
+{
+       return f->io_mode == CM_IO_WRITEBACK;
+}
+
+static bool passthrough_mode(struct cache_features *f)
+{
+       return f->io_mode == CM_IO_PASSTHROUGH;
+}
+
 static size_t get_per_bio_data_size(struct cache *cache)
 {
-       return cache->features.write_through ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
+       return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
 }
 
 static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
        quiesce_migration(mg);
 }
 
+/*
+ * Invalidate a cache entry.  No writeback occurs; any changes in the cache
+ * block are thrown away.
+ */
+static void invalidate(struct cache *cache, struct prealloc *structs,
+                      dm_oblock_t oblock, dm_cblock_t cblock,
+                      struct dm_bio_prison_cell *cell)
+{
+       struct dm_cache_migration *mg = prealloc_get_migration(structs);
+
+       mg->err = false;
+       mg->writeback = false;
+       mg->demote = true;
+       mg->promote = false;
+       mg->requeue_holder = true;
+       mg->cache = cache;
+       mg->old_oblock = oblock;
+       mg->cblock = cblock;
+       mg->old_ocell = cell;
+       mg->new_ocell = NULL;
+       mg->start_jiffies = jiffies;
+
+       inc_nr_migrations(cache);
+       quiesce_migration(mg);
+}
+
 /*----------------------------------------------------------------
  * bio processing
  *--------------------------------------------------------------*/
        return current_volume < cache->migration_threshold;
 }
 
-static bool is_writethrough_io(struct cache *cache, struct bio *bio,
-                              dm_cblock_t cblock)
-{
-       return bio_data_dir(bio) == WRITE &&
-               cache->features.write_through && !is_dirty(cache, cblock);
-}
-
 static void inc_hit_counter(struct cache *cache, struct bio *bio)
 {
        atomic_inc(bio_data_dir(bio) == READ ?
                   &cache->stats.read_hit : &cache->stats.write_hit);
 }
 
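+/*
+ * Remap a bio to its cache block and issue it.  The bio is tracked in the
+ * all_io deferred set; writes also mark the cache block dirty.
+ */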
+static void issue_cache_bio(struct cache *cache, struct bio *bio,
+                           struct per_bio_data *pb,
+                           dm_oblock_t oblock, dm_cblock_t cblock)
+{
+       pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+       remap_to_cache_dirty(cache, bio, oblock, cblock);
+       issue(cache, bio);
+}
+
 static void process_bio(struct cache *cache, struct prealloc *structs,
                        struct bio *bio)
 {
        size_t pb_data_size = get_per_bio_data_size(cache);
        struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
        bool discarded_block = is_discarded_oblock(cache, block);
-       bool can_migrate = discarded_block || spare_migration_bandwidth(cache);
+       bool passthrough = passthrough_mode(&cache->features);
+       bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));
 
        /*
         * Check to see if that block is currently migrating.
 
        switch (lookup_result.op) {
        case POLICY_HIT:
-               inc_hit_counter(cache, bio);
-               pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+               if (passthrough) {
+                       inc_miss_counter(cache, bio);
 
-               if (is_writethrough_io(cache, bio, lookup_result.cblock))
-                       remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
-               else
-                       remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
+                       /*
+                        * Passthrough always maps to the origin,
+                        * invalidating any cache blocks that are written
+                        * to.
+                        */
+
+                       if (bio_data_dir(bio) == WRITE) {
+                               atomic_inc(&cache->stats.demotion);
+                               invalidate(cache, structs, block, lookup_result.cblock, new_ocell);
+                               release_cell = false;
+
+                       } else {
+                               /* FIXME: factor out issue_origin() */
+                               pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+                               remap_to_origin_clear_discard(cache, bio, block);
+                               issue(cache, bio);
+                       }
+               } else {
+                       inc_hit_counter(cache, bio);
+
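+                       /*
+                        * Writethrough only applies to clean blocks: the
+                        * write goes to the origin and then to the cache.
+                        * Writes to dirty blocks go to the cache alone,
+                        * since the cache already holds newer data.
+                        */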
+                       if (bio_data_dir(bio) == WRITE &&
+                           writethrough_mode(&cache->features) &&
+                           !is_dirty(cache, lookup_result.cblock)) {
+                               pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+                               remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
+                               issue(cache, bio);
+                       } else
+                               issue_cache_bio(cache, bio, pb, block, lookup_result.cblock);
+               }
 
-               issue(cache, bio);
                break;
 
        case POLICY_MISS:
 static void init_features(struct cache_features *cf)
 {
        cf->mode = CM_WRITE;
-       cf->write_through = false;
+       cf->io_mode = CM_IO_WRITEBACK;
 }
 
 static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
                arg = dm_shift_arg(as);
 
                if (!strcasecmp(arg, "writeback"))
-                       cf->write_through = false;
+                       cf->io_mode = CM_IO_WRITEBACK;
 
                else if (!strcasecmp(arg, "writethrough"))
-                       cf->write_through = true;
+                       cf->io_mode = CM_IO_WRITETHROUGH;
+
+               else if (!strcasecmp(arg, "passthrough"))
+                       cf->io_mode = CM_IO_PASSTHROUGH;
 
                else {
                        *error = "Unrecognised cache feature requested";
        }
        cache->cmd = cmd;
 
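+       /*
+        * Passthrough never writes cache contents back to the origin, so
+        * it may only be entered while every cache block is clean.
+        */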
+       if (passthrough_mode(&cache->features)) {
+               bool all_clean;
+
+               r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
+               if (r) {
+                       *error = "dm_cache_metadata_all_clean() failed";
+                       goto bad;
+               }
+
+               if (!all_clean) {
+                       *error = "Cannot enter passthrough mode unless all blocks are clean";
+                       r = -EINVAL;
+                       goto bad;
+               }
+       }
+
        spin_lock_init(&cache->lock);
        bio_list_init(&cache->deferred_bios);
        bio_list_init(&cache->deferred_flush_bios);
                return DM_MAPIO_SUBMITTED;
        }
 
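+       /*
+        * Default to remapping the bio; paths that defer or error it set
+        * DM_MAPIO_SUBMITTED below.
+        */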
+       r = DM_MAPIO_REMAPPED;
        switch (lookup_result.op) {
        case POLICY_HIT:
-               inc_hit_counter(cache, bio);
-               pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+               if (passthrough_mode(&cache->features)) {
+                       if (bio_data_dir(bio) == WRITE) {
+                               /*
+                                * We need to invalidate this block, so
+                                * defer for the worker thread.
+                                */
+                               cell_defer(cache, cell, true);
+                               r = DM_MAPIO_SUBMITTED;
+
+                       } else {
+                               pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+                               inc_miss_counter(cache, bio);
+                               remap_to_origin_clear_discard(cache, bio, block);
+
+                               cell_defer(cache, cell, false);
+                       }
 
-               if (is_writethrough_io(cache, bio, lookup_result.cblock))
-                       remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
-               else
-                       remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
+               } else {
+                       inc_hit_counter(cache, bio);
+
+                       if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
+                           !is_dirty(cache, lookup_result.cblock))
+                               remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
+                       else
+                               remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
 
-               cell_defer(cache, cell, false);
+                       cell_defer(cache, cell, false);
+               }
                break;
 
        case POLICY_MISS:
                DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
                            (unsigned) lookup_result.op);
                bio_io_error(bio);
-               return DM_MAPIO_SUBMITTED;
+               r = DM_MAPIO_SUBMITTED;
        }
 
-       return DM_MAPIO_REMAPPED;
+       return r;
 }
 
 static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
                       (unsigned long long) from_cblock(residency),
                       cache->nr_dirty);
 
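+               /*
+                * Status reports the feature argument count followed by
+                * the configured io mode.
+                */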
-               if (cache->features.write_through)
+               if (writethrough_mode(&cache->features))
                        DMEMIT("1 writethrough ");
-               else
-                       DMEMIT("0 ");
+
+               else if (passthrough_mode(&cache->features))
+                       DMEMIT("1 passthrough ");
+
+               else if (writeback_mode(&cache->features))
+                       DMEMIT("1 writeback ");
+
+               else {
+                       DMERR("internal error: unknown io mode: %d", (int) cache->features.io_mode);
+                       goto err;
+               }
 
                DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
                if (sz < maxlen) {
 
 static struct target_type cache_target = {
        .name = "cache",
-       .version = {1, 1, 1},
+       .version = {1, 2, 0},
        .module = THIS_MODULE,
        .ctr = cache_ctr,
        .dtr = cache_dtr,