break;                                          \
                                                                        \
                mutex_unlock(&(ca)->set->bucket_lock);                  \
-               if (kthread_should_stop()) {                            \
+               if (kthread_should_stop() ||                            \
+                   test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) {  \
                        set_current_state(TASK_RUNNING);                \
                        return 0;                                       \
                }                                                       \
 
 #include <linux/refcount.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
+#include <linux/kthread.h>
 
 #include "bset.h"
 #include "util.h"
  *
  * CACHE_SET_RUNNING means all cache devices have been registered and journal
  * replay is complete.
+ *
+ * CACHE_SET_IO_DISABLE is set when bcache is stopping the whole cache set;
+ * all external and internal I/O requests should be denied when this flag
+ * is set.
+ *
  */
 #define CACHE_SET_UNREGISTERING                0
 #define        CACHE_SET_STOPPING              1
 #define        CACHE_SET_RUNNING               2
+#define CACHE_SET_IO_DISABLE           3
 
 struct cache_set {
        struct closure          cl;
                wake_up_process(ca->alloc_thread);
 }
 
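+/*
+ * Submit a bio on behalf of a closure. If CACHE_SET_IO_DISABLE is set on
+ * the cache set, the bio is completed immediately with an I/O error
+ * instead of being sent to the device. The closure reference is taken in
+ * both cases; the bio's endio handler is expected to drop it, just as it
+ * would after a normal completion.
+ */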
+static inline void closure_bio_submit(struct cache_set *c,
+                                     struct bio *bio,
+                                     struct closure *cl)
+{
+       closure_get(cl);
+       if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
+               bio->bi_status = BLK_STS_IOERR;
+               bio_endio(bio);
+               return;
+       }
+       generic_make_request(bio);
+}
+
+/*
+ * Prevent the kthread from exiting directly, and make sure that when
+ * kthread_stop() is called to stop a kthread it is still alive. If a
+ * kthread might be stopped by the CACHE_SET_IO_DISABLE bit being set,
+ * wait_for_kthread_stop() is necessary before the kthread returns.
+ */
+static inline void wait_for_kthread_stop(void)
+{
+       while (!kthread_should_stop()) {
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule();
+       }
+}
+
 /* Forward declarations */
 
 void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
 
 
        btree_gc_start(c);
 
+       /* if CACHE_SET_IO_DISABLE is set, the gc thread should stop too */
        do {
                ret = btree_root(gc_root, c, &op, &writes, &stats);
                closure_sync(&writes);
 
                if (ret && ret != -EAGAIN)
                        pr_warn("gc failed!");
-       } while (ret);
+       } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));
 
        bch_btree_gc_finish(c);
        wake_up_allocators(c);
 
        while (1) {
                wait_event_interruptible(c->gc_wait,
-                          kthread_should_stop() || gc_should_run(c));
+                          kthread_should_stop() ||
+                          test_bit(CACHE_SET_IO_DISABLE, &c->flags) ||
+                          gc_should_run(c));
 
-               if (kthread_should_stop())
+               if (kthread_should_stop() ||
+                   test_bit(CACHE_SET_IO_DISABLE, &c->flags))
                        break;
 
                set_gc_sectors(c);
                bch_btree_gc(c);
        }
 
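+       /* Do not exit on our own; wait until kthread_stop() is called */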
+       wait_for_kthread_stop();
        return 0;
 }
 
 
        bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev);
 
        b->submit_time_us = local_clock_us();
-       closure_bio_submit(bio, bio->bi_private);
+       closure_bio_submit(c, bio, bio->bi_private);
 }
 
 void bch_submit_bbio(struct bio *bio, struct cache_set *c,
 
                bio_set_op_attrs(bio, REQ_OP_READ, 0);
                bch_bio_map(bio, data);
 
-               closure_bio_submit(bio, &cl);
+               closure_bio_submit(ca->set, bio, &cl);
                closure_sync(&cl);
 
                /* This function could be simpler now since we no longer write
        spin_unlock(&c->journal.lock);
 
        while ((bio = bio_list_pop(&list)))
-               closure_bio_submit(bio, cl);
+               closure_bio_submit(c, bio, cl);
 
        continue_at(cl, journal_write_done, NULL);
 }
 
 
                /* XXX: invalidate cache */
 
-               closure_bio_submit(bio, cl);
+               closure_bio_submit(s->iop.c, bio, cl);
        }
 
        continue_at(cl, cached_dev_cache_miss_done, NULL);
        s->cache_miss   = miss;
        s->iop.bio      = cache_bio;
        bio_get(cache_bio);
-       closure_bio_submit(cache_bio, &s->cl);
+       closure_bio_submit(s->iop.c, cache_bio, &s->cl);
 
        return ret;
 out_put:
 out_submit:
        miss->bi_end_io         = request_endio;
        miss->bi_private        = &s->cl;
-       closure_bio_submit(miss, &s->cl);
+       closure_bio_submit(s->iop.c, miss, &s->cl);
        return ret;
 }
 
 
                if ((bio_op(bio) != REQ_OP_DISCARD) ||
                    blk_queue_discard(bdev_get_queue(dc->bdev)))
-                       closure_bio_submit(bio, cl);
+                       closure_bio_submit(s->iop.c, bio, cl);
        } else if (s->iop.writeback) {
                bch_writeback_add(dc);
                s->iop.bio = bio;
                        flush->bi_private = cl;
                        flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
 
-                       closure_bio_submit(flush, cl);
+                       closure_bio_submit(s->iop.c, flush, cl);
                }
        } else {
                s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
 
-               closure_bio_submit(bio, cl);
+               closure_bio_submit(s->iop.c, bio, cl);
        }
 
        closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
                bch_journal_meta(s->iop.c, cl);
 
        /* If it's a flush, we send the flush to the backing device too */
-       closure_bio_submit(bio, cl);
+       closure_bio_submit(s->iop.c, bio, cl);
 
        continue_at(cl, cached_dev_bio_complete, NULL);
 }
        struct cached_dev *dc = container_of(d, struct cached_dev, disk);
        int rw = bio_data_dir(bio);
 
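+       /* Fail new requests once the cache set has I/O disabled */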
+       if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
+               bio->bi_status = BLK_STS_IOERR;
+               bio_endio(bio);
+               return BLK_QC_T_NONE;
+       }
+
        atomic_set(&dc->backing_idle, 0);
        generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
 
        struct bcache_device *d = bio->bi_disk->private_data;
        int rw = bio_data_dir(bio);
 
+       if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
+               bio->bi_status = BLK_STS_IOERR;
+               bio_endio(bio);
+               return BLK_QC_T_NONE;
+       }
+
        generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
 
        s = search_alloc(bio, d);
 
        bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
        bch_bio_map(bio, ca->disk_buckets);
 
-       closure_bio_submit(bio, &ca->prio);
+       closure_bio_submit(ca->set, bio, &ca->prio);
        closure_sync(cl);
 }
 
            test_bit(CACHE_SET_STOPPING, &c->flags))
                return false;
 
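+       /* Deny all further I/O while the broken cache set is retired */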
+       if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
+               pr_warn("CACHE_SET_IO_DISABLE already set");
+
        /* XXX: we can be called from atomic context
        acquire_console_sem();
        */
        c->congested_read_threshold_us  = 2000;
        c->congested_write_threshold_us = 20000;
        c->error_limit  = DEFAULT_IO_ERROR_LIMIT;
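+       /* A new cache set must not have CACHE_SET_IO_DISABLE set */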
+       WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
 
        return c;
 err:
 
 
 rw_attribute(synchronous);
 rw_attribute(journal_delay_ms);
+rw_attribute(io_disable);
 rw_attribute(discard);
 rw_attribute(running);
 rw_attribute(label);
        sysfs_printf(gc_always_rewrite,         "%i", c->gc_always_rewrite);
        sysfs_printf(btree_shrinker_disabled,   "%i", c->shrinker_disabled);
        sysfs_printf(copy_gc_enabled,           "%i", c->copy_gc_enabled);
+       sysfs_printf(io_disable,                "%i",
+                    test_bit(CACHE_SET_IO_DISABLE, &c->flags));
 
        if (attr == &sysfs_bset_tree_stats)
                return bch_bset_print_stats(c, buf);
        if (attr == &sysfs_io_error_halflife)
                c->error_decay = strtoul_or_return(buf) / 88;
 
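+       /* Allow CACHE_SET_IO_DISABLE to be set or cleared by hand from sysfs */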
+       if (attr == &sysfs_io_disable) {
+               int v = strtoul_or_return(buf);
+
+               if (v) {
+                       if (test_and_set_bit(CACHE_SET_IO_DISABLE,
+                                            &c->flags))
+                               pr_warn("CACHE_SET_IO_DISABLE already set");
+               } else {
+                       if (!test_and_clear_bit(CACHE_SET_IO_DISABLE,
+                                               &c->flags))
+                               pr_warn("CACHE_SET_IO_DISABLE already cleared");
+               }
+       }
+
        sysfs_strtoul(journal_delay_ms,         c->journal_delay_ms);
        sysfs_strtoul(verify,                   c->verify);
        sysfs_strtoul(key_merging_disabled,     c->key_merging_disabled);
        &sysfs_gc_always_rewrite,
        &sysfs_btree_shrinker_disabled,
        &sysfs_copy_gc_enabled,
+       &sysfs_io_disable,
        NULL
 };
 KTYPE(bch_cache_set_internal);
 
        return bdev->bd_inode->i_size >> 9;
 }
 
-#define closure_bio_submit(bio, cl)                                    \
-do {                                                                   \
-       closure_get(cl);                                                \
-       generic_make_request(bio);                                      \
-} while (0)
-
 uint64_t bch_crc64_update(uint64_t, const void *, size_t);
 uint64_t bch_crc64(const void *, size_t);
 
 
        struct cached_dev *dc = container_of(to_delayed_work(work),
                                             struct cached_dev,
                                             writeback_rate_update);
+       struct cache_set *c = dc->disk.c;
 
        /*
         * should check BCACHE_DEV_RATE_DW_RUNNING before calling
        /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
        smp_mb();
 
-       if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
+       /*
+        * CACHE_SET_IO_DISABLE might be set via the sysfs interface,
+        * so check it here too.
+        */
+       if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) ||
+           test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
                clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
                /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
                smp_mb();
 
        up_read(&dc->writeback_lock);
 
-       if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) {
+       /*
+        * CACHE_SET_IO_DISABLE might be set via the sysfs interface,
+        * so check it here too.
+        */
+       if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) &&
+           !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
                schedule_delayed_work(&dc->writeback_rate_update,
                              dc->writeback_rate_update_seconds * HZ);
        }
                bio_set_dev(&io->bio, io->dc->bdev);
                io->bio.bi_end_io       = dirty_endio;
 
-               closure_bio_submit(&io->bio, cl);
+               closure_bio_submit(io->dc->disk.c, &io->bio, cl);
        }
 
        atomic_set(&dc->writeback_sequence_next, next_sequence);
 {
        struct dirty_io *io = container_of(cl, struct dirty_io, cl);
 
-       closure_bio_submit(&io->bio, cl);
+       closure_bio_submit(io->dc->disk.c, &io->bio, cl);
 
        continue_at(cl, write_dirty, io->dc->writeback_write_wq);
 }
 
        next = bch_keybuf_next(&dc->writeback_keys);
 
-       while (!kthread_should_stop() && next) {
+       while (!kthread_should_stop() &&
+              !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
+              next) {
                size = 0;
                nk = 0;
 
                        }
                }
 
-               while (!kthread_should_stop() && delay) {
+               while (!kthread_should_stop() &&
+                      !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
+                      delay) {
                        schedule_timeout_interruptible(delay);
                        delay = writeback_delay(dc, 0);
                }
 static int bch_writeback_thread(void *arg)
 {
        struct cached_dev *dc = arg;
+       struct cache_set *c = dc->disk.c;
        bool searched_full_index;
 
        bch_ratelimit_reset(&dc->writeback_rate);
 
-       while (!kthread_should_stop()) {
+       while (!kthread_should_stop() &&
+              !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
                down_write(&dc->writeback_lock);
                set_current_state(TASK_INTERRUPTIBLE);
                /*
                    (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
                        up_write(&dc->writeback_lock);
 
-                       if (kthread_should_stop()) {
+                       if (kthread_should_stop() ||
+                           test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
                                set_current_state(TASK_RUNNING);
                                break;
                        }
 
                        while (delay &&
                               !kthread_should_stop() &&
+                              !test_bit(CACHE_SET_IO_DISABLE, &c->flags) &&
                               !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
                                delay = schedule_timeout_interruptible(delay);
 
                }
        }
 
-       dc->writeback_thread = NULL;
        cached_dev_put(dc);
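+       /* Keep the kthread alive until kthread_stop() is called */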
+       wait_for_kthread_stop();
 
        return 0;
 }