#define BTRFS_FS_STATE_ERROR           0
 #define BTRFS_FS_STATE_REMOUNTING      1
 #define BTRFS_FS_STATE_TRANS_ABORTED   2
+#define BTRFS_FS_STATE_DEV_REPLACING   3
 
 /* Super block flags */
 /* Errors detected */
 
        atomic_t mutually_exclusive_operation_running;
 
+       struct percpu_counter bio_counter;
+       wait_queue_head_t replace_wait;
+
        struct semaphore uuid_tree_rescan_sem;
        unsigned int update_uuid_tree_gen:1;
 };
 int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
                         struct btrfs_scrub_progress *progress);
 
+/* dev-replace.c */
+void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info);
+void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info);
+void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info);
+
 /* reada.c */
 struct reada_control {
        struct btrfs_root       *root;          /* tree to prefetch */
 
        return ret;
 }
 
+/*
+ * Block until all in-flight bios have finished.
+ *
+ * Sets BTRFS_FS_STATE_DEV_REPLACING so that new bios stall in
+ * btrfs_bio_counter_inc_blocked(), then sleeps until bio_counter
+ * drops to zero.  Paired with btrfs_rm_dev_replace_unblocked().
+ */
+static void btrfs_rm_dev_replace_blocked(struct btrfs_fs_info *fs_info)
+{
+       s64 writers;
+       DEFINE_WAIT(wait);
+
+       set_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state);
+       do {
+               prepare_to_wait(&fs_info->replace_wait, &wait,
+                               TASK_UNINTERRUPTIBLE);
+               /* sum across all CPUs; non-zero means bios still in flight */
+               writers = percpu_counter_sum(&fs_info->bio_counter);
+               if (writers)
+                       schedule();
+               finish_wait(&fs_info->replace_wait, &wait);
+       } while (writers);
+}
+
+/*
+ * The target device has been removed; it is safe to allow new bio
+ * requests again.  Clears the flag set by btrfs_rm_dev_replace_blocked()
+ * and wakes any bios parked in btrfs_bio_counter_inc_blocked().
+ */
+static void btrfs_rm_dev_replace_unblocked(struct btrfs_fs_info *fs_info)
+{
+       clear_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state);
+       /*
+        * NOTE(review): waitqueue_active() without a preceding memory
+        * barrier can miss a waiter racing with prepare_to_wait() /
+        * wait_event() -- confirm the ordering vs. clear_bit() above.
+        */
+       if (waitqueue_active(&fs_info->replace_wait))
+               wake_up(&fs_info->replace_wait);
+}
+
 static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
                                       int scrub_ret)
 {
        src_device = dev_replace->srcdev;
        btrfs_dev_replace_unlock(dev_replace);
 
-       /* replace old device with new one in mapping tree */
-       if (!scrub_ret)
-               btrfs_dev_replace_update_device_in_mapping_tree(fs_info,
-                                                               src_device,
-                                                               tgt_device);
-
        /*
         * flush all outstanding I/O and inode extent mappings before the
         * copy operation is declared as being finished
        dev_replace->time_stopped = get_seconds();
        dev_replace->item_needs_writeback = 1;
 
-       if (scrub_ret) {
+       /* replace old device with new one in mapping tree */
+       if (!scrub_ret) {
+               btrfs_dev_replace_update_device_in_mapping_tree(fs_info,
+                                                               src_device,
+                                                               tgt_device);
+       } else {
                printk_in_rcu(KERN_ERR
                              "BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n",
                              src_device->missing ? "<missing disk>" :
                fs_info->fs_devices->latest_bdev = tgt_device->bdev;
        list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
 
+       btrfs_rm_dev_replace_blocked(fs_info);
+
        btrfs_rm_dev_replace_srcdev(fs_info, src_device);
 
+       btrfs_rm_dev_replace_unblocked(fs_info);
+
        /*
         * this is again a consistent state where no dev_replace procedure
         * is running, the target device is part of the filesystem, the
                mutex_unlock(&dev_replace->lock_management_lock);
        }
 }
+
+/*
+ * Account one in-flight bio without checking for a running device
+ * replace (never blocks on BTRFS_FS_STATE_DEV_REPLACING).
+ */
+void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)
+{
+       percpu_counter_inc(&fs_info->bio_counter);
+}
+
+/*
+ * Drop one in-flight bio reference and wake a dev-replace waiting in
+ * btrfs_rm_dev_replace_blocked() for the counter to reach zero.
+ */
+void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info)
+{
+       percpu_counter_dec(&fs_info->bio_counter);
+
+       /*
+        * NOTE(review): unbarriered waitqueue_active() -- same potential
+        * missed-wakeup window as in btrfs_rm_dev_replace_unblocked();
+        * verify against the waiter's prepare_to_wait() ordering.
+        */
+       if (waitqueue_active(&fs_info->replace_wait))
+               wake_up(&fs_info->replace_wait);
+}
+
+/*
+ * Account one in-flight bio, blocking while a device replace is
+ * removing the target device.
+ *
+ * Increment-then-check scheme: bump the counter first, and if
+ * DEV_REPLACING turns out to be set, undo the increment, sleep until
+ * the flag clears, and retry -- so a blocked replace never sees this
+ * bio counted while it waits for the counter to drain.
+ */
+void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info)
+{
+       /* NOTE(review): 'wait' appears unused; wait_event() below builds
+        * its own wait entry.  Candidate for removal. */
+       DEFINE_WAIT(wait);
+again:
+       percpu_counter_inc(&fs_info->bio_counter);
+       if (test_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state)) {
+               btrfs_bio_counter_dec(fs_info);
+               wait_event(fs_info->replace_wait,
+                          !test_bit(BTRFS_FS_STATE_DEV_REPLACING,
+                                    &fs_info->fs_state));
+               goto again;
+       }
+
+}
 
                goto fail_dirty_metadata_bytes;
        }
 
+       ret = percpu_counter_init(&fs_info->bio_counter, 0);
+       if (ret) {
+               err = ret;
+               goto fail_delalloc_bytes;
+       }
+
        fs_info->btree_inode = new_inode(sb);
        if (!fs_info->btree_inode) {
                err = -ENOMEM;
-               goto fail_delalloc_bytes;
+               goto fail_bio_counter;
        }
 
        mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
        atomic_set(&fs_info->scrub_pause_req, 0);
        atomic_set(&fs_info->scrubs_paused, 0);
        atomic_set(&fs_info->scrub_cancel_req, 0);
+       init_waitqueue_head(&fs_info->replace_wait);
        init_waitqueue_head(&fs_info->scrub_pause_wait);
        fs_info->scrub_workers_refcnt = 0;
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
 
        iput(fs_info->btree_inode);
+fail_bio_counter:
+       percpu_counter_destroy(&fs_info->bio_counter);
 fail_delalloc_bytes:
        percpu_counter_destroy(&fs_info->delalloc_bytes);
 fail_dirty_metadata_bytes:
 
        percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
        percpu_counter_destroy(&fs_info->delalloc_bytes);
+       percpu_counter_destroy(&fs_info->bio_counter);
        bdi_destroy(&fs_info->bdi);
        cleanup_srcu_struct(&fs_info->subvol_srcu);
 
 
 static void btrfs_end_bio(struct bio *bio, int err)
 {
        struct btrfs_bio *bbio = bio->bi_private;
+       struct btrfs_device *dev = bbio->stripes[0].dev;
        int is_orig_bio = 0;
 
        if (err) {
                if (err == -EIO || err == -EREMOTEIO) {
                        unsigned int stripe_index =
                                btrfs_io_bio(bio)->stripe_index;
-                       struct btrfs_device *dev;
 
                        BUG_ON(stripe_index >= bbio->num_stripes);
                        dev = bbio->stripes[stripe_index].dev;
        if (bio == bbio->orig_bio)
                is_orig_bio = 1;
 
+       btrfs_bio_counter_dec(bbio->fs_info);
+
        if (atomic_dec_and_test(&bbio->stripes_pending)) {
                if (!is_orig_bio) {
                        bio_put(bio);
        }
 #endif
        bio->bi_bdev = dev->bdev;
+
+       btrfs_bio_counter_inc_noblocked(root->fs_info);
+
        if (async)
                btrfs_schedule_bio(root, dev, rw, bio);
        else
        length = bio->bi_size;
        map_length = length;
 
+       btrfs_bio_counter_inc_blocked(root->fs_info);
        ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio,
                              mirror_num, &raid_map);
-       if (ret) /* -ENOMEM */
+       if (ret) {
+               btrfs_bio_counter_dec(root->fs_info);
                return ret;
+       }
 
        total_devs = bbio->num_stripes;
        bbio->orig_bio = first_bio;
        bbio->private = first_bio->bi_private;
        bbio->end_io = first_bio->bi_end_io;
+       bbio->fs_info = root->fs_info;
        atomic_set(&bbio->stripes_pending, bbio->num_stripes);
 
        if (raid_map) {
                /* In this case, map_length has been set to the length of
                   a single stripe; not the whole write */
                if (rw & WRITE) {
-                       return raid56_parity_write(root, bio, bbio,
-                                                  raid_map, map_length);
+                       ret = raid56_parity_write(root, bio, bbio,
+                                                 raid_map, map_length);
                } else {
-                       return raid56_parity_recover(root, bio, bbio,
-                                                    raid_map, map_length,
-                                                    mirror_num);
+                       ret = raid56_parity_recover(root, bio, bbio,
+                                                   raid_map, map_length,
+                                                   mirror_num);
                }
+               /*
+                * FIXME: device replace doesn't support raid56 yet;
+                * fix this in the future.
+                */
+               btrfs_bio_counter_dec(root->fs_info);
+               return ret;
        }
 
        if (map_length < length) {
                                  async_submit);
                dev_nr++;
        }
+       btrfs_bio_counter_dec(root->fs_info);
        return 0;
 }
 
 
 
 struct btrfs_bio {
        atomic_t stripes_pending;
+       struct btrfs_fs_info *fs_info;
        bio_end_io_t *end_io;
        struct bio *orig_bio;
        void *private;