 void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf)
 {
        spin_lock_irq(&buf->migf->list_lock);
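+       /* A returned buffer is no longer tied to a specific chunk */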
+       buf->stop_copy_chunk_num = 0;
        list_add_tail(&buf->buf_elm, &buf->migf->avail_list);
        spin_unlock_irq(&buf->migf->list_lock);
 }
                        struct mlx5_vf_migration_file, async_data);
 
        if (!status) {
+               size_t next_required_umem_size = 0;
+               bool stop_copy_last_chunk;
                size_t image_size;
                unsigned long flags;
                bool initial_pre_copy = migf->state != MLX5_MIGF_STATE_PRE_COPY &&
                                !async_data->stop_copy_chunk;
 
                image_size = MLX5_GET(save_vhca_state_out, async_data->out,
                                      actual_image_size);
+               if (async_data->buf->stop_copy_chunk_num)
+                       next_required_umem_size = MLX5_GET(save_vhca_state_out,
+                                       async_data->out, next_required_umem_size);
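+               /* The stop_copy SAVE is final once no more data is required */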
+               stop_copy_last_chunk = async_data->stop_copy_chunk &&
+                               !next_required_umem_size;
                if (async_data->header_buf) {
                        status = add_buf_header(async_data->header_buf, image_size,
                                                initial_pre_copy);
                        if (status)
                                goto err;
                }
                async_data->buf->length = image_size;
                migf->max_pos += async_data->buf->length;
                spin_lock_irqsave(&migf->list_lock, flags);
                list_add_tail(&async_data->buf->buf_elm, &migf->buf_list);
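+               /* In chunk mode, account for this chunk becoming ready for userspace */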
+               if (async_data->buf->stop_copy_chunk_num) {
+                       migf->num_ready_chunks++;
+                       if (next_required_umem_size &&
+                           migf->num_ready_chunks >= MAX_NUM_CHUNKS) {
+                               /* Delay the next SAVE until one chunk is consumed */
+                               migf->next_required_umem_size = next_required_umem_size;
+                               next_required_umem_size = 0;
+                       }
+               }
                spin_unlock_irqrestore(&migf->list_lock, flags);
-               if (initial_pre_copy)
+               if (initial_pre_copy) {
                        migf->pre_copy_initial_bytes += image_size;
-               migf->state = async_data->stop_copy_chunk ?
-                       MLX5_MIGF_STATE_COMPLETE : MLX5_MIGF_STATE_PRE_COPY;
+                       migf->state = MLX5_MIGF_STATE_PRE_COPY;
+               }
+               if (stop_copy_last_chunk)
+                       migf->state = MLX5_MIGF_STATE_COMPLETE;
                wake_up_interruptible(&migf->poll_wait);
+               if (next_required_umem_size)
+                       mlx5vf_mig_file_set_save_work(migf,
+                               /* Picking up the next chunk num */
+                               (async_data->buf->stop_copy_chunk_num % MAX_NUM_CHUNKS) + 1,
+                               next_required_umem_size);
                mlx5vf_save_callback_complete(migf, async_data);
                return;
        }
        }
 
        if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
-               if (async_data->stop_copy_chunk && migf->buf_header[0]) {
-                       header_buf = migf->buf_header[0];
-                       migf->buf_header[0] = NULL;
-               } else {
+               if (async_data->stop_copy_chunk) {
+                       u8 header_idx = buf->stop_copy_chunk_num ?
+                               buf->stop_copy_chunk_num - 1 : 0;
+
+                       header_buf = migf->buf_header[header_idx];
+                       migf->buf_header[header_idx] = NULL;
+               }
+
+               if (!header_buf) {
                        header_buf = mlx5vf_get_data_buffer(migf,
                                sizeof(struct mlx5_vf_migration_header), DMA_NONE);
                        if (IS_ERR(header_buf)) {
 
        void *out;
 };
 
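+/* A deferred SAVE of a single chunk, queued from the save completion callback */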
+struct mlx5vf_save_work_data {
+       struct mlx5_vf_migration_file *migf;
+       size_t next_required_umem_size;
+       struct work_struct work;
+       u8 chunk_num;
+};
+
 #define MAX_NUM_CHUNKS 2
 
 struct mlx5_vf_migration_file {
        u32 record_tag;
        u64 stop_copy_prep_size;
        u64 pre_copy_initial_bytes;
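+       /* Pacing state for issuing SAVE commands in chunk mode */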
+       size_t next_required_umem_size;
+       u8 num_ready_chunks;
        /* Upon chunk mode preserve another set of buffers for stop_copy phase */
        struct mlx5_vhca_data_buffer *buf[MAX_NUM_CHUNKS];
        struct mlx5_vhca_data_buffer *buf_header[MAX_NUM_CHUNKS];
+       struct mlx5vf_save_work_data save_data[MAX_NUM_CHUNKS];
        spinlock_t list_lock;
        struct list_head buf_list;
        struct list_head avail_list;
 void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
 void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev);
 void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work);
+void mlx5vf_mig_file_set_save_work(struct mlx5_vf_migration_file *migf,
+                                  u8 chunk_num, size_t next_required_umem_size);
 int mlx5vf_start_page_tracker(struct vfio_device *vdev,
                struct rb_root_cached *ranges, u32 nnodes, u64 *page_size);
 int mlx5vf_stop_page_tracker(struct vfio_device *vdev);
 
        wake_up_interruptible(&migf->poll_wait);
 }
 
+void mlx5vf_mig_file_set_save_work(struct mlx5_vf_migration_file *migf,
+                                  u8 chunk_num, size_t next_required_umem_size)
+{
+       migf->save_data[chunk_num - 1].next_required_umem_size =
+                       next_required_umem_size;
+       migf->save_data[chunk_num - 1].migf = migf;
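+       /* Hold a file reference until mlx5vf_mig_file_save_work() completes */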
+       get_file(migf->filp);
+       queue_work(migf->mvdev->cb_wq,
+                  &migf->save_data[chunk_num - 1].work);
+}
+
+static struct mlx5_vhca_data_buffer *
+mlx5vf_mig_file_get_stop_copy_buf(struct mlx5_vf_migration_file *migf,
+                                 u8 index, size_t required_length)
+{
+       struct mlx5_vhca_data_buffer *buf = migf->buf[index];
+       u8 chunk_num;
+
+       WARN_ON(!buf);
+       chunk_num = buf->stop_copy_chunk_num;
+       buf->migf->buf[index] = NULL;
+       /* Checking whether the pre-allocated buffer can fit */
+       if (buf->allocated_length >= required_length)
+               return buf;
+
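+       /* Too small: return it to the pool and allocate a larger one */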
+       mlx5vf_put_data_buffer(buf);
+       buf = mlx5vf_get_data_buffer(migf, required_length,
+                                    DMA_FROM_DEVICE);
+       if (IS_ERR(buf))
+               return buf;
+
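+       /* Carry the chunk number over to the replacement buffer */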
+       buf->stop_copy_chunk_num = chunk_num;
+       return buf;
+}
+
+static void mlx5vf_mig_file_save_work(struct work_struct *_work)
+{
+       struct mlx5vf_save_work_data *save_data = container_of(_work,
+               struct mlx5vf_save_work_data, work);
+       struct mlx5_vf_migration_file *migf = save_data->migf;
+       struct mlx5vf_pci_core_device *mvdev = migf->mvdev;
+       struct mlx5_vhca_data_buffer *buf;
+
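+       /* Serialize against migration state transitions */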
+       mutex_lock(&mvdev->state_mutex);
+       if (migf->state == MLX5_MIGF_STATE_ERROR)
+               goto end;
+
+       buf = mlx5vf_mig_file_get_stop_copy_buf(migf,
+                               save_data->chunk_num - 1,
+                               save_data->next_required_umem_size);
+       if (IS_ERR(buf))
+               goto err;
+
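+       /* Issue the SAVE for this chunk; on failure the migration file is marked in error */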
+       if (mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, false))
+               goto err_save;
+
+       goto end;
+
+err_save:
+       mlx5vf_put_data_buffer(buf);
+err:
+       mlx5vf_mark_err(migf);
+end:
+       mlx5vf_state_mutex_unlock(mvdev);
+       fput(migf->filp);
+}
+
 static int mlx5vf_add_stop_copy_header(struct mlx5_vf_migration_file *migf,
                                       bool track)
 {
                if (mvdev->chunk_mode) {
                        migf->buf[i]->stop_copy_chunk_num = i + 1;
                        migf->buf_header[i]->stop_copy_chunk_num = i + 1;
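+                       /* Arm the deferred SAVE work for this chunk */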
+                       INIT_WORK(&migf->save_data[i].work,
+                                 mlx5vf_mig_file_save_work);
+                       migf->save_data[i].chunk_num = i + 1;
                }
        }
 
        if (ret)
                goto err;
 
-       /* Checking whether we have a matching pre-allocated buffer that can fit */
-       if (migf->buf[0]->allocated_length >= length) {
-               buf = migf->buf[0];
-               migf->buf[0] = NULL;
-       } else {
-               buf = mlx5vf_get_data_buffer(migf, length, DMA_FROM_DEVICE);
-               if (IS_ERR(buf)) {
-                       ret = PTR_ERR(buf);
-                       goto err;
-               }
+       buf = mlx5vf_mig_file_get_stop_copy_buf(migf, 0, length);
+       if (IS_ERR(buf)) {
+               ret = PTR_ERR(buf);
+               goto err;
        }
 
        ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, false);