static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
                               ext4_group_t group, int cr);
 
+static int ext4_try_to_trim_range(struct super_block *sb,
+               struct ext4_buddy *e4b, ext4_grpblk_t start,
+               ext4_grpblk_t max, ext4_grpblk_t minblocks);
+
 /*
  * The algorithm using this percpu seq counter goes below:
  * 1. We sample the percpu discard_pa_seq counter before trying for block
        return 0;
 }
 
+static void ext4_discard_work(struct work_struct *work)
+{
+       struct ext4_sb_info *sbi = container_of(work,
+                       struct ext4_sb_info, s_discard_work);
+       struct super_block *sb = sbi->s_sb;
+       struct ext4_free_data *fd, *nfd;
+       struct ext4_buddy e4b;
+       struct list_head discard_list;
+       ext4_group_t grp, load_grp;
+       int err = 0;
+
+       INIT_LIST_HEAD(&discard_list);
+       spin_lock(&sbi->s_md_lock);
+       list_splice_init(&sbi->s_discard_list, &discard_list);
+       spin_unlock(&sbi->s_md_lock);
+
+       load_grp = UINT_MAX;
+       list_for_each_entry_safe(fd, nfd, &discard_list, efd_list) {
+               /*
+                * If filesystem is umounting or no memory, give up the discard
+                */
+               if ((sb->s_flags & SB_ACTIVE) && !err) {
+                       grp = fd->efd_group;
+                       if (grp != load_grp) {
+                               if (load_grp != UINT_MAX)
+                                       ext4_mb_unload_buddy(&e4b);
+
+                               err = ext4_mb_load_buddy(sb, grp, &e4b);
+                               if (err) {
+                                       kmem_cache_free(ext4_free_data_cachep, fd);
+                                       load_grp = UINT_MAX;
+                                       continue;
+                               } else {
+                                       load_grp = grp;
+                               }
+                       }
+
+                       ext4_lock_group(sb, grp);
+                       ext4_try_to_trim_range(sb, &e4b, fd->efd_start_cluster,
+                                               fd->efd_start_cluster + fd->efd_count - 1, 1);
+                       ext4_unlock_group(sb, grp);
+               }
+               kmem_cache_free(ext4_free_data_cachep, fd);
+       }
+
+       if (load_grp != UINT_MAX)
+               ext4_mb_unload_buddy(&e4b);
+}
+
 int ext4_mb_init(struct super_block *sb)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        spin_lock_init(&sbi->s_md_lock);
        sbi->s_mb_free_pending = 0;
        INIT_LIST_HEAD(&sbi->s_freed_data_list);
+       INIT_LIST_HEAD(&sbi->s_discard_list);
+       INIT_WORK(&sbi->s_discard_work, ext4_discard_work);
 
        sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
        sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
        struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
        int count;
 
+       if (test_opt(sb, DISCARD)) {
+               /*
+                * wait the discard work to drain all of ext4_free_data
+                */
+               flush_work(&sbi->s_discard_work);
+               WARN_ON_ONCE(!list_empty(&sbi->s_discard_list));
+       }
+
        if (sbi->s_group_info) {
                for (i = 0; i < ngroups; i++) {
                        cond_resched();
                put_page(e4b.bd_bitmap_page);
        }
        ext4_unlock_group(sb, entry->efd_group);
-       kmem_cache_free(ext4_free_data_cachep, entry);
        ext4_mb_unload_buddy(&e4b);
 
        mb_debug(sb, "freed %d blocks in %d structures\n", count,
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_free_data *entry, *tmp;
-       struct bio *discard_bio = NULL;
        struct list_head freed_data_list;
        struct list_head *cut_pos = NULL;
-       int err;
+       bool wake;
 
        INIT_LIST_HEAD(&freed_data_list);
 
                                  cut_pos);
        spin_unlock(&sbi->s_md_lock);
 
-       if (test_opt(sb, DISCARD)) {
-               list_for_each_entry(entry, &freed_data_list, efd_list) {
-                       err = ext4_issue_discard(sb, entry->efd_group,
-                                                entry->efd_start_cluster,
-                                                entry->efd_count,
-                                                &discard_bio);
-                       if (err && err != -EOPNOTSUPP) {
-                               ext4_msg(sb, KERN_WARNING, "discard request in"
-                                        " group:%d block:%d count:%d failed"
-                                        " with %d", entry->efd_group,
-                                        entry->efd_start_cluster,
-                                        entry->efd_count, err);
-                       } else if (err == -EOPNOTSUPP)
-                               break;
-               }
+       list_for_each_entry(entry, &freed_data_list, efd_list)
+               ext4_free_data_in_buddy(sb, entry);
 
-               if (discard_bio) {
-                       submit_bio_wait(discard_bio);
-                       bio_put(discard_bio);
-               }
+       if (test_opt(sb, DISCARD)) {
+               spin_lock(&sbi->s_md_lock);
+               wake = list_empty(&sbi->s_discard_list);
+               list_splice_tail(&freed_data_list, &sbi->s_discard_list);
+               spin_unlock(&sbi->s_md_lock);
+               if (wake)
+                       queue_work(system_unbound_wq, &sbi->s_discard_work);
+       } else {
+               list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
+                       kmem_cache_free(ext4_free_data_cachep, entry);
        }
-
-       list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
-               ext4_free_data_in_buddy(sb, entry);
 }
 
 int __init ext4_init_mballoc(void)