(BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
 #define MAX_DISCARD_BLOCKS(sbi)                BLKS_PER_SEC(sbi)
 #define DISCARD_ISSUE_RATE             8
+#define DEF_MIN_DISCARD_ISSUE_TIME     50      /* 50 ms: discard thread wait after a pass that issued discards */
+#define DEF_MAX_DISCARD_ISSUE_TIME     60000   /* 60 s: discard thread wait after a pass that issued nothing */
 #define DEF_CP_INTERVAL                        60      /* 60 secs */
 #define DEF_IDLE_INTERVAL              5       /* 5 secs */
 
        unsigned char discard_map[SIT_VBLOCK_MAP_SIZE]; /* segment discard bitmap */
 };
 
+/* default discard granularity of inner discard thread, unit: block count */
+#define DEFAULT_DISCARD_GRANULARITY            16
+
 /* max discard pend list number */
 #define MAX_PLIST_NUM          512
 #define plist_idx(blk_num)     ((blk_num) >= MAX_PLIST_NUM ?           \
                                        (MAX_PLIST_NUM - 1) : (blk_num - 1))
 
+#define P_ACTIVE       0x01    /* set on lists whose index >= discard_granularity - 1 */
+#define P_TRIM         0x02    /* set on every list by mark_discard_range_all(); cleared once the list is empty */
+#define plist_issue(tag)       (((tag) & P_ACTIVE) || ((tag) & P_TRIM))       /* true if either tag is set */
+
 enum {
        D_PREP,
        D_SUBMIT,
        struct task_struct *f2fs_issue_discard; /* discard thread */
        struct list_head entry_list;            /* 4KB discard entry list */
        struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */
+       unsigned char pend_list_tag[MAX_PLIST_NUM];/* tag for pending entries */
        struct list_head wait_list;             /* store on-flushing entries */
        wait_queue_head_t discard_wait_queue;   /* waiting queue for wake-up */
+       unsigned int discard_wake;              /* to wake up discard thread */
        struct mutex cmd_lock;
        unsigned int nr_discards;               /* # of discards in the list */
        unsigned int max_discards;              /* max. discards to be issued */
+       unsigned int discard_granularity;       /* discard granularity */
        unsigned int undiscard_blks;            /* # of undiscard blocks */
        atomic_t issued_discard;                /* # of issued discard */
        atomic_t issing_discard;                /* # of issing discard */
 
        return 0;
 }
 
-static void __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond)
+static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond)
 {
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct list_head *pend_list;
        struct discard_cmd *dc, *tmp;
        struct blk_plug plug;
-       int i, iter = 0;
+       int iter = 0, issued = 0;       /* iter: rate-limit counter; issued: return value */
+       int i;
 
        mutex_lock(&dcc->cmd_lock);
        f2fs_bug_on(sbi,
                !__check_rb_tree_consistence(sbi, &dcc->root));
        blk_start_plug(&plug);
-       for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
+       for (i = MAX_PLIST_NUM - 1;     /* top list first; walk ends at the first untagged list */
+                       i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) {
                pend_list = &dcc->pend_list[i];
                list_for_each_entry_safe(dc, tmp, pend_list, list) {
                        f2fs_bug_on(sbi, dc->state != D_PREP);
 
-                       if (!issue_cond || is_idle(sbi))
+                       /*
+                        * Hurry up to finish fstrim: entries of a list tagged
+                        * P_TRIM are submitted unconditionally, bypassing both
+                        * the issue_cond/is_idle() check and the iter limit.
+                        */
+                       if (dcc->pend_list_tag[i] & P_TRIM) {
+                               __submit_discard_cmd(sbi, dc);
+                               issued++;
+                               continue;
+                       }
+
+                       if (!issue_cond || is_idle(sbi)) {
+                               issued++;
                                __submit_discard_cmd(sbi, dc);
+                       }
                        if (issue_cond && iter++ > DISCARD_ISSUE_RATE)
                                goto out;
                }
+               if (list_empty(pend_list) && dcc->pend_list_tag[i] & P_TRIM)
+                       dcc->pend_list_tag[i] &= (~P_TRIM);     /* list is empty: drop its trim tag */
        }
 out:
        blk_finish_plug(&plug);
        mutex_unlock(&dcc->cmd_lock);
+       /* number of commands passed to __submit_discard_cmd() in this pass */
+       return issued;
+}
+
+/*
+ * Throw away every discard command still sitting on a pending list.
+ * Each entry is expected to be in D_PREP state and is handed to
+ * __remove_discard_cmd(); dcc->cmd_lock is held for the whole sweep.
+ */
+static void __drop_discard_cmd(struct f2fs_sb_info *sbi)
+{
+       struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+       struct discard_cmd *cmd, *next;
+       int idx = MAX_PLIST_NUM;
+
+       mutex_lock(&dcc->cmd_lock);
+       /* highest list index first, down to 0 */
+       while (idx-- > 0) {
+               struct list_head *head = &dcc->pend_list[idx];
+
+               list_for_each_entry_safe(cmd, next, head, list) {
+                       f2fs_bug_on(sbi, cmd->state != D_PREP);
+                       __remove_discard_cmd(sbi, cmd);
+               }
+       }
+       mutex_unlock(&dcc->cmd_lock);
 }
 
 static void __wait_one_discard_bio(struct f2fs_sb_info *sbi,
 void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
 {
        __issue_discard_cmd(sbi, false);
+       __drop_discard_cmd(sbi);        /* remove entries still left on the pending lists */
        __wait_discard_cmd(sbi, false);
 }
 
+/*
+ * Tag every pending list with P_TRIM, under dcc->cmd_lock, so that
+ * __issue_discard_cmd() submits the entries of those lists
+ * unconditionally.
+ */
+static void mark_discard_range_all(struct f2fs_sb_info *sbi)
+{
+       struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+       unsigned int idx = 0;
+
+       mutex_lock(&dcc->cmd_lock);
+       while (idx < MAX_PLIST_NUM)
+               dcc->pend_list_tag[idx++] |= P_TRIM;
+       mutex_unlock(&dcc->cmd_lock);
+}
+/*
+ * Body of the discard kthread.  Each round sleeps on discard_wait_queue
+ * until it is asked to stop or freeze, dcc->discard_wake is set, or
+ * wait_ms elapses; it then runs one __issue_discard_cmd() pass with
+ * issue_cond set, under sb_start_intwrite().  Returns 0 when it stops.
+ */
 static int issue_discard_thread(void *data)
 {
        struct f2fs_sb_info *sbi = data;
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        wait_queue_head_t *q = &dcc->discard_wait_queue;
+       unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;      /* timeout of the next sleep, in ms */
+       int issued;
 
        set_freezable();
 
        do {
-               wait_event_interruptible(*q, kthread_should_stop() ||
-                                       freezing(current) ||
-                                       atomic_read(&dcc->discard_cmd_cnt));
+               wait_event_interruptible_timeout(*q,
+                               kthread_should_stop() || freezing(current) ||
+                               dcc->discard_wake,
+                               msecs_to_jiffies(wait_ms));
                if (try_to_freeze())
                        continue;
                if (kthread_should_stop())
                        return 0;
 
+               if (dcc->discard_wake)
+                       dcc->discard_wake = 0;  /* consume the wake-up request */
+
                sb_start_intwrite(sbi->sb);
 
-               __issue_discard_cmd(sbi, true);
-               __wait_discard_cmd(sbi, true);
+               issued = __issue_discard_cmd(sbi, true);
+               if (issued) {
+                       __wait_discard_cmd(sbi, true);
+                       wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;   /* something was submitted: come back soon */
+               } else {
+                       wait_ms = DEF_MAX_DISCARD_ISSUE_TIME;   /* nothing submitted: back off */
+               }
 
                sb_end_intwrite(sbi->sb);
 
-               congestion_wait(BLK_RW_SYNC, HZ/50);
        } while (!kthread_should_stop());
        return 0;
 }
 
 void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 {
-       struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
+       struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+       struct list_head *head = &dcc->entry_list;
        struct discard_entry *entry, *this;
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
        unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
                        goto find_next;
 
                list_del(&entry->list);
-               SM_I(sbi)->dcc_info->nr_discards -= total_len;
+               dcc->nr_discards -= total_len;
                kmem_cache_free(discard_entry_slab, entry);
        }
 
-       wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue);
+       dcc->discard_wake = 1;
+       wake_up_interruptible_all(&dcc->discard_wait_queue);
 }
 
 static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
        if (!dcc)
                return -ENOMEM;
 
+       dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
        INIT_LIST_HEAD(&dcc->entry_list);
-       for (i = 0; i < MAX_PLIST_NUM; i++)
+       for (i = 0; i < MAX_PLIST_NUM; i++) {
                INIT_LIST_HEAD(&dcc->pend_list[i]);
+               if (i >= dcc->discard_granularity - 1)
+                       dcc->pend_list_tag[i] |= P_ACTIVE;
+       }
        INIT_LIST_HEAD(&dcc->wait_list);
        mutex_init(&dcc->cmd_lock);
        atomic_set(&dcc->issued_discard, 0);
 
                schedule();
        }
+       /* It's time to issue all the filed discards */
+       mark_discard_range_all(sbi);
 out:
        range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
        return err;
 
                spin_unlock(&sbi->stat_lock);
                return count;
        }
+
+       /*
+        * discard_granularity: accepts 1..MAX_PLIST_NUM.  Re-tags the
+        * pending lists so that only lists with index >= t - 1 carry
+        * P_ACTIVE, then records the new value in the attribute.
+        */
+       if (!strcmp(a->attr.name, "discard_granularity")) {
+               struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+               int i;
+
+               if (t == 0 || t > MAX_PLIST_NUM)
+                       return -EINVAL;
+               if (t == *ui)
+                       return count;
+
+               mutex_lock(&dcc->cmd_lock);
+               for (i = 0; i < MAX_PLIST_NUM; i++) {
+                       if (i >= t - 1)
+                               dcc->pend_list_tag[i] |= P_ACTIVE;
+                       else
+                               dcc->pend_list_tag[i] &= (~P_ACTIVE);
+               }
+               mutex_unlock(&dcc->cmd_lock);
+
+               /*
+                * Store the value here: this branch returns before the
+                * generic "*ui = t" that follows it, so without this the
+                * attribute would keep its old value while the list tags
+                * already reflect the new one.
+                */
+               *ui = t;
+               return count;
+       }
+
        *ui = t;
 
        if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent, gc_urgent);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
+F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity);
 F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
        ATTR_LIST(gc_urgent),
        ATTR_LIST(reclaim_segments),
        ATTR_LIST(max_small_discards),
+       ATTR_LIST(discard_granularity),
        ATTR_LIST(batched_trim_sections),
        ATTR_LIST(ipu_policy),
        ATTR_LIST(min_ipu_util),