Note that this patch depends on "f2fs: return correct errno in f2fs_gc".
This adds a lightweight non-persistent snapshotting scheme to f2fs.
To use, mount with the option checkpoint=disable, and to return to
normal operation, remount with checkpoint=enable. If the filesystem
is shut down before remounting with checkpoint=enable, it will revert
back to its apparent state when it was first mounted with
checkpoint=disable. This is useful for situations where you wish to be
able to roll back the state of the disk in case of some critical
failure.
Signed-off-by: Daniel Rosenberg <drosen@google.com>
[Jaegeuk Kim: use SB_RDONLY instead of MS_RDONLY]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
                        non-atomic files likewise "nobarrier" mount option.
 test_dummy_encryption  Enable dummy encryption, which provides a fake fscrypt
                        context. The fake fscrypt context is used by xfstests.
+checkpoint=%s          Set to "disable" to turn off checkpointing. Set to "enable"
+                       to re-enable checkpointing. Checkpointing is enabled by
+                       default. While it is disabled, any unmount or unexpected
+                       shutdown will cause the filesystem contents to appear as
+                       they did when the filesystem was mounted with
+                       checkpoint=disable.
 
 ================================================================================
 DEBUGFS ENTRIES
 
        if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
                __set_ckpt_flags(ckpt, CP_FSCK_FLAG);
 
+       if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
+               __set_ckpt_flags(ckpt, CP_DISABLED_FLAG);
+       else
+               __clear_ckpt_flags(ckpt, CP_DISABLED_FLAG);
+
        /* set this flag to activate crc|cp_ver for recovery */
        __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
        __clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
 
        clear_sbi_flag(sbi, SBI_IS_DIRTY);
        clear_sbi_flag(sbi, SBI_NEED_CP);
+       sbi->unusable_block_count = 0;
        __set_cp_next_pack(sbi);
 
        /*
        unsigned long long ckpt_ver;
        int err = 0;
 
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+               if (cpc->reason != CP_PAUSE)
+                       return 0;
+               f2fs_msg(sbi->sb, KERN_WARNING,
+                               "Start checkpoint disabled!");
+       }
        mutex_lock(&sbi->cp_mutex);
 
        if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
 
        if (fio->in_list)
                goto next;
 out:
-       if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
+       if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
+                               f2fs_is_checkpoint_ready(sbi))
                __submit_merged_bio(io);
        up_write(&io->io_rwsem);
 }
                        is_inode_flag_set(inode, FI_NEED_IPU))
                return true;
 
+       if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+                       !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
+               return true;
+
        return false;
 }
 
                        return true;
                if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
                        return true;
+               if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+                       f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
+                       return true;
        }
        return false;
 }
 
        trace_f2fs_write_begin(inode, pos, len, flags);
 
+       err = f2fs_is_checkpoint_ready(sbi);
+       if (err)
+               goto fail;
+
        if ((f2fs_is_atomic_file(inode) &&
                        !f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
                        is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
 
                seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n",
                        si->sbi->sb->s_bdev, i++,
                        f2fs_readonly(si->sbi->sb) ? "RO": "RW",
-                       f2fs_cp_error(si->sbi) ? "Error": "Good");
+                       is_set_ckpt_flags(si->sbi, CP_DISABLED_FLAG) ?
+                       "Disabled": (f2fs_cp_error(si->sbi) ? "Error": "Good"));
                seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
                           si->sit_area_segs, si->nat_area_segs);
                seq_printf(s, "[SSA: %d] [MAIN: %d",
 
 #define F2FS_MOUNT_QUOTA               0x00400000
 #define F2FS_MOUNT_INLINE_XATTR_SIZE   0x00800000
 #define F2FS_MOUNT_RESERVE_ROOT                0x01000000
+#define F2FS_MOUNT_DISABLE_CHECKPOINT  0x02000000
 
 #define F2FS_OPTION(sbi)       ((sbi)->mount_opt)
 #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
 #define        CP_RECOVERY     0x00000008
 #define        CP_DISCARD      0x00000010
 #define CP_TRIMMED     0x00000020
+#define CP_PAUSE       0x00000040
 
 #define MAX_DISCARD_BLOCKS(sbi)                BLKS_PER_SEC(sbi)
 #define DEF_MAX_DISCARD_REQUEST                8       /* issue 8 discards per round */
 #define DEF_DISCARD_URGENT_UTIL                80      /* do more discard over 80% */
 #define DEF_CP_INTERVAL                        60      /* 60 secs */
 #define DEF_IDLE_INTERVAL              5       /* 5 secs */
+#define DEF_DISABLE_INTERVAL           5       /* 5 secs */
 
 struct cp_control {
        int reason;
        SBI_NEED_CP,                            /* need to checkpoint */
        SBI_IS_SHUTDOWN,                        /* shutdown by ioctl */
        SBI_IS_RECOVERED,                       /* recovered orphan/data */
+       SBI_CP_DISABLED,                        /* CP was disabled last mount */
 };
 
 enum {
        REQ_TIME,
        DISCARD_TIME,
        GC_TIME,
+       DISABLE_TIME,
        MAX_TIME,
 };
 
        block_t reserved_blocks;                /* configurable reserved blocks */
        block_t current_reserved_blocks;        /* current reserved blocks */
 
+       /* Additional tracking for no checkpoint mode */
+       block_t unusable_block_count;           /* # of blocks saved by last cp */
+
        unsigned int nquota_files;              /* # of quota sysfile */
 
        u32 s_next_generation;                  /* for NFS support */
 
        if (!__allow_reserved_blocks(sbi, inode, true))
                avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
-
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+               avail_user_block_count -= sbi->unusable_block_count;
        if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
                diff = sbi->total_valid_block_count - avail_user_block_count;
                if (diff > *count)
 
        if (!__allow_reserved_blocks(sbi, inode, false))
                valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+               valid_block_count += sbi->unusable_block_count;
 
        if (unlikely(valid_block_count > sbi->user_block_count)) {
                spin_unlock(&sbi->stat_lock);
 bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
 void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
                                        struct cp_control *cpc);
+void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
+int f2fs_disable_cp_again(struct f2fs_sb_info *sbi);
 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
        if (test_opt(sbi, LFS) && (rw == WRITE) &&
                                block_unaligned_IO(inode, iocb, iter))
                return true;
+       if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED))
+               return true;
+
        return false;
 }
 
 
        };
        unsigned int seq_id = 0;
 
-       if (unlikely(f2fs_readonly(inode->i_sb)))
+       if (unlikely(f2fs_readonly(inode->i_sb) ||
+                               is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
                return 0;
 
        trace_f2fs_sync_file_enter(inode);
        if (f2fs_readonly(sbi->sb))
                return -EROFS;
 
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+               f2fs_msg(sbi->sb, KERN_INFO,
+                       "Skipping Checkpoint. Checkpoints currently disabled.");
+               return -EINVAL;
+       }
+
        ret = mnt_want_write_file(filp);
        if (ret)
                return ret;
        if (f2fs_readonly(sbi->sb))
                return -EROFS;
 
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+               return -EINVAL;
+
        if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
                                                        sizeof(range)))
                return -EFAULT;
 
 
                if (sec_usage_check(sbi, secno))
                        goto next;
+               /* Don't touch checkpointed data */
+               if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+                                       get_ckpt_valid_blocks(sbi, segno)))
+                       goto next;
                if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
                        goto next;
 
                 * threshold, we can make them free by checkpoint. Then, we
                 * secure free segments which doesn't need fggc any more.
                 */
-               if (prefree_segments(sbi)) {
+               if (prefree_segments(sbi) &&
+                               !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
                        ret = f2fs_write_checkpoint(sbi, &cpc);
                        if (ret)
                                goto stop;
                        segno = NULL_SEGNO;
                        goto gc_more;
                }
-               if (gc_type == FG_GC)
+               if (gc_type == FG_GC && !is_sbi_flag_set(sbi, SBI_CP_DISABLED))
                        ret = f2fs_write_checkpoint(sbi, &cpc);
        }
 stop:
 
        if (!is_inode_flag_set(inode, FI_DIRTY_INODE))
                return 0;
 
+       if (f2fs_is_checkpoint_ready(sbi))
+               return -ENOSPC;
+
        /*
         * We need to balance fs here to prevent from producing dirty node pages
         * during the urgent cleaning time when runing out of free sections.
        stat_dec_inline_dir(inode);
        stat_dec_inline_inode(inode);
 
-       if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG)))
+       if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG) &&
+                               !is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
                f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
        else
                f2fs_inode_synced(inode);
 
 
 #include "f2fs.h"
 #include "node.h"
+#include "segment.h"
 #include "xattr.h"
 #include "acl.h"
 #include <trace/events/f2fs.h>
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
+       err = f2fs_is_checkpoint_ready(sbi);
+       if (err)
+               return err;
 
        err = dquot_initialize(dir);
        if (err)
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
+       err = f2fs_is_checkpoint_ready(sbi);
+       if (err)
+               return err;
 
        err = fscrypt_prepare_link(old_dentry, dir, dentry);
        if (err)
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
+       err = f2fs_is_checkpoint_ready(sbi);
+       if (err)
+               return err;
 
        err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
                                      &disk_link);
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
+       err = f2fs_is_checkpoint_ready(sbi);
+       if (err)
+               return err;
 
        err = dquot_initialize(dir);
        if (err)
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
+       err = f2fs_is_checkpoint_ready(sbi);
+       if (err)
+               return err;
 
        if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
                        (!projid_eq(F2FS_I(new_dir)->i_projid,
 
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
+       err = f2fs_is_checkpoint_ready(sbi);
+       if (err)
+               return err;
 
        if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
                        !projid_eq(F2FS_I(new_dir)->i_projid,
 
                return false;
        if (sbi->gc_mode == GC_URGENT)
                return true;
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+               return true;
 
        return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
                        SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
        if (need && excess_cached_nats(sbi))
                f2fs_balance_fs_bg(sbi);
 
+       if (f2fs_is_checkpoint_ready(sbi))
+               return;
+
        /*
         * We should do GC or end up with checkpoint, if there are so many dirty
         * dir/node pages without enough free segments.
 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 {
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
-       unsigned short valid_blocks;
+       unsigned short valid_blocks, ckpt_valid_blocks;
 
        if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
                return;
        mutex_lock(&dirty_i->seglist_lock);
 
        valid_blocks = get_valid_blocks(sbi, segno, false);
+       ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
 
-       if (valid_blocks == 0) {
+       if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
+                               ckpt_valid_blocks == sbi->blocks_per_seg)) {
                __locate_dirty_segment(sbi, segno, PRE);
                __remove_dirty_segment(sbi, segno, DIRTY);
        } else if (valid_blocks < sbi->blocks_per_seg) {
        mutex_unlock(&dirty_i->seglist_lock);
 }
 
+/*
+ * Move every segment on the DIRTY list that currently has no valid blocks,
+ * and is not one of the current segments, over to the PRE (prefree) list.
+ * This function takes dirty_i->seglist_lock itself, so the caller must NOT
+ * already hold that lock.
+ */
+void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
+{
+       struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+       unsigned int segno;
+
+       mutex_lock(&dirty_i->seglist_lock);
+       for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+               if (get_valid_blocks(sbi, segno, false))
+                       continue;
+               if (IS_CURSEG(sbi, segno))
+                       continue;
+               __locate_dirty_segment(sbi, segno, PRE);
+               __remove_dirty_segment(sbi, segno, DIRTY);
+       }
+       mutex_unlock(&dirty_i->seglist_lock);
+}
+
+/*
+ * Check whether the holes in the dirty segments fit in the overprovision area.
+ *
+ * For every segment on the DIRTY list, the number of non-valid blocks
+ * (blocks_per_seg - valid_blocks) is added to a per-type total, one for node
+ * segments and one for data segments.
+ *
+ * Returns 0 when both totals are within the overprovision size expressed in
+ * blocks, or -EAGAIN when either total exceeds it.
+ */
+int f2fs_disable_cp_again(struct f2fs_sb_info *sbi)
+{
+       struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+       block_t ovp_blocks =
+               overprovision_segments(sbi) << sbi->log_blocks_per_seg;
+       block_t holes[2] = {0, 0};      /* indexed by DATA and NODE */
+       unsigned int segno;
+
+       mutex_lock(&dirty_i->seglist_lock);
+       for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+               struct seg_entry *se = get_seg_entry(sbi, segno);
+               int kind = IS_NODESEG(se->type) ? NODE : DATA;
+
+               holes[kind] += sbi->blocks_per_seg - se->valid_blocks;
+       }
+       mutex_unlock(&dirty_i->seglist_lock);
+
+       if (holes[DATA] <= ovp_blocks && holes[NODE] <= ovp_blocks)
+               return 0;
+       return -EAGAIN;
+}
+
+/*
+ * Only used while SBI_CP_DISABLED is set.
+ *
+ * Scan the DIRTY list for the first segment that has neither currently valid
+ * blocks nor blocks valid in the last checkpoint.  Returns that segment
+ * number, or NULL_SEGNO when no such segment exists.
+ */
+static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
+{
+       struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+       unsigned int found = NULL_SEGNO;
+       unsigned int segno;
+
+       mutex_lock(&dirty_i->seglist_lock);
+       for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+               if (get_valid_blocks(sbi, segno, false) ||
+                               get_ckpt_valid_blocks(sbi, segno))
+                       continue;
+               found = segno;
+               break;
+       }
+       mutex_unlock(&dirty_i->seglist_lock);
+
+       return found;
+}
+
 static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
                struct block_device *bdev, block_t lstart,
                block_t start, block_t len)
                        sbi->discard_blks--;
 
                /* don't overwrite by SSR to keep node chain */
-               if (IS_NODESEG(se->type)) {
+               if (IS_NODESEG(se->type) &&
+                               !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
                        if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
                                se->ckpt_valid_blocks++;
                }
                        f2fs_bug_on(sbi, 1);
                        se->valid_blocks++;
                        del = 0;
+               } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+                       /*
+                        * If checkpoints are off, we must not reuse data that
+                        * was used in the previous checkpoint. If it was used
+                        * before, we must track that to know how much space we
+                        * really have.
+                        */
+                       if (f2fs_test_bit(offset, se->ckpt_valid_map))
+                               sbi->unusable_block_count++;
                }
 
                if (f2fs_test_and_clear_bit(offset, se->discard_map))
        if (sbi->segs_per_sec != 1)
                return CURSEG_I(sbi, type)->segno;
 
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+               return 0;
+
        if (test_opt(sbi, NOHEAP) &&
                (type == CURSEG_HOT_DATA || IS_NODESEG(type)))
                return 0;
                        return 1;
                }
        }
+
+       /* find valid_blocks=0 in dirty list */
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+               segno = get_free_segment(sbi);
+               if (segno != NULL_SEGNO) {
+                       curseg->next_segno = segno;
+                       return 1;
+               }
+       }
        return 0;
 }
 
        else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
                                        type == CURSEG_WARM_NODE)
                new_curseg(sbi, type, false);
-       else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
+       else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
+                       likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
                new_curseg(sbi, type, false);
        else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
                change_curseg(sbi, type);
 
                return get_seg_entry(sbi, segno)->valid_blocks;
 }
 
+/* Return the ckpt_valid_blocks counter stored in the seg_entry of @segno. */
+static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
+                               unsigned int segno)
+{
+       struct seg_entry *entry = get_seg_entry(sbi, segno);
+
+       return entry->ckpt_valid_blocks;
+}
+
 static inline void seg_info_from_raw_sit(struct seg_entry *se,
                                        struct f2fs_sit_entry *rs)
 {
                reserved_sections(sbi) + needed);
 }
 
+/*
+ * Despite the name, this returns an errno-style value, not a boolean:
+ * 0 when it is fine to proceed, -ENOSPC when checkpointing is disabled
+ * (SBI_CP_DISABLED) and has_not_enough_free_secs(sbi, 0, 0) reports a
+ * shortage of free sections.
+ */
+static inline int f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi)
+{
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)) &&
+                       unlikely(has_not_enough_free_secs(sbi, 0, 0)))
+               return -ENOSPC;
+
+       return 0;
+}
+
 static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
 {
        return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments;
 
        Opt_alloc,
        Opt_fsync,
        Opt_test_dummy_encryption,
+       Opt_checkpoint,
        Opt_err,
 };
 
        {Opt_alloc, "alloc_mode=%s"},
        {Opt_fsync, "fsync_mode=%s"},
        {Opt_test_dummy_encryption, "test_dummy_encryption"},
+       {Opt_checkpoint, "checkpoint=%s"},
        {Opt_err, NULL},
 };
 
                                        "Test dummy encryption mount option ignored");
 #endif
                        break;
+               case Opt_checkpoint:
+                       name = match_strdup(&args[0]);
+                       if (!name)
+                               return -ENOMEM;
+
+                       if (strlen(name) == 6 &&
+                                       !strncmp(name, "enable", 6)) {
+                               clear_opt(sbi, DISABLE_CHECKPOINT);
+                       } else if (strlen(name) == 7 &&
+                                       !strncmp(name, "disable", 7)) {
+                               set_opt(sbi, DISABLE_CHECKPOINT);
+                       } else {
+                               kfree(name);
+                               return -EINVAL;
+                       }
+                       kfree(name);
+                       break;
                default:
                        f2fs_msg(sb, KERN_ERR,
                                "Unrecognized mount option \"%s\" or missing value",
                }
        }
 
+       if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) {
+               f2fs_msg(sb, KERN_ERR,
+                               "LFS not compatible with checkpoint=disable\n");
+               return -EINVAL;
+       }
+
        /* Not pass down write hints if the number of active logs is lesser
         * than NR_CURSEG_TYPE.
         */
         * But, the previous checkpoint was not done by umount, it needs to do
         * clean checkpoint again.
         */
-       if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
-                       !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+       if ((is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
+                       !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG))) {
                struct cp_control cpc = {
                        .reason = CP_UMOUNT,
                };
 
        if (unlikely(f2fs_cp_error(sbi)))
                return 0;
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+               return 0;
 
        trace_f2fs_sync_fs(sb, sync);
 
        buf->f_blocks = total_count - start_count;
        buf->f_bfree = user_block_count - valid_user_blocks(sbi) -
                                                sbi->current_reserved_blocks;
+       if (unlikely(buf->f_bfree <= sbi->unusable_block_count))
+               buf->f_bfree = 0;
+       else
+               buf->f_bfree -= sbi->unusable_block_count;
+
        if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks)
                buf->f_bavail = buf->f_bfree -
                                F2FS_OPTION(sbi).root_reserved_blocks;
        else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
                seq_printf(seq, ",alloc_mode=%s", "reuse");
 
+       if (test_opt(sbi, DISABLE_CHECKPOINT))
+               seq_puts(seq, ",checkpoint=disable");
+
        if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
                seq_printf(seq, ",fsync_mode=%s", "posix");
        else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
        set_opt(sbi, INLINE_DENTRY);
        set_opt(sbi, EXTENT_CACHE);
        set_opt(sbi, NOHEAP);
+       clear_opt(sbi, DISABLE_CHECKPOINT);
        sbi->sb->s_flags |= SB_LAZYTIME;
        set_opt(sbi, FLUSH_MERGE);
        set_opt(sbi, DISCARD);
 #ifdef CONFIG_QUOTA
 static int f2fs_enable_quotas(struct super_block *sb);
 #endif
+
+/*
+ * Pause checkpointing (mount/remount with checkpoint=disable).
+ *
+ * Steps:
+ *  1. Run foreground GC repeatedly until the DISABLE_TIME interval expires or
+ *     f2fs_gc() reports -ENODATA; -EAGAIN from f2fs_gc() is retried, any
+ *     other error aborts.
+ *  2. sync_filesystem() to flush everything out.
+ *  3. f2fs_disable_cp_again() must confirm the remaining holes fit, otherwise
+ *     -EAGAIN is returned.
+ *  4. Set SBI_CP_DISABLED and write one final CP_PAUSE checkpoint.
+ *
+ * Returns 0 on success or a negative errno.  If the final checkpoint fails,
+ * SBI_CP_DISABLED is cleared again and the error is propagated, so that a
+ * failed disable does not leave the filesystem silently skipping
+ * checkpoints while the caller rolls back the mount option.
+ *
+ * NOTE(review): gc_mutex is taken once before the GC loop; confirm that
+ * f2fs_gc() does not release it on return, otherwise the lock has to be
+ * re-taken on each iteration.
+ * NOTE(review): SB_ACTIVE is set here and never restored - confirm that is
+ * intended for the remount path.
+ */
+static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
+{
+       /* Zero-initialise the fields we do not set, as other callers do. */
+       struct cp_control cpc = {
+               .reason = CP_PAUSE,
+       };
+       int err;
+
+       sbi->sb->s_flags |= SB_ACTIVE;
+
+       mutex_lock(&sbi->gc_mutex);
+       f2fs_update_time(sbi, DISABLE_TIME);
+
+       while (!f2fs_time_over(sbi, DISABLE_TIME)) {
+               err = f2fs_gc(sbi, true, false, NULL_SEGNO);
+               if (err == -ENODATA)
+                       break;
+               if (err && err != -EAGAIN) {
+                       mutex_unlock(&sbi->gc_mutex);
+                       return err;
+               }
+       }
+       mutex_unlock(&sbi->gc_mutex);
+
+       err = sync_filesystem(sbi->sb);
+       if (err)
+               return err;
+
+       if (f2fs_disable_cp_again(sbi))
+               return -EAGAIN;
+
+       mutex_lock(&sbi->gc_mutex);
+       /* The flag must be set first: f2fs_write_checkpoint() records it. */
+       set_sbi_flag(sbi, SBI_CP_DISABLED);
+       err = f2fs_write_checkpoint(sbi, &cpc);
+       if (err)
+               clear_sbi_flag(sbi, SBI_CP_DISABLED);
+
+       sbi->unusable_block_count = 0;
+       mutex_unlock(&sbi->gc_mutex);
+       return err;
+}
+
+/*
+ * Resume checkpointing (checkpoint=enable, or mounting a filesystem whose
+ * last checkpoint carries CP_DISABLED_FLAG).
+ *
+ * Under gc_mutex: move empty dirty segments to prefree, clear
+ * SBI_CP_DISABLED and mark the sb info dirty.  The flag is cleared before
+ * f2fs_sync_fs() is called.  The return value of f2fs_sync_fs() is not
+ * checked.
+ * NOTE(review): presumably the sync writes a checkpoint that persists the
+ * cleared state - confirm against f2fs_sync_fs().
+ */
+static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
+{
+       mutex_lock(&sbi->gc_mutex);
+       f2fs_dirty_to_prefree(sbi);
+
+       clear_sbi_flag(sbi, SBI_CP_DISABLED);
+       set_sbi_flag(sbi, SBI_IS_DIRTY);
+       mutex_unlock(&sbi->gc_mutex);
+
+       f2fs_sync_fs(sbi->sb, 1);
+}
+
 static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 {
        struct f2fs_sb_info *sbi = F2FS_SB(sb);
        bool need_restart_gc = false;
        bool need_stop_gc = false;
        bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
+       bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
+       bool checkpoint_changed;
 #ifdef CONFIG_QUOTA
        int i, j;
 #endif
        err = parse_options(sb, data);
        if (err)
                goto restore_opts;
+       checkpoint_changed =
+                       disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT);
 
        /*
         * Previous and new state of filesystem is RO,
                err = dquot_suspend(sb, -1);
                if (err < 0)
                        goto restore_opts;
-       } else if (f2fs_readonly(sb) && !(*flags & MS_RDONLY)) {
+       } else if (f2fs_readonly(sb) && !(*flags & SB_RDONLY)) {
                /* dquot_resume needs RW */
                sb->s_flags &= ~SB_RDONLY;
                if (sb_any_quota_suspended(sb)) {
                goto restore_opts;
        }
 
+       if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
+               err = -EINVAL;
+               f2fs_msg(sbi->sb, KERN_WARNING,
+                       "disabling checkpoint not compatible with read-only");
+               goto restore_opts;
+       }
+
        /*
         * We stop the GC thread if FS is mounted as RO
         * or if background_gc = off is passed in mount
                clear_sbi_flag(sbi, SBI_IS_CLOSE);
        }
 
+       if (checkpoint_changed) {
+               if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+                       err = f2fs_disable_checkpoint(sbi);
+                       if (err)
+                               goto restore_gc;
+               } else {
+                       f2fs_enable_checkpoint(sbi);
+               }
+       }
+
        /*
         * We stop issue flush thread if FS is mounted as RO
         * or if flush_merge is not passed in mount option.
        sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
        sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL;
        sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL;
+       sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL;
        clear_sbi_flag(sbi, SBI_NEED_FSCK);
 
        for (i = 0; i < NR_COUNT_TYPE; i++)
        if (err)
                goto free_meta;
 
+       if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)))
+               goto skip_recovery;
+
        /* recover fsynced data */
        if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
                /*
        /* f2fs_recover_fsync_data() cleared this already */
        clear_sbi_flag(sbi, SBI_POR_DOING);
 
+       if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+               err = f2fs_disable_checkpoint(sbi);
+               if (err)
+                       goto free_meta;
+       } else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) {
+               f2fs_enable_checkpoint(sbi);
+       }
+
        /*
         * If filesystem is not mounted as read-only then
         * do start the gc_thread.
 
 /*
  * For checkpoint
  */
+#define CP_DISABLED_FLAG               0x00001000
 #define CP_LARGE_NAT_BITMAP_FLAG       0x00000400
 #define CP_NOCRC_RECOVERY_FLAG 0x00000200
 #define CP_TRIMMED_FLAG                0x00000100