}
 
 static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
-                                                            u64 start, u64 end)
+                                                            u64 start)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct io_failure_record *failrec;
        struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
        struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       const u32 sectorsize = fs_info->sectorsize;
        int ret;
        u64 logical;
 
                return ERR_PTR(-ENOMEM);
 
        failrec->start = start;
-       failrec->len = end - start + 1;
+       failrec->len = sectorsize;
        failrec->this_mirror = 0;
        failrec->bio_flags = 0;
        failrec->in_validation = 0;
        free_extent_map(em);
 
        /* Set the bits in the private failure tree */
-       ret = set_extent_bits(failure_tree, start, end,
+       ret = set_extent_bits(failure_tree, start, start + sectorsize - 1,
                              EXTENT_LOCKED | EXTENT_DIRTY);
        if (ret >= 0) {
                ret = set_state_failrec(failure_tree, start, failrec);
                /* Set the bits in the inode's tree */
-               ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED);
+               ret = set_extent_bits(tree, start, start + sectorsize - 1,
+                                     EXTENT_DAMAGED);
        } else if (ret < 0) {
                kfree(failrec);
                return ERR_PTR(ret);
        return false;
 }
 
-blk_status_t btrfs_submit_read_repair(struct inode *inode,
-                                     struct bio *failed_bio, u32 bio_offset,
-                                     struct page *page, unsigned int pgoff,
-                                     u64 start, u64 end, int failed_mirror,
-                                     submit_bio_hook_t *submit_bio_hook)
+int btrfs_repair_one_sector(struct inode *inode,
+                           struct bio *failed_bio, u32 bio_offset,
+                           struct page *page, unsigned int pgoff,
+                           u64 start, int failed_mirror,
+                           submit_bio_hook_t *submit_bio_hook)
 {
        struct io_failure_record *failrec;
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 
        BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
 
-       failrec = btrfs_get_io_failure_record(inode, start, end);
+       failrec = btrfs_get_io_failure_record(inode, start);
        if (IS_ERR(failrec))
-               return errno_to_blk_status(PTR_ERR(failrec));
-
-       need_validation = btrfs_io_needs_validation(inode, failed_bio);
+               return PTR_ERR(failrec);
 
+       /*
+        * We only submit repair for one sector at a time, thus we no longer
+        * need the extra validation.
+        *
+        * TODO: All the extra validation related code will be cleaned up
+        * later.
+        */
+       need_validation = false;
        if (!btrfs_check_repairable(inode, need_validation, failrec,
                                    failed_mirror)) {
                free_io_failure(failure_tree, tree, failrec);
-               return BLK_STS_IOERR;
+               return -EIO;
        }
 
        repair_bio = btrfs_io_bio_alloc(1);
                free_io_failure(failure_tree, tree, failrec);
                bio_put(repair_bio);
        }
-       return status;
+       return blk_status_to_errno(status);
+}
+
+static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
+{
+       struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
+
+       ASSERT(page_offset(page) <= start &&
+              start + len <= page_offset(page) + PAGE_SIZE);
+
+       /*
+        * For the subpage metadata case, all btrfs_page_* helpers need the
+        * page to have page::private populated.
+        * But we can have a rare case where the last eb in the page is only
+        * referred to by the IO, and it gets released immediately after it's
+        * read and verified.
+        *
+        * This can detach the page private completely.
+        * In that case, we can just skip the page status update, as the page
+        * has no eb anymore.
+        */
+       if (fs_info->sectorsize < PAGE_SIZE && unlikely(!PagePrivate(page))) {
+               ASSERT(!is_data_inode(page->mapping->host));
+               return;
+       }
+       if (uptodate) {
+               btrfs_page_set_uptodate(fs_info, page, start, len);
+       } else {
+               btrfs_page_clear_uptodate(fs_info, page, start, len);
+               btrfs_page_set_error(fs_info, page, start, len);
+       }
+
+       if (fs_info->sectorsize == PAGE_SIZE)
+               unlock_page(page);
+       else if (is_data_inode(page->mapping->host))
+               /*
+                * For subpage data, unlock the page if we're the last reader.
+                * For subpage metadata, page lock is not utilized for read.
+                */
+               btrfs_subpage_end_reader(fs_info, page, start, len);
+}
+
+static blk_status_t submit_read_repair(struct inode *inode,
+                                     struct bio *failed_bio, u32 bio_offset,
+                                     struct page *page, unsigned int pgoff,
+                                     u64 start, u64 end, int failed_mirror,
+                                     unsigned int error_bitmap,
+                                     submit_bio_hook_t *submit_bio_hook)
+{
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       const u32 sectorsize = fs_info->sectorsize;
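+       /* error_bitmap has one bit per sector in the [start, end] range */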
+       const int nr_bits = (end + 1 - start) >> fs_info->sectorsize_bits;
+       int error = 0;
+       int i;
+
+       BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
+
+       /* We're here because we had some read errors or csum mismatches */
+       ASSERT(error_bitmap);
+
+       /*
+        * We only get called on buffered IO, thus page must be mapped and bio
+        * must not be cloned.
+        */
+       ASSERT(page->mapping && !bio_flagged(failed_bio, BIO_CLONED));
+
+       /* Iterate through all the sectors in the range */
+       for (i = 0; i < nr_bits; i++) {
+               const unsigned int offset = i * sectorsize;
+               struct extent_state *cached = NULL;
+               bool uptodate = false;
+               int ret;
+
+               if (!(error_bitmap & (1U << i))) {
+                       /*
+                        * This sector has no error; just end the page read
+                        * and unlock the range.
+                        */
+                       uptodate = true;
+                       goto next;
+               }
+
+               ret = btrfs_repair_one_sector(inode, failed_bio,
+                               bio_offset + offset,
+                               page, pgoff + offset, start + offset,
+                               failed_mirror, submit_bio_hook);
+               if (!ret) {
+                       /*
+                        * We have submitted the read repair; the page release
+                        * will be handled by the endio function of the
+                        * submitted repair bio.
+                        * Thus we don't need to do anything here.
+                        */
+                       continue;
+               }
+               /*
+                * Repair failed, just record the error but still continue.
+                * Otherwise the remaining sectors will not be properly
+                * unlocked.
+                */
+               if (!error)
+                       error = ret;
+next:
+               end_page_read(page, uptodate, start + offset, sectorsize);
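+               /*
+                * We are in endio context, thus the atomic extent unlock
+                * variant and GFP_ATOMIC below.
+                */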
+               if (uptodate)
+                       set_extent_uptodate(&BTRFS_I(inode)->io_tree,
+                                       start + offset,
+                                       start + offset + sectorsize - 1,
+                                       &cached, GFP_ATOMIC);
+               unlock_extent_cached_atomic(&BTRFS_I(inode)->io_tree,
+                               start + offset,
+                               start + offset + sectorsize - 1,
+                               &cached);
+       }
+       return errno_to_blk_status(error);
 }
 
 /* lots and lots of room for performance fixes in the end_bio funcs */
        btrfs_subpage_start_reader(fs_info, page, page_offset(page), PAGE_SIZE);
 }
 
-static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
-{
-       struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
-
-       ASSERT(page_offset(page) <= start &&
-               start + len <= page_offset(page) + PAGE_SIZE);
-
-       if (uptodate) {
-               btrfs_page_set_uptodate(fs_info, page, start, len);
-       } else {
-               btrfs_page_clear_uptodate(fs_info, page, start, len);
-               btrfs_page_set_error(fs_info, page, start, len);
-       }
-
-       if (fs_info->sectorsize == PAGE_SIZE)
-               unlock_page(page);
-       else if (is_data_inode(page->mapping->host))
-               /*
-                * For subpage data, unlock the page if we're the last reader.
-                * For subpage metadata, page lock is not utilized for read.
-                */
-               btrfs_subpage_end_reader(fs_info, page, start, len);
-}
-
 /*
  * Find extent buffer for a given bytenr.
  *
 static void end_bio_extent_readpage(struct bio *bio)
 {
        struct bio_vec *bvec;
-       int uptodate = !bio->bi_status;
        struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
        struct extent_io_tree *tree, *failure_tree;
        struct processed_extent processed = { 0 };
 
        ASSERT(!bio_flagged(bio, BIO_CLONED));
        bio_for_each_segment_all(bvec, bio, iter_all) {
+               bool uptodate = !bio->bi_status;
                struct page *page = bvec->bv_page;
                struct inode *inode = page->mapping->host;
                struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
                const u32 sectorsize = fs_info->sectorsize;
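+               /*
+                * Assume every sector is bad to start with;
+                * btrfs_verify_data_csum() below refines this for data reads
+                * that didn't hit a device error.
+                */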
+               unsigned int error_bitmap = (unsigned int)-1;
                u64 start;
                u64 end;
                u32 len;
 
                mirror = io_bio->mirror_num;
                if (likely(uptodate)) {
-                       if (is_data_inode(inode))
-                               ret = btrfs_verify_data_csum(io_bio,
+                       if (is_data_inode(inode)) {
+                               error_bitmap = btrfs_verify_data_csum(io_bio,
                                                bio_offset, page, start, end);
-                       else
+                               ret = error_bitmap;
+                       } else {
                                ret = btrfs_validate_metadata_buffer(io_bio,
                                        page, start, end, mirror);
+                       }
                        if (ret)
-                               uptodate = 0;
+                               uptodate = false;
                        else
                                clean_io_failure(BTRFS_I(inode)->root->fs_info,
                                                 failure_tree, tree, start,
                        goto readpage_ok;
 
                if (is_data_inode(inode)) {
-
                        /*
-                        * The generic bio_readpage_error handles errors the
-                        * following way: If possible, new read requests are
-                        * created and submitted and will end up in
-                        * end_bio_extent_readpage as well (if we're lucky,
-                        * not in the !uptodate case). In that case it returns
-                        * 0 and we just go on with the next page in our bio.
-                        * If it can't handle the error it will return -EIO and
-                        * we remain responsible for that page.
+                        * submit_read_repair() will handle all the good
+                        * and bad sectors; we just continue to the next bvec.
                         */
-                       if (!btrfs_submit_read_repair(inode, bio, bio_offset,
-                                               page,
-                                               start - page_offset(page),
-                                               start, end, mirror,
-                                               btrfs_submit_data_bio)) {
-                               uptodate = !bio->bi_status;
-                               ASSERT(bio_offset + len > bio_offset);
-                               bio_offset += len;
-                               continue;
-                       }
+                       submit_read_repair(inode, bio, bio_offset, page,
+                                          start - page_offset(page), start,
+                                          end, mirror, error_bitmap,
+                                          btrfs_submit_data_bio);
+
+                       ASSERT(bio_offset + len > bio_offset);
+                       bio_offset += len;
+                       continue;
                } else {
                        struct extent_buffer *eb;