From: Wengang Wang
Date: Tue, 29 Jan 2019 03:18:16 +0000 (-0800)
Subject: Revert "xfs: don't chain ioends during writepage submission"
X-Git-Tag: v4.1.12-124.31.3~313
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=7c23db20dfcdb221a9c780af4b464284256fc39d;p=users%2Fjedix%2Flinux-maple.git

Revert "xfs: don't chain ioends during writepage submission"

This reverts commit 34457adcadaf557febddb9f715368bbd5c3fd239.

These commits very possibly cause a SIGBUS issue (we cannot verify that
in the customer's environment). Revert them.

Orabug: 29279692

Signed-off-by: Wengang Wang
Reviewed-by: Shan Hai
Signed-off-by: Brian Maly
---

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 0676c1fef6c06..fc26695a88df2 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -47,6 +47,7 @@ struct xfs_writepage_ctx {
 	struct xfs_bmbt_irec imap;
 	bool imap_valid;
 	unsigned int io_type;
+	struct xfs_ioend *iohead;
 	struct xfs_ioend *ioend;
 	sector_t last_block;
 };
@@ -280,7 +281,7 @@ xfs_alloc_ioend(
 	 */
 	atomic_set(&ioend->io_remaining, 1);
 	ioend->io_error = 0;
-	INIT_LIST_HEAD(&ioend->io_list);
+	ioend->io_list = NULL;
 	ioend->io_type = type;
 	ioend->io_inode = inode;
 	ioend->io_buffer_head = NULL;
@@ -425,7 +426,8 @@ xfs_start_buffer_writeback(
 STATIC void
 xfs_start_page_writeback(
 	struct page		*page,
-	int			clear_dirty)
+	int			clear_dirty,
+	int			buffers)
 {
 	ASSERT(PageLocked(page));
 	ASSERT(!PageWriteback(page));
@@ -444,6 +446,10 @@ xfs_start_page_writeback(
 		set_page_writeback_keepwrite(page);
 
 	unlock_page(page);
+
+	/* If no buffers on the page are to be written, finish it here */
+	if (!buffers)
+		end_page_writeback(page);
 }
 
 static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
@@ -452,90 +458,110 @@ static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
 }
 
 /*
- * Submit all of the bios for an ioend. We are only passed a single ioend at a
- * time; the caller is responsible for chaining prior to submission.
+ * Submit all of the bios for all of the ioends we have saved up, covering the
+ * initial writepage page and also any probed pages.
+ *
+ * Because we may have multiple ioends spanning a page, we need to start
+ * writeback on all the buffers before we submit them for I/O. If we mark the
+ * buffers as we go, then we can end up with a page that only has buffers
+ * marked async write, and I/O completion can occur before we mark the other
+ * buffers async write.
+ *
+ * The end result of this is that we trip a bug in end_page_writeback() because
+ * we call it twice for the one page as the code in end_buffer_async_write()
+ * assumes that all buffers on the page are started at the same time.
+ *
+ * The fix is two passes across the ioend list - one to start writeback on the
+ * buffer_heads, and then submit them for I/O on the second pass.
  *
  * If @fail is non-zero, it means that we have a situation where some part of
  * the submission process has failed after we have marked pages for writeback
  * and unlocked them. In this situation, we need to fail the ioend chain rather
  * than submit it to IO. This typically only happens on a filesystem shutdown.
  */
-STATIC int
+STATIC void
 xfs_submit_ioend(
 	struct writeback_control *wbc,
 	xfs_ioend_t		*ioend,
-	int			status)
+	int			fail)
 {
+	xfs_ioend_t		*head = ioend;
+	xfs_ioend_t		*next;
 	struct buffer_head	*bh;
 	struct bio		*bio;
 	sector_t		lastblock = 0;
 
-	/* Reserve log space if we might write beyond the on-disk inode size. 
*/ - if (!status && - ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend)) - status = xfs_setfilesize_trans_alloc(ioend); - /* - * If we are failing the IO now, just mark the ioend with an - * error and finish it. This will run IO completion immediately - * as there is only one reference to the ioend at this point in - * time. - */ - if (status) { - ioend->io_error = status; - xfs_finish_ioend(ioend); - return status; - } + /* Pass 1 - start writeback */ + do { + next = ioend->io_list; + for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) + xfs_start_buffer_writeback(bh); + } while ((ioend = next) != NULL); - bio = NULL; - for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { + /* Pass 2 - submit I/O */ + ioend = head; + do { + next = ioend->io_list; + bio = NULL; - if (!bio) { -retry: - bio = xfs_alloc_ioend_bio(bh); - } else if (bh->b_blocknr != lastblock + 1) { - xfs_submit_ioend_bio(wbc, ioend, bio); - goto retry; + /* + * If we are failing the IO now, just mark the ioend with an + * error and finish it. This will run IO completion immediately + * as there is only one reference to the ioend at this point in + * time. + */ + if (fail) { + ioend->io_error = fail; + xfs_finish_ioend(ioend); + continue; } - if (xfs_bio_add_buffer(bio, bh) != bh->b_size) { - xfs_submit_ioend_bio(wbc, ioend, bio); - goto retry; - } + for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { - lastblock = bh->b_blocknr; - } - if (bio) - xfs_submit_ioend_bio(wbc, ioend, bio); - xfs_finish_ioend(ioend); - return 0; + if (!bio) { + retry: + bio = xfs_alloc_ioend_bio(bh); + } else if (bh->b_blocknr != lastblock + 1) { + xfs_submit_ioend_bio(wbc, ioend, bio); + goto retry; + } + + if (xfs_bio_add_buffer(bio, bh) != bh->b_size) { + xfs_submit_ioend_bio(wbc, ioend, bio); + goto retry; + } + + lastblock = bh->b_blocknr; + } + if (bio) + xfs_submit_ioend_bio(wbc, ioend, bio); + xfs_finish_ioend(ioend); + } while ((ioend = next) != NULL); } /* * Test to see if we've been building up a completion structure for * earlier buffers -- if so, we try to append to this ioend if we * can, otherwise we finish off any current ioend and start another. - * Return the ioend we finished off so that the caller can submit it - * once it has finished processing the dirty page. + * Return true if we've finished the given ioend. */ STATIC void xfs_add_to_ioend( struct inode *inode, struct buffer_head *bh, xfs_off_t offset, - struct xfs_writepage_ctx *wpc, - struct list_head *iolist) + struct xfs_writepage_ctx *wpc) { if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type || bh->b_blocknr != wpc->last_block + 1) { struct xfs_ioend *new; - if (wpc->ioend) - list_add(&wpc->ioend->io_list, iolist); - new = xfs_alloc_ioend(inode, wpc->io_type); new->io_offset = offset; new->io_buffer_head = bh; new->io_buffer_tail = bh; + if (wpc->ioend) + wpc->ioend->io_list = new; wpc->ioend = new; } else { wpc->ioend->io_buffer_tail->b_private = bh; @@ -545,7 +571,6 @@ xfs_add_to_ioend( bh->b_private = NULL; wpc->ioend->io_size += bh->b_size; wpc->last_block = bh->b_blocknr; - xfs_start_buffer_writeback(bh); } STATIC void @@ -707,41 +732,44 @@ out_invalidate: return; } -/* - * We implement an immediate ioend submission policy here to avoid needing to - * chain multiple ioends and hence nest mempool allocations which can violate - * forward progress guarantees we need to provide. 
The current ioend we are - * adding buffers to is cached on the writepage context, and if the new buffer - * does not append to the cached ioend it will create a new ioend and cache that - * instead. - * - * If a new ioend is created and cached, the old ioend is returned and queued - * locally for submission once the entire page is processed or an error has been - * detected. While ioends are submitted immediately after they are completed, - * batching optimisations are provided by higher level block plugging. - * - * At the end of a writeback pass, there will be a cached ioend remaining on the - * writepage context that the caller will need to submit. - */ static int -xfs_writepage_map( +xfs_writepage_submit( struct xfs_writepage_ctx *wpc, struct writeback_control *wbc, + int status) +{ + struct blk_plug plug; + + /* Reserve log space if we might write beyond the on-disk inode size. */ + if (!status && wpc->ioend && wpc->ioend->io_type != XFS_IO_UNWRITTEN && + xfs_ioend_is_append(wpc->ioend)) + status = xfs_setfilesize_trans_alloc(wpc->ioend); + + if (wpc->iohead) { + blk_start_plug(&plug); + xfs_submit_ioend(wbc, wpc->iohead, status); + blk_finish_plug(&plug); + } + return status; +} + +static int +xfs_writepage_map( + struct xfs_writepage_ctx *wpc, struct inode *inode, struct page *page, loff_t offset, __uint64_t end_offset) { - LIST_HEAD(submit_list); - struct xfs_ioend *ioend, *next; struct buffer_head *bh, *head; ssize_t len = 1 << inode->i_blkbits; int error = 0; - int count = 0; int uptodate = 1; + int count = 0; bh = head = page_buffers(page); offset = page_offset(page); + do { if (offset >= end_offset) break; @@ -794,7 +822,7 @@ xfs_writepage_map( error = xfs_map_blocks(inode, offset, &wpc->imap, wpc->io_type); if (error) - goto out; + goto out_error; wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, offset); } @@ -802,65 +830,46 @@ xfs_writepage_map( lock_buffer(bh); if (wpc->io_type != XFS_IO_OVERWRITE) xfs_map_at_offset(inode, bh, &wpc->imap, offset); - xfs_add_to_ioend(inode, bh, offset, wpc, &submit_list); + xfs_add_to_ioend(inode, bh, offset, wpc); count++; } + if (!wpc->iohead) + wpc->iohead = wpc->ioend; + } while (offset += len, ((bh = bh->b_this_page) != head)); if (uptodate && bh == head) SetPageUptodate(page); - ASSERT(wpc->ioend || list_empty(&submit_list)); + xfs_start_page_writeback(page, 1, count); + ASSERT(wpc->iohead || !count); + return 0; -out: +out_error: /* - * On error, we have to fail the ioend here because we have locked - * buffers in the ioend. If we don't do this, we'll deadlock - * invalidating the page as that tries to lock the buffers on the page. - * Also, because we may have set pages under writeback, we have to make - * sure we run IO completion to mark the error state of the IO - * appropriately, so we can't cancel the ioend directly here. That means - * we have to mark this page as under writeback if we included any - * buffers from it in the ioend chain so that completion treats it - * correctly. + * On error, we have to fail the iohead here because we locked buffers + * in the ioend chain. If we don't do this, we'll deadlock invalidating + * the page as that tries to lock the buffers on the page. Also, because + * we may have set pages under writeback, we have to make sure we run + * IO completion to mark the error state of the IO appropriately, so we + * can't cancel the ioend directly here. 
That means we have to mark this
+	 * page as under writeback if we included any buffers from it in the
+	 * ioend chain so that completion treats it correctly.
 	 *
-	 * If we didn't include the page in the ioend, then on error we can
-	 * simply discard and unlock it as there are no other users of the page
-	 * or its buffers right now. The caller will still need to trigger
-	 * submission of outstanding ioends on the writepage context so they are
-	 * treated correctly on error.
+	 * If we didn't include the page in the ioend, then we can simply
+	 * discard and unlock it as there are no other users of the page or its
+	 * buffers right now. The caller will still need to trigger submission
+	 * of outstanding ioends on the writepage context so they are treated
+	 * correctly on error.
 	 */
-	if (count) {
-		xfs_start_page_writeback(page, !error);
-
-		/*
-		 * Preserve the original error if there was one, otherwise catch
-		 * submission errors here and propagate into subsequent ioend
-		 * submissions.
-		 */
-		list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
-			int error2;
-
-			list_del_init(&ioend->io_list);
-			error2 = xfs_submit_ioend(wbc, ioend, error);
-			if (error2 && !error)
-				error = error2;
-		}
-	} else if (error) {
+	if (count)
+		xfs_start_page_writeback(page, 0, count);
+	else {
 		xfs_aops_discard_page(page);
 		ClearPageUptodate(page);
 		unlock_page(page);
-	} else {
-		/*
-		 * We can end up here with no error and nothing to write if we
-		 * race with a partial page truncate on a sub-page block sized
-		 * filesystem. In that case we need to mark the page clean.
-		 */
-		xfs_start_page_writeback(page, 1);
-		end_page_writeback(page);
 	}
-	mapping_set_error(page->mapping, error);
 
 	return error;
 }
@@ -976,7 +985,7 @@ xfs_do_writepage(
 		end_offset = offset;
 	}
 
-	return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset);
+	return xfs_writepage_map(wpc, inode, page, offset, end_offset);
 
 redirty:
 	redirty_page_for_writepage(wbc, page);
@@ -995,9 +1004,7 @@ xfs_vm_writepage(
 	int			ret;
 
 	ret = xfs_do_writepage(page, wbc, &wpc);
-	if (wpc.ioend)
-		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
-	return ret;
+	return xfs_writepage_submit(&wpc, wbc, ret);
 }
 
 STATIC int
@@ -1012,9 +1019,7 @@ xfs_vm_writepages(
 	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
 	ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
-	if (wpc.ioend)
-		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
-	return ret;
+	return xfs_writepage_submit(&wpc, wbc, ret);
 }
 
 /*
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 4e01bd5b64266..3c3f1a37a1c79 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -41,7 +41,7 @@ enum {
  * It can manage several multi-page bio's at once.
  */
 typedef struct xfs_ioend {
-	struct list_head	io_list;	/* next ioend in chain */
+	struct xfs_ioend	*io_list;	/* next ioend in chain */
 	unsigned int		io_type;	/* delalloc / unwritten */
 	int			io_error;	/* I/O error code */
 	atomic_t		io_remaining;	/* hold count */
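
Aside, for illustration (not part of the patch): the revert restores the
chained-ioend model, in which ioends form a NULL-terminated singly linked
list threaded through io_list, and xfs_submit_ioend() walks that chain in
two passes so every buffer is marked for writeback before any I/O is
submitted and can complete. The standalone C sketch below mirrors only
that shape; struct ioend, start_writeback(), submit_io() and
submit_ioend_chain() here are simplified stand-ins for the kernel's
xfs_ioend, xfs_start_buffer_writeback(), xfs_submit_ioend_bio() and
xfs_submit_ioend(), not the real definitions.

#include <stdio.h>

/* Illustrative stand-in for the chained ioend: a singly linked list
 * threaded through io_list, as restored by this revert. */
struct ioend {
	struct ioend	*io_list;	/* next ioend in chain, NULL at tail */
	int		io_error;	/* error handed to completion */
	int		id;		/* for demonstration output only */
};

/* Stand-ins for starting buffer writeback and for bio submission. */
static void start_writeback(struct ioend *ioend)
{
	printf("pass 1: start writeback on ioend %d\n", ioend->id);
}

static void submit_io(struct ioend *ioend)
{
	printf("pass 2: submit I/O for ioend %d\n", ioend->id);
}

/*
 * Shape of the restored xfs_submit_ioend(): pass 1 marks everything for
 * writeback before pass 2 submits any I/O, so completion can never see a
 * page whose buffers are only partially marked async write.
 */
static void submit_ioend_chain(struct ioend *head, int fail)
{
	struct ioend	*ioend, *next;

	/* Pass 1 - start writeback */
	for (ioend = head; ioend; ioend = ioend->io_list)
		start_writeback(ioend);

	/* Pass 2 - submit I/O, or fail the whole chain */
	for (ioend = head; ioend; ioend = next) {
		next = ioend->io_list;
		if (fail) {
			ioend->io_error = fail;
			continue;	/* completion would see the error */
		}
		submit_io(ioend);
	}
}

int main(void)
{
	struct ioend b = { NULL, 0, 2 };
	struct ioend a = { &b, 0, 1 };	/* chain: a -> b */

	submit_ioend_chain(&a, 0);	/* happy path: both passes run */
	return 0;
}

Running this prints both pass-1 lines before any pass-2 line, which is
exactly the invariant the restored comment above xfs_submit_ioend()
describes.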