From 2ba9b9cf9749867d4512934efe3c0e9d2a7e693f Mon Sep 17 00:00:00 2001 From: Ryan Ding Date: Tue, 20 Oct 2015 16:47:33 +0800 Subject: [PATCH] ocfs2: fix a performance issue with synced buffer io orabug: 20396205 If we flush data with WB_SYNC_ALL, which is set in struct writeback_control, it will be transferred to a bio with the WRITE_SYNC flag (that is done in the interface block_write_full_page()). After multi-queue was introduced to the kernel block layer, a bio with the SYNC flag will be sent to disk without being queued. This affects performance significantly if the disk has poor IOPS. This patch is a workaround for this. Use filemap_flush() to try to flush dirty pages with the WB_SYNC_NONE flag. * In journal=order mode, this is safe because the following jbd2_journal_force_commit() will ensure data integrity. * In journal=writeback mode, we will call filemap_write_and_wait_range() to meet the semantics of O_SYNC & O_DIRECT. It should help to improve performance with direct io (in the case when direct io falls back to buffer io), and buffer io with O_SYNC. Signed-off-by: Ryan Ding Reviewed-by: Junxiao Bi --- fs/ocfs2/file.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index d6716aedaa31..cdd30c04537e 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2417,9 +2417,17 @@ relock: if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || dropped_dio) { - ret = filemap_fdatawrite_range(file->f_mapping, - iocb->ki_pos - written, - iocb->ki_pos - 1); + /* + * There is an performance issue when we are doing a flush with + * WB_SYNC_ALL flag. block_write_full_page() will transfer it + * to REQ_SYNC flag on bio. And block layer will skip queue if + * that flag is found. It will affect the performance + * significantly if the disk has a poor iops. + * So try to work around by calling filemap_flush(). This is + * safe because following jbd2 force commit will helps to + * ensure data integrity. 
+ */ + ret = filemap_flush(file->f_mapping); if (ret < 0) written = ret; @@ -2429,10 +2437,16 @@ relock: written = ret; } - if (!ret) - ret = filemap_fdatawait_range(file->f_mapping, - iocb->ki_pos - written, - iocb->ki_pos - 1); + /* + * When journal=order, jbd2 will write and wait all dirty + * pages, no need to do it again. + * And to meet the semantics of O_SYNC or O_DIRECT, we need to + * wait on dirty pages that filemap_flush() miss. + */ + if (!ret && !ocfs2_should_order_data(inode)) + ret = filemap_write_and_wait_range(file->f_mapping, + iocb->ki_pos - written, + iocb->ki_pos - 1); } no_sync: -- 2.50.1