From: Matthew Wilcox Date: Tue, 25 Sep 2018 23:05:09 +0000 (-0400) Subject: xfs: Convert pag_ici_root to XArray X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=35783dba07b0c32b21194da248761ff6635b4d69;p=users%2Fwilly%2Fxarray.git xfs: Convert pag_ici_root to XArray Rename pag_ici_root to pag_ici_xa and use XArray APIs instead of radix tree APIs. Shorter code, typechecking on tag numbers, better error checking in xfs_reclaim_inode(), and eliminates a call to radix_tree_preload(). Signed-off-by: Matthew Wilcox --- diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 2484dceab35ac..ec4c7191388a6 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -179,7 +179,7 @@ xfs_perag_set_reclaim_tag( { struct xfs_mount *mp = pag->pag_mount; - lockdep_assert_held(&pag->pag_ici_lock); + lockdep_assert_held(&pag->pag_inodes.xa_lock); if (pag->pag_ici_reclaimable++) return; @@ -198,7 +198,7 @@ xfs_perag_clear_reclaim_tag( { struct xfs_mount *mp = pag->pag_mount; - lockdep_assert_held(&pag->pag_ici_lock); + lockdep_assert_held(&pag->pag_inodes.xa_lock); if (--pag->pag_ici_reclaimable) return; @@ -209,7 +209,7 @@ xfs_perag_clear_reclaim_tag( /* - * We set the inode flag atomically with the radix tree tag. + * We set the inode flag atomically with the xarray mark. * Once we get tag lookups on the radix tree, this inode flag * can go away. 
*/ @@ -221,16 +221,16 @@ xfs_inode_set_reclaim_tag( struct xfs_perag *pag; pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); - spin_lock(&pag->pag_ici_lock); + xa_lock(&pag->pag_inodes); spin_lock(&ip->i_flags_lock); - radix_tree_tag_set(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino), - XFS_ICI_RECLAIM_TAG); + __xa_set_mark(&pag->pag_inodes, XFS_INO_TO_AGINO(mp, ip->i_ino), + XFS_ICI_RECLAIM_MARK); xfs_perag_set_reclaim_tag(pag); __xfs_iflags_set(ip, XFS_IRECLAIMABLE); spin_unlock(&ip->i_flags_lock); - spin_unlock(&pag->pag_ici_lock); + xa_unlock(&pag->pag_inodes); xfs_perag_put(pag); } @@ -239,9 +239,9 @@ xfs_inode_clear_reclaim_tag( struct xfs_perag *pag, xfs_ino_t ino) { - radix_tree_tag_clear(&pag->pag_ici_root, + __xa_clear_mark(&pag->pag_inodes, XFS_INO_TO_AGINO(pag->pag_mount, ino), - XFS_ICI_RECLAIM_TAG); + XFS_ICI_RECLAIM_MARK); xfs_perag_clear_reclaim_tag(pag); } @@ -348,7 +348,7 @@ xfs_iget_cache_hit( /* * check for re-use of an inode within an RCU grace period due to the - * radix tree nodes not being updated yet. We monitor for this by + * xarray not being updated yet. We monitor for this by * setting the inode number to zero before freeing the inode structure. * If the inode has been reallocated and set up, then the inode number * will not match, so check for that, too. @@ -402,8 +402,8 @@ xfs_iget_cache_hit( /* * We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode * from stomping over us while we recycle the inode. We can't - * clear the radix tree reclaimable tag yet as it requires - * pag_ici_lock to be held exclusive. + * clear the xarray reclaimable mark yet as it requires + * pag_inodes.xa_lock to be held. 
*/ ip->i_flags |= XFS_IRECLAIM; @@ -428,7 +428,7 @@ xfs_iget_cache_hit( goto out_error; } - spin_lock(&pag->pag_ici_lock); + xa_lock(&pag->pag_inodes); spin_lock(&ip->i_flags_lock); /* @@ -447,7 +447,7 @@ xfs_iget_cache_hit( init_rwsem(&inode->i_rwsem); spin_unlock(&ip->i_flags_lock); - spin_unlock(&pag->pag_ici_lock); + xa_unlock(&pag->pag_inodes); } else { /* If the VFS inode is being torn down, pause and try again. */ if (!igrab(inode)) { @@ -518,18 +518,7 @@ xfs_iget_cache_miss( goto out_destroy; /* - * Preload the radix tree so we can insert safely under the - * write spinlock. Note that we cannot sleep inside the preload - * region. Since we can be called from transaction context, don't - * recurse into the file system. - */ - if (radix_tree_preload(GFP_NOFS)) { - error = -EAGAIN; - goto out_destroy; - } - - /* - * Because the inode hasn't been added to the radix-tree yet it can't + * Because the inode hasn't been added to the xarray yet it can't * be found by another thread, so we can do the non-sleeping lock here. */ if (lock_flags) { @@ -538,8 +527,8 @@ xfs_iget_cache_miss( } /* - * These values must be set before inserting the inode into the radix - * tree as the moment it is inserted a concurrent lookup (allowed by the + * These values must be set before inserting the inode into the xarray + * as the moment it is inserted a concurrent lookup (allowed by the * RCU locking mechanism) can find it and that lookup must see that this * is an inode currently under construction (i.e. that XFS_INEW is set). 
* The ip->i_flags_lock that protects the XFS_INEW flag forms the @@ -555,23 +544,17 @@ xfs_iget_cache_miss( xfs_iflags_set(ip, iflags); /* insert the new inode */ - spin_lock(&pag->pag_ici_lock); - error = radix_tree_insert(&pag->pag_ici_root, agino, ip); - if (unlikely(error)) { - WARN_ON(error != -EEXIST); - XFS_STATS_INC(mp, xs_ig_dup); - error = -EAGAIN; - goto out_preload_end; - } - spin_unlock(&pag->pag_ici_lock); - radix_tree_preload_end(); + error = xa_insert(&pag->pag_inodes, agino, ip, GFP_NOFS); + if (error) + goto out_unlock; *ipp = ip; return 0; -out_preload_end: - spin_unlock(&pag->pag_ici_lock); - radix_tree_preload_end(); +out_unlock: + if (error == -EBUSY) + XFS_STATS_INC(mp, xs_ig_dup); + error = -EAGAIN; if (lock_flags) xfs_iunlock(ip, lock_flags); out_destroy: @@ -619,7 +602,7 @@ xfs_iget( /* * xfs_reclaim_inode() uses the ILOCK to ensure an inode * doesn't get freed while it's being referenced during a - * radix tree traversal here. It assumes this function + * xarray traversal here. It assumes this function * aqcuires only the ILOCK (and therefore it has no need to * involve the IOLOCK in this synchronization). */ @@ -638,7 +621,7 @@ xfs_iget( again: error = 0; rcu_read_lock(); - ip = radix_tree_lookup(&pag->pag_ici_root, agino); + ip = xa_load(&pag->pag_inodes, agino); if (ip) { error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags); @@ -718,7 +701,7 @@ xfs_icache_inode_is_allocated( /* * The inode lookup is done in batches to keep the amount of lock traffic and - * radix tree lookups to a minimum. The batch size is a trade off between + * xarray lookups to a minimum. The batch size is a trade off between * lookup reduction and stack usage. This is in the reclaim path, so we can't * be too greedy. 
*/ @@ -777,7 +760,7 @@ xfs_inode_ag_walk( void *args), int flags, void *args, - int tag, + xa_mark_t mark, int iter_flags) { uint32_t first_index; @@ -798,15 +781,8 @@ restart: rcu_read_lock(); - if (tag == -1) - nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, - (void **)batch, first_index, - XFS_LOOKUP_BATCH); - else - nr_found = radix_tree_gang_lookup_tag( - &pag->pag_ici_root, - (void **) batch, first_index, - XFS_LOOKUP_BATCH, tag); + nr_found = xa_extract(&pag->pag_inodes, (void **)batch, + first_index, ULONG_MAX, XFS_LOOKUP_BATCH, mark); if (!nr_found) { rcu_read_unlock(); @@ -942,8 +918,8 @@ xfs_inode_ag_iterator_flags( ag = 0; while ((pag = xfs_perag_get(mp, ag))) { ag = pag->pag_agno + 1; - error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1, - iter_flags); + error = xfs_inode_ag_walk(mp, pag, execute, flags, args, + XFS_ICI_ALL, iter_flags); xfs_perag_put(pag); if (error) { last_error = error; @@ -972,7 +948,7 @@ xfs_inode_ag_iterator_tag( void *args), int flags, void *args, - int tag) + xa_mark_t mark) { struct xfs_perag *pag; int error = 0; @@ -980,9 +956,9 @@ xfs_inode_ag_iterator_tag( xfs_agnumber_t ag; ag = 0; - while ((pag = xfs_perag_get_mark(mp, ag, tag))) { + while ((pag = xfs_perag_get_mark(mp, ag, mark))) { ag = pag->pag_agno + 1; - error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag, + error = xfs_inode_ag_walk(mp, pag, execute, flags, args, mark, 0); xfs_perag_put(pag); if (error) { @@ -1019,7 +995,7 @@ xfs_reclaim_inode_grab( return 1; /* - * The radix tree lock here protects a thread in xfs_iget from racing + * The i_flags_lock here protects a thread in xfs_iget from racing * with us starting reclaim on the inode. Once we have the * XFS_IRECLAIM flag set it will not touch us. 
* @@ -1086,7 +1062,7 @@ xfs_reclaim_inode( int sync_mode) { struct xfs_buf *bp = NULL; - xfs_ino_t ino = ip->i_ino; /* for radix_tree_delete */ + xfs_ino_t ino = ip->i_ino; int error; restart: @@ -1172,22 +1148,21 @@ reclaim: XFS_STATS_INC(ip->i_mount, xs_ig_reclaims); /* - * Remove the inode from the per-AG radix tree. + * Remove the inode from the per-AG xarray. * - * Because radix_tree_delete won't complain even if the item was never - * added to the tree assert that it's been there before to catch - * problems with the inode life time early on. + * Check that it was there before to catch problems with the + * inode life time early on. */ - spin_lock(&pag->pag_ici_lock); - if (!radix_tree_delete(&pag->pag_ici_root, - XFS_INO_TO_AGINO(ip->i_mount, ino))) + xa_lock(&pag->pag_inodes); + if (__xa_erase(&pag->pag_inodes, + XFS_INO_TO_AGINO(ip->i_mount, ino)) != ip) ASSERT(0); xfs_perag_clear_reclaim_tag(pag); - spin_unlock(&pag->pag_ici_lock); + xa_unlock(&pag->pag_inodes); /* * Here we do an (almost) spurious inode lock in order to coordinate - * with inode cache radix tree lookups. This is because the lookup + * with inode cache xarray lookups. This is because the lookup * can reference the inodes in the cache without taking references. 
* * We make that OK here by ensuring that we wait until the inode is @@ -1259,11 +1234,10 @@ restart: int i; rcu_read_lock(); - nr_found = radix_tree_gang_lookup_tag( - &pag->pag_ici_root, + nr_found = xa_extract(&pag->pag_inodes, (void **)batch, first_index, - XFS_LOOKUP_BATCH, - XFS_ICI_RECLAIM_TAG); + ULONG_MAX, XFS_LOOKUP_BATCH, + XFS_ICI_RECLAIM_MARK); if (!nr_found) { done = 1; rcu_read_unlock(); @@ -1496,7 +1470,7 @@ __xfs_icache_free_eofblocks( struct xfs_eofblocks *eofb, int (*execute)(struct xfs_inode *ip, int flags, void *args), - int tag) + xa_mark_t mark) { int flags = SYNC_TRYLOCK; @@ -1504,7 +1478,7 @@ __xfs_icache_free_eofblocks( flags = SYNC_WAIT; return xfs_inode_ag_iterator_tag(mp, execute, flags, - eofb, tag); + eofb, mark); } int @@ -1513,7 +1487,7 @@ xfs_icache_free_eofblocks( struct xfs_eofblocks *eofb) { return __xfs_icache_free_eofblocks(mp, eofb, xfs_inode_free_eofblocks, - XFS_ICI_EOFBLOCKS_TAG); + XFS_ICI_EOFBLOCKS_MARK); } /* @@ -1571,12 +1545,12 @@ xfs_inode_free_quota_eofblocks( static inline unsigned long xfs_iflag_for_tag( - int tag) + xa_mark_t mark) { - switch (tag) { - case XFS_ICI_EOFBLOCKS_TAG: + switch (mark) { + case XFS_ICI_EOFBLOCKS_MARK: return XFS_IEOFBLOCKS; - case XFS_ICI_COWBLOCKS_TAG: + case XFS_ICI_COWBLOCKS_MARK: return XFS_ICOWBLOCKS; default: ASSERT(0); @@ -1607,10 +1581,10 @@ __xfs_inode_set_blocks_mark( spin_unlock(&ip->i_flags_lock); pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); - spin_lock(&pag->pag_ici_lock); + xa_lock(&pag->pag_inodes); - marked = radix_tree_tagged(&pag->pag_ici_root, mark); - radix_tree_tag_set(&pag->pag_ici_root, + marked = xa_marked(&pag->pag_inodes, mark); + __xa_set_mark(&pag->pag_inodes, XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), mark); if (!marked) { /* propagate the eofblocks mark up into the perag xarray */ @@ -1624,7 +1598,7 @@ __xfs_inode_set_blocks_mark( set_tp(ip->i_mount, pag->pag_agno, -1, _RET_IP_); } - spin_unlock(&pag->pag_ici_lock); + xa_unlock(&pag->pag_inodes); 
xfs_perag_put(pag); } @@ -1653,11 +1627,11 @@ __xfs_inode_clear_blocks_mark( spin_unlock(&ip->i_flags_lock); pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); - spin_lock(&pag->pag_ici_lock); + xa_lock(&pag->pag_inodes); - radix_tree_tag_clear(&pag->pag_ici_root, + __xa_clear_mark(&pag->pag_inodes, XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), mark); - if (!radix_tree_tagged(&pag->pag_ici_root, mark)) { + if (!xa_marked(&pag->pag_inodes, mark)) { /* clear the eofblocks mark from the perag xarray */ xa_clear_mark(&ip->i_mount->m_perags, XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), @@ -1665,7 +1639,7 @@ __xfs_inode_clear_blocks_mark( clear_tp(ip->i_mount, pag->pag_agno, -1, _RET_IP_); } - spin_unlock(&pag->pag_ici_lock); + xa_unlock(&pag->pag_inodes); xfs_perag_put(pag); } @@ -1773,7 +1747,7 @@ xfs_icache_free_cowblocks( struct xfs_eofblocks *eofb) { return __xfs_icache_free_eofblocks(mp, eofb, xfs_inode_free_cowblocks, - XFS_ICI_COWBLOCKS_TAG); + XFS_ICI_COWBLOCKS_MARK); } int diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index cc786b109e054..5086b54ddba80 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -23,11 +23,7 @@ struct xfs_eofblocks { /* * marks for inode xarray */ -#define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup - in xfs_inode_ag_iterator */ -#define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */ -#define XFS_ICI_EOFBLOCKS_TAG 1 /* inode has blocks beyond EOF */ -#define XFS_ICI_COWBLOCKS_TAG 2 /* inode can have cow blocks to gc */ +#define XFS_ICI_ALL XA_PRESENT /* all inodes */ #define XFS_ICI_RECLAIM_MARK XA_MARK_0 /* inode is to be reclaimed */ #define XFS_ICI_EOFBLOCKS_MARK XA_MARK_1 /* inode has blocks beyond EOF */ #define XFS_ICI_COWBLOCKS_MARK XA_MARK_2 /* inode can have cow blocks to gc */ @@ -82,7 +78,7 @@ int xfs_inode_ag_iterator_flags(struct xfs_mount *mp, int flags, void *args, int iter_flags); int xfs_inode_ag_iterator_tag(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void 
*args), - int flags, void *args, int tag); + int flags, void *args, xa_mark_t mark); static inline int xfs_fs_eofblocks_from_user( diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 6467d5e1df2dd..02d6a09b709b9 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2612,7 +2612,7 @@ xfs_ifree_cluster( for (i = 0; i < igeo->inodes_per_cluster; i++) { retry: rcu_read_lock(); - ip = radix_tree_lookup(&pag->pag_ici_root, + ip = xa_load(&pag->pag_inodes, XFS_INO_TO_AGINO(mp, (inum + i))); /* Inode not in memory, nothing to do */ @@ -3466,7 +3466,7 @@ xfs_iflush_cluster( { struct xfs_mount *mp = ip->i_mount; struct xfs_perag *pag; - unsigned long first_index, mask; + unsigned long first_index, last_index, mask; int cilist_size; struct xfs_inode **cilist; struct xfs_inode *cip; @@ -3484,12 +3484,10 @@ xfs_iflush_cluster( mask = ~(igeo->inodes_per_cluster - 1); first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; + last_index = first_index | (igeo->inodes_per_cluster - 1); rcu_read_lock(); - /* really need a gang lookup range call here */ - nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist, - first_index, igeo->inodes_per_cluster); - if (nr_found == 0) - goto out_free; + nr_found = xa_extract(&pag->pag_inodes, (void**)cilist, first_index, + last_index, igeo->inodes_per_cluster, XA_PRESENT); for (i = 0; i < nr_found; i++) { cip = cilist[i]; @@ -3508,16 +3506,6 @@ xfs_iflush_cluster( spin_unlock(&cip->i_flags_lock); continue; } - - /* - * Once we fall off the end of the cluster, no point checking - * any more inodes in the list because they will also all be - * outside the cluster. 
- */ - if ((XFS_INO_TO_AGINO(mp, cip->i_ino) & mask) != first_index) { - spin_unlock(&cip->i_flags_lock); - break; - } spin_unlock(&cip->i_flags_lock); /* diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 73a741f2e4a77..e9f8609b462e7 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -197,9 +197,8 @@ xfs_initialize_perag( goto out_unwind_new_pags; pag->pag_agno = index; pag->pag_mount = mp; - spin_lock_init(&pag->pag_ici_lock); mutex_init(&pag->pag_ici_reclaim_lock); - INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); + xa_init(&pag->pag_inodes); if (xfs_buf_hash_init(pag)) goto out_free_pag; init_waitqueue_head(&pag->pagb_wait); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 05e34dffe28b6..be063f3d531b0 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -388,8 +388,7 @@ typedef struct xfs_perag { atomic_t pagf_fstrms; /* # of filestreams active in this AG */ - spinlock_t pag_ici_lock; /* incore inode cache lock */ - struct radix_tree_root pag_ici_root; /* incore inode cache root */ + struct xarray pag_inodes; /* incore inode cache */ int pag_ici_reclaimable; /* reclaimable inodes */ struct mutex pag_ici_reclaim_lock; /* serialisation point */ unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */