From cd9f5ca237f64c5a2ab0560ad77dedeabb70b272 Mon Sep 17 00:00:00 2001 From: Ashish Samant Date: Wed, 15 Apr 2015 10:44:48 -0700 Subject: [PATCH] fuse: add numa mount option This patch adds numa mount option. When this option is enabled, FUSE groups all queues and creates one set per numa node. Users of /dev/fuse should listen on /dev/fuse from all numa nodes. Signed-off-by: Ashish Samant Reviewed-by: Srinivas Eeda --- fs/fuse/cuse.c | 2 +- fs/fuse/dev.c | 51 ++++++++++++++++++++++++++++++++++++--- fs/fuse/fuse_i.h | 14 ++++++++++- fs/fuse/inode.c | 63 ++++++++++++++++++++++++++++++++++++++---------- 4 files changed, 111 insertions(+), 19 deletions(-) diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 921c57159752..0a30a7944cbb 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -497,7 +497,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file) if (!cc) return -ENOMEM; - fuse_conn_init(&cc->fc); + fuse_conn_init(&cc->fc, FUSE_NONE); INIT_LIST_HEAD(&cc->list); cc->fc.release = cuse_fc_release; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 2802dd13fae2..d41bed1ed220 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -27,7 +27,16 @@ static struct kmem_cache *fuse_req_cachep; static inline struct fuse_node *fuse_get_node(struct fuse_conn *fc) { - return fc->fn[0]; + struct fuse_node *fn; + + if (fc->affinity == FUSE_CPU) { + fn = get_cpu_ptr(fuse_cpu); + put_cpu_ptr(fuse_cpu); + return fn; + } else if (fc->affinity == FUSE_NUMA) + return fc->fn[numa_node_id()]; + else + return fc->fn[0]; } static struct fuse_conn *fuse_get_conn(struct file *file) @@ -65,7 +74,12 @@ static struct fuse_req *__fuse_request_alloc(struct fuse_conn *fc, fn = fuse_get_node(fc); - req = kmem_cache_alloc_node(fuse_req_cachep, GFP_KERNEL, fn->node_id); + if (fc->affinity == FUSE_CPU) + req = kmem_cache_alloc_node(fuse_req_cachep, GFP_KERNEL, + cpu_to_node(fn->node_id)); + else + req = kmem_cache_alloc_node(fuse_req_cachep, GFP_KERNEL, + fn->node_id); if (req) { struct page 
**pages; struct fuse_page_desc *page_descs; @@ -1911,6 +1925,24 @@ static struct fuse_req *request_find(struct fuse_node *fn, u64 unique) return NULL; } +static struct fuse_req *request_find_allnodes(struct fuse_conn *fc, + u64 unique) +{ + struct fuse_node *fn; + struct fuse_req *req; + int i; + + for (i = 0; i < fc->nr_nodes; i++) { + fn = fc->fn[i]; + spin_lock(&fn->lock); + req = request_find(fn, unique); + if (req) + return req; + spin_unlock(&fn->lock); + } + return NULL; +} + static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out, unsigned nbytes) { @@ -1979,8 +2011,19 @@ static ssize_t fuse_dev_do_write(struct fuse_node *fn, goto err_unlock; req = request_find(fn, oh.unique); - if (!req) - goto err_unlock; + if (!req) { + /* + * responding process could be different from reaped one, so + * the responding process could be on a different node. + * Hence search all node queues for the request. + * This is a rare scenario. + */ + spin_unlock(&fn->lock); + req = request_find_allnodes(fc, oh.unique); + if (!req) + goto err_finish; + fn = req->fn; + } if (req->aborted) { spin_unlock(&fn->lock); fuse_copy_finish(cs); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 2983c323c220..0d8c5628811c 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -121,6 +121,9 @@ enum { FUSE_I_SIZE_UNSTABLE, }; +/** Per cpu pointer for cpu affinity */ +extern struct fuse_node __percpu *fuse_cpu; + struct fuse_conn; /** FUSE specific file data */ @@ -448,6 +451,12 @@ struct fuse_node { }; +enum affinity { + FUSE_NONE, + FUSE_CPU, + FUSE_NUMA, +}; + /** * A Fuse connection. 
* @@ -459,6 +468,9 @@ struct fuse_conn { /* Lock protecting accessess to members of this structure */ spinlock_t lock; + /** tracks if numa/cpu affinity is enabled/disabled */ + int affinity; + /* Number of fuse_nodes */ int nr_nodes; @@ -847,7 +859,7 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); /** * Initialize fuse_conn */ -int fuse_conn_init(struct fuse_conn *fc); +int fuse_conn_init(struct fuse_conn *fc, int affinity); /** * Release reference to fuse_conn diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 415ca448987c..ede73d097d45 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -26,6 +26,7 @@ MODULE_DESCRIPTION("Filesystem in Userspace"); MODULE_LICENSE("GPL"); static struct kmem_cache *fuse_inode_cachep; +struct fuse_node __percpu *fuse_cpu; struct list_head fuse_conn_list; DEFINE_MUTEX(fuse_mutex); @@ -69,6 +70,7 @@ struct fuse_mount_data { unsigned flags; unsigned max_read; unsigned blksize; + unsigned affinity; }; struct fuse_forget_link *fuse_alloc_forget(void) @@ -440,6 +442,7 @@ enum { OPT_ALLOW_OTHER, OPT_MAX_READ, OPT_BLKSIZE, + OPT_NUMA_ON, OPT_ERR }; @@ -452,6 +455,7 @@ static const match_table_t tokens = { {OPT_ALLOW_OTHER, "allow_other"}, {OPT_MAX_READ, "max_read=%u"}, {OPT_BLKSIZE, "blksize=%u"}, + {OPT_NUMA_ON, "numa"}, {OPT_ERR, NULL} }; @@ -536,6 +540,9 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev) return 0; d->blksize = value; break; + case OPT_NUMA_ON: + d->affinity = FUSE_NUMA; + break; default: return 0; @@ -564,6 +571,8 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root) seq_printf(m, ",max_read=%u", fc->max_read); if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE) seq_printf(m, ",blksize=%lu", sb->s_blocksize); + if (fc->affinity == FUSE_NUMA) + seq_puts(m, "numa"); return 0; } @@ -587,7 +596,7 @@ void fuse_node_init(struct fuse_conn *fc, struct fuse_node *fn, int i) fn->connected = 1; } -int fuse_conn_init(struct fuse_conn *fc) +int fuse_conn_init(struct 
fuse_conn *fc, int affinity) { int sz, ret, i; struct fuse_node *fn; @@ -606,7 +615,17 @@ int fuse_conn_init(struct fuse_conn *fc) fc->initialized = 0; fc->attr_version = 1; get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); - fc->nr_nodes = 1; + + if (affinity == FUSE_CPU) { + fc->affinity = FUSE_CPU; + fc->nr_nodes = num_possible_cpus(); + } else if (affinity == FUSE_NUMA) { + fc->affinity = FUSE_NUMA; + fc->nr_nodes = nr_node_ids; + } else { + fc->affinity = FUSE_NONE; + fc->nr_nodes = 1; + } ret = -ENOMEM; sz = sizeof(struct fuse_node *) * fc->nr_nodes; @@ -614,14 +633,28 @@ int fuse_conn_init(struct fuse_conn *fc) if (!fc->fn) return ret; memset(fc->fn, 0, sz); - sz = sizeof(struct fuse_node); - for (i = 0; i < fc->nr_nodes; i++) { - fn = kmalloc_node(sz, GFP_KERNEL, i); - if (!fn) - goto out; - memset(fn, 0, sz); - fc->fn[i] = fn; - fuse_node_init(fc, fn, i); + + if (affinity == FUSE_CPU) { + fuse_cpu = alloc_percpu(struct fuse_node); + if (fuse_cpu == NULL) { + kfree(fc->fn); + return ret; + } + for_each_possible_cpu(i) { + fn = per_cpu_ptr(fuse_cpu, i); + fc->fn[i] = fn; + fuse_node_init(fc, fn, i); + } + } else { + sz = sizeof(struct fuse_node); + for (i = 0; i < fc->nr_nodes; i++) { + fn = kmalloc_node(sz, GFP_KERNEL, i); + if (!fn) + goto out; + memset(fn, 0, sz); + fc->fn[i] = fn; + fuse_node_init(fc, fn, i); + } } return 0; out: @@ -987,8 +1020,12 @@ static void fuse_free_conn(struct fuse_conn *fc) { int i; - for (i = 0; i < fc->nr_nodes; i++) - kfree(fc->fn[i]); + if (fc->affinity == FUSE_CPU) { + free_percpu(fuse_cpu); + } else { + for (i = 0; i < fc->nr_nodes; i++) + kfree(fc->fn[i]); + } kfree_rcu(fc, rcu); } @@ -1084,7 +1121,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (!fc) goto err_fput; - fuse_conn_init(fc); + fuse_conn_init(fc, d.affinity); fc->dev = sb->s_dev; fc->sb = sb; -- 2.50.1