]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
fuse: add numa mount option
authorAshish Samant <ashish.samant@oracle.com>
Wed, 15 Apr 2015 17:44:48 +0000 (10:44 -0700)
committerSantosh Shilimkar <santosh.shilimkar@oracle.com>
Mon, 29 Jun 2015 15:31:24 +0000 (08:31 -0700)
This patch adds a numa mount option. When this option is enabled, FUSE groups
all request queues and creates one set per NUMA node. Userspace servers of
/dev/fuse should then listen on /dev/fuse from every NUMA node.

Signed-off-by: Ashish Samant <ashish.samant@oracle.com>
Reviewed-by: Srinivas Eeda <srinivas.eeda@oracle.com>
fs/fuse/cuse.c
fs/fuse/dev.c
fs/fuse/fuse_i.h
fs/fuse/inode.c

index 921c57159752f0b57758557b9f92e1555df40226..0a30a7944cbbd1d3811df3302f5e4ba1f58b1be6 100644 (file)
@@ -497,7 +497,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
        if (!cc)
                return -ENOMEM;
 
-       fuse_conn_init(&cc->fc);
+       fuse_conn_init(&cc->fc, FUSE_NONE);
 
        INIT_LIST_HEAD(&cc->list);
        cc->fc.release = cuse_fc_release;
index 2802dd13fae29a79673175021456e18aeff0f26b..d41bed1ed2203be2b2b1a2fe0fccf35c9269f8a5 100644 (file)
@@ -27,7 +27,16 @@ static struct kmem_cache *fuse_req_cachep;
 
 static inline struct fuse_node *fuse_get_node(struct fuse_conn *fc)
 {
-       return fc->fn[0];
+       struct fuse_node *fn;
+
+       if (fc->affinity == FUSE_CPU) {
+               fn = get_cpu_ptr(fuse_cpu);
+               put_cpu_ptr(fuse_cpu);
+               return fn;
+       } else if (fc->affinity == FUSE_NUMA)
+               return fc->fn[numa_node_id()];
+       else
+               return fc->fn[0];
 }
 
 static struct fuse_conn *fuse_get_conn(struct file *file)
@@ -65,7 +74,12 @@ static struct fuse_req *__fuse_request_alloc(struct fuse_conn *fc,
 
        fn = fuse_get_node(fc);
 
-       req = kmem_cache_alloc_node(fuse_req_cachep, GFP_KERNEL, fn->node_id);
+       if (fc->affinity == FUSE_CPU)
+               req = kmem_cache_alloc_node(fuse_req_cachep, GFP_KERNEL,
+                                           cpu_to_node(fn->node_id));
+       else
+               req = kmem_cache_alloc_node(fuse_req_cachep, GFP_KERNEL,
+                                           fn->node_id);
        if (req) {
                struct page **pages;
                struct fuse_page_desc *page_descs;
@@ -1911,6 +1925,24 @@ static struct fuse_req *request_find(struct fuse_node *fn, u64 unique)
        return NULL;
 }
 
+static struct fuse_req *request_find_allnodes(struct fuse_conn *fc,
+                                             u64 unique)
+{
+       struct fuse_node *fn;
+       struct fuse_req *req;
+       int i;
+
+       for (i = 0; i < fc->nr_nodes; i++) {
+               fn = fc->fn[i];
+               spin_lock(&fn->lock);
+               req = request_find(fn, unique);
+               if (req)
+                       return req;
+               spin_unlock(&fn->lock);
+       }
+       return NULL;
+}
+
 static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
                         unsigned nbytes)
 {
@@ -1979,8 +2011,19 @@ static ssize_t fuse_dev_do_write(struct fuse_node *fn,
                goto err_unlock;
 
        req = request_find(fn, oh.unique);
-       if (!req)
-               goto err_unlock;
+       if (!req) {
+               /*
+                * responding process could be different from reaped one, so
+                * the responding process could be on a different node.
+                * Hence search all node queues for the request.
+                * This is a rare scenario.
+                */
+               spin_unlock(&fn->lock);
+               req = request_find_allnodes(fc, oh.unique);
+               if (!req)
+                       goto err_finish;
+               fn = req->fn;
+       }
        if (req->aborted) {
                spin_unlock(&fn->lock);
                fuse_copy_finish(cs);
index 2983c323c220882fafc52020583d92e92efa6adf..0d8c5628811c69bd24f482038e9764bbb6682c44 100644 (file)
@@ -121,6 +121,9 @@ enum {
        FUSE_I_SIZE_UNSTABLE,
 };
 
+/** Per cpu pointer for cpu affinity */
+extern struct fuse_node __percpu *fuse_cpu;
+
 struct fuse_conn;
 
 /** FUSE specific file data */
@@ -448,6 +451,12 @@ struct fuse_node {
 
 };
 
+enum affinity {
+       FUSE_NONE,
+       FUSE_CPU,
+       FUSE_NUMA,
+};
+
 /**
  * A Fuse connection.
  *
@@ -459,6 +468,9 @@ struct fuse_conn {
        /* Lock protecting accessess to members of this structure */
        spinlock_t lock;
 
+       /** tracks if numa/cpu affinity is enabled/disabled */
+       int affinity;
+
        /* Number of fuse_nodes */
        int nr_nodes;
 
@@ -847,7 +859,7 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
 /**
  * Initialize fuse_conn
  */
-int fuse_conn_init(struct fuse_conn *fc);
+int fuse_conn_init(struct fuse_conn *fc, int affinity);
 
 /**
  * Release reference to fuse_conn
index 415ca448987c5a8742b423223e87d48eca120901..ede73d097d45b36ab26a8f881eec2aba4a589eb9 100644 (file)
@@ -26,6 +26,7 @@ MODULE_DESCRIPTION("Filesystem in Userspace");
 MODULE_LICENSE("GPL");
 
 static struct kmem_cache *fuse_inode_cachep;
+struct fuse_node __percpu *fuse_cpu;
 struct list_head fuse_conn_list;
 DEFINE_MUTEX(fuse_mutex);
 
@@ -69,6 +70,7 @@ struct fuse_mount_data {
        unsigned flags;
        unsigned max_read;
        unsigned blksize;
+       unsigned affinity;
 };
 
 struct fuse_forget_link *fuse_alloc_forget(void)
@@ -440,6 +442,7 @@ enum {
        OPT_ALLOW_OTHER,
        OPT_MAX_READ,
        OPT_BLKSIZE,
+       OPT_NUMA_ON,
        OPT_ERR
 };
 
@@ -452,6 +455,7 @@ static const match_table_t tokens = {
        {OPT_ALLOW_OTHER,               "allow_other"},
        {OPT_MAX_READ,                  "max_read=%u"},
        {OPT_BLKSIZE,                   "blksize=%u"},
+       {OPT_NUMA_ON,                   "numa"},
        {OPT_ERR,                       NULL}
 };
 
@@ -536,6 +540,9 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
                                return 0;
                        d->blksize = value;
                        break;
+               case OPT_NUMA_ON:
+                       d->affinity = FUSE_NUMA;
+                       break;
 
                default:
                        return 0;
@@ -564,6 +571,8 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
                seq_printf(m, ",max_read=%u", fc->max_read);
        if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
                seq_printf(m, ",blksize=%lu", sb->s_blocksize);
+       if (fc->affinity == FUSE_NUMA)
+               seq_puts(m, ",numa");
        return 0;
 }
 
@@ -587,7 +596,7 @@ void fuse_node_init(struct fuse_conn *fc, struct fuse_node *fn, int i)
        fn->connected = 1;
 }
 
-int fuse_conn_init(struct fuse_conn *fc)
+int fuse_conn_init(struct fuse_conn *fc, int affinity)
 {
        int sz, ret, i;
        struct fuse_node *fn;
@@ -606,7 +615,17 @@ int fuse_conn_init(struct fuse_conn *fc)
        fc->initialized = 0;
        fc->attr_version = 1;
        get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
-       fc->nr_nodes = 1;
+
+       if (affinity == FUSE_CPU) {
+               fc->affinity = FUSE_CPU;
+               fc->nr_nodes = num_possible_cpus();
+       } else if (affinity == FUSE_NUMA) {
+               fc->affinity = FUSE_NUMA;
+               fc->nr_nodes = nr_node_ids;
+       } else {
+               fc->affinity = FUSE_NONE;
+               fc->nr_nodes = 1;
+       }
 
        ret = -ENOMEM;
        sz = sizeof(struct fuse_node *) * fc->nr_nodes;
@@ -614,14 +633,28 @@ int fuse_conn_init(struct fuse_conn *fc)
        if (!fc->fn)
                return ret;
        memset(fc->fn, 0, sz);
-       sz = sizeof(struct fuse_node);
-       for (i = 0; i < fc->nr_nodes; i++) {
-               fn = kmalloc_node(sz, GFP_KERNEL, i);
-               if (!fn)
-                       goto out;
-               memset(fn, 0, sz);
-               fc->fn[i] = fn;
-               fuse_node_init(fc, fn, i);
+
+       if (affinity == FUSE_CPU) {
+               fuse_cpu = alloc_percpu(struct fuse_node);
+               if (fuse_cpu == NULL) {
+                       kfree(fc->fn);
+                       return ret;
+               }
+               for_each_possible_cpu(i) {
+                       fn = per_cpu_ptr(fuse_cpu, i);
+                       fc->fn[i] = fn;
+                       fuse_node_init(fc, fn, i);
+               }
+       } else {
+               sz = sizeof(struct fuse_node);
+               for (i = 0; i < fc->nr_nodes; i++) {
+                       fn = kmalloc_node(sz, GFP_KERNEL, i);
+                       if (!fn)
+                               goto out;
+                       memset(fn, 0, sz);
+                       fc->fn[i] = fn;
+                       fuse_node_init(fc, fn, i);
+               }
        }
        return 0;
 out:
@@ -987,8 +1020,12 @@ static void fuse_free_conn(struct fuse_conn *fc)
 {
        int i;
 
-       for (i = 0; i < fc->nr_nodes; i++)
-               kfree(fc->fn[i]);
+       if (fc->affinity == FUSE_CPU) {
+               free_percpu(fuse_cpu);
+       } else {
+               for (i = 0; i < fc->nr_nodes; i++)
+                       kfree(fc->fn[i]);
+       }
        kfree_rcu(fc, rcu);
 }
 
@@ -1084,7 +1121,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
        if (!fc)
                goto err_fput;
 
-       fuse_conn_init(fc);
+       fuse_conn_init(fc, d.affinity);
 
        fc->dev = sb->s_dev;
        fc->sb = sb;