static inline struct fuse_node *fuse_get_node(struct fuse_conn *fc)
{
- return fc->fn[0];
+ struct fuse_node *fn;
+
+ if (fc->affinity == FUSE_CPU) {
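+ /*
+ * Affinity is best effort: the task may migrate once
+ * put_cpu_ptr() re-enables preemption, but the per-cpu
+ * fuse_node stays valid for the connection's lifetime.
+ */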
+ fn = get_cpu_ptr(fuse_cpu);
+ put_cpu_ptr(fuse_cpu);
+ return fn;
+ } else if (fc->affinity == FUSE_NUMA) {
+ return fc->fn[numa_node_id()];
+ } else {
+ return fc->fn[0];
+ }
}
static struct fuse_conn *fuse_get_conn(struct file *file)
fn = fuse_get_node(fc);
- req = kmem_cache_alloc_node(fuse_req_cachep, GFP_KERNEL, fn->node_id);
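+ /* with CPU affinity, fn->node_id is a cpu number; map it to its memory node */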
+ if (fc->affinity == FUSE_CPU)
+ req = kmem_cache_alloc_node(fuse_req_cachep, GFP_KERNEL,
+ cpu_to_node(fn->node_id));
+ else
+ req = kmem_cache_alloc_node(fuse_req_cachep, GFP_KERNEL,
+ fn->node_id);
if (req) {
struct page **pages;
struct fuse_page_desc *page_descs;
return NULL;
}
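+/*
+ * Search every node's queue for the request with @unique.  On
+ * success the request is returned with the owning fuse_node's
+ * lock still held; the caller must drop it.
+ */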
+static struct fuse_req *request_find_allnodes(struct fuse_conn *fc,
+ u64 unique)
+{
+ struct fuse_node *fn;
+ struct fuse_req *req;
+ int i;
+
+ for (i = 0; i < fc->nr_nodes; i++) {
+ fn = fc->fn[i];
+ if (!fn) /* hole left by a sparse cpu id */
+ continue;
+ spin_lock(&fn->lock);
+ req = request_find(fn, unique);
+ if (req)
+ return req;
+ spin_unlock(&fn->lock);
+ }
+ return NULL;
+}
+
static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
unsigned nbytes)
{
goto err_unlock;
req = request_find(fn, oh.unique);
- if (!req)
- goto err_unlock;
+ if (!req) {
+ /*
+ * The responding process may differ from the one that read
+ * the request, so it may be running on a different node.
+ * Hence search all node queues for the request. This is a
+ * rare scenario.
+ */
+ spin_unlock(&fn->lock);
+ req = request_find_allnodes(fc, oh.unique);
+ if (!req)
+ goto err_finish;
+ fn = req->fn;
+ }
if (req->aborted) {
spin_unlock(&fn->lock);
fuse_copy_finish(cs);
FUSE_I_SIZE_UNSTABLE,
};
+/** Per-cpu fuse_node pointer, used when CPU affinity is enabled */
+extern struct fuse_node __percpu *fuse_cpu;
+
struct fuse_conn;
/** FUSE specific file data */
};
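+/** Request-queue affinity modes, selected at mount time */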
+enum affinity {
+ FUSE_NONE,
+ FUSE_CPU,
+ FUSE_NUMA,
+};
+
/**
* A Fuse connection.
*
/* Lock protecting accessess to members of this structure */
spinlock_t lock;
+ /** Tracks whether numa/cpu affinity is enabled or disabled */
+ int affinity;
+
/* Number of fuse_nodes */
int nr_nodes;
/**
* Initialize fuse_conn
*/
-int fuse_conn_init(struct fuse_conn *fc);
+int fuse_conn_init(struct fuse_conn *fc, int affinity);
/**
* Release reference to fuse_conn
MODULE_LICENSE("GPL");
static struct kmem_cache *fuse_inode_cachep;
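+/* backing store for the per-cpu fuse_nodes, allocated in fuse_conn_init() */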
+struct fuse_node __percpu *fuse_cpu;
struct list_head fuse_conn_list;
DEFINE_MUTEX(fuse_mutex);
unsigned flags;
unsigned max_read;
unsigned blksize;
+ unsigned affinity;
};
struct fuse_forget_link *fuse_alloc_forget(void)
OPT_ALLOW_OTHER,
OPT_MAX_READ,
OPT_BLKSIZE,
+ OPT_NUMA_ON,
OPT_ERR
};
{OPT_ALLOW_OTHER, "allow_other"},
{OPT_MAX_READ, "max_read=%u"},
{OPT_BLKSIZE, "blksize=%u"},
+ {OPT_NUMA_ON, "numa"},
{OPT_ERR, NULL}
};
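+/* NUMA affinity is enabled via the mount option string, e.g. "-o numa" */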
return 0;
d->blksize = value;
break;
+ case OPT_NUMA_ON:
+ d->affinity = FUSE_NUMA;
+ break;
default:
return 0;
seq_printf(m, ",max_read=%u", fc->max_read);
if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
seq_printf(m, ",blksize=%lu", sb->s_blocksize);
+ if (fc->affinity == FUSE_NUMA)
+ seq_puts(m, ",numa");
return 0;
}
fn->connected = 1;
}
-int fuse_conn_init(struct fuse_conn *fc)
+int fuse_conn_init(struct fuse_conn *fc, int affinity)
{
int sz, ret, i;
struct fuse_node *fn;
fc->initialized = 0;
fc->attr_version = 1;
get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
- fc->nr_nodes = 1;
+
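+ /* one request queue per possible cpu, per NUMA node, or a single queue */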
+ if (affinity == FUSE_CPU) {
+ fc->affinity = FUSE_CPU;
+ fc->nr_nodes = nr_cpu_ids; /* cpu ids may be sparse */
+ } else if (affinity == FUSE_NUMA) {
+ fc->affinity = FUSE_NUMA;
+ fc->nr_nodes = nr_node_ids;
+ } else {
+ fc->affinity = FUSE_NONE;
+ fc->nr_nodes = 1;
+ }
ret = -ENOMEM;
sz = sizeof(struct fuse_node *) * fc->nr_nodes;
if (!fc->fn)
return ret;
memset(fc->fn, 0, sz);
- sz = sizeof(struct fuse_node);
- for (i = 0; i < fc->nr_nodes; i++) {
- fn = kmalloc_node(sz, GFP_KERNEL, i);
- if (!fn)
- goto out;
- memset(fn, 0, sz);
- fc->fn[i] = fn;
- fuse_node_init(fc, fn, i);
+
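+ /*
+ * For CPU affinity the fuse_nodes live in a per-cpu area and
+ * are freed with free_percpu(); otherwise each node is
+ * kmalloc'd on its own NUMA node and kfree'd individually.
+ */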
+ if (affinity == FUSE_CPU) {
+ fuse_cpu = alloc_percpu(struct fuse_node);
+ if (fuse_cpu == NULL) {
+ kfree(fc->fn);
+ return ret;
+ }
+ for_each_possible_cpu(i) {
+ fn = per_cpu_ptr(fuse_cpu, i);
+ fc->fn[i] = fn;
+ fuse_node_init(fc, fn, i);
+ }
+ } else {
+ sz = sizeof(struct fuse_node);
+ for (i = 0; i < fc->nr_nodes; i++) {
+ fn = kmalloc_node(sz, GFP_KERNEL, i);
+ if (!fn)
+ goto out;
+ memset(fn, 0, sz);
+ fc->fn[i] = fn;
+ fuse_node_init(fc, fn, i);
+ }
}
return 0;
out:
{
int i;
- for (i = 0; i < fc->nr_nodes; i++)
- kfree(fc->fn[i]);
+ if (fc->affinity == FUSE_CPU) {
+ free_percpu(fuse_cpu);
+ } else {
+ for (i = 0; i < fc->nr_nodes; i++)
+ kfree(fc->fn[i]);
+ }
kfree_rcu(fc, rcu);
}
if (!fc)
goto err_fput;
- fuse_conn_init(fc);
+ fuse_conn_init(fc, d.affinity);
fc->dev = sb->s_dev;
fc->sb = sb;