INIT_LIST_HEAD(&device->client_data_list);
spin_lock_init(&device->event_handler_lock);
spin_lock_init(&device->client_data_lock);
+ device->relaxed_pd = NULL;
+ INIT_LIST_HEAD(&device->relaxed_pool_list);
ret = read_port_table_lengths(device);
if (ret) {
* pool_lock to maintain consistency.
*/
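+/*
+ * At most 1/FMR_SPLIT_COUNT of a pool's used FMRs are reclaimed in one
+ * lazy-flush pass; the constant also scales the cleanup thread's sleep.
+ */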
+#define FMR_SPLIT_COUNT 3
+
struct ib_fmr_pool {
spinlock_t pool_lock;
+ spinlock_t used_pool_lock; /* protects used_list */
int pool_size;
int max_pages;
int dirty_watermark;
int dirty_len;
struct list_head free_list;
+ struct list_head used_list; /* FMRs released by users but still mapped */
struct list_head dirty_list;
struct hlist_head *cache_bucket;
atomic_t flush_ser;
wait_queue_head_t force_wait;
+ struct ib_pd *pd; /* PD the pool's FMRs were created with */
+ int relaxed; /* relaxed pool: FMRs may be rebound to other PDs */
};
static inline u32 ib_fmr_hash(u64 first_page)
static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
u64 *page_list,
int page_list_len,
- u64 io_virtual_address)
+ u64 io_virtual_address,
+ struct ib_pd *pd)
{
struct hlist_head *bucket;
struct ib_pool_fmr *fmr;
hlist_for_each_entry(fmr, bucket, cache_node)
if (io_virtual_address == fmr->io_virtual_address &&
page_list_len == fmr->page_list_len &&
+ pd == fmr->pd &&
!memcmp(page_list, fmr->page_list,
page_list_len * sizeof *page_list))
return fmr;
return NULL;
}
-static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
+
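+/*
+ * Undo the work done when a relaxed FMR was mapped: DMA-unmap the
+ * scatterlist, dirty and release the pinned user pages, and free the
+ * scatterlist itself.
+ */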
+static void fmr_teardown_mr(struct ib_pool_fmr *fmr)
+{
+ if (fmr->sg_len) {
+ ib_dma_unmap_sg(fmr->pd->device,
+ fmr->sg, fmr->sg_len,
+ DMA_BIDIRECTIONAL);
+ }
+
+ /* Release the s/g list */
+ if (fmr->sg_len) {
+ unsigned int i;
+
+ for (i = 0; i < fmr->sg_len; ++i) {
+ struct page *page = sg_page(&fmr->sg[i]);
+
+ /* FIXME we need a way to tell a r/w MR
+ * from a r/o MR */
+ BUG_ON(irqs_disabled());
+ set_page_dirty(page);
+ put_page(page);
+ }
+ kfree(fmr->sg);
+
+ fmr->sg = NULL;
+ fmr->sg_len = 0;
+ }
+}
+
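+/*
+ * Release FMRs back to the free_list. When @unmap_usedonce is set, a
+ * bounded portion of the used_list (mapped-but-released FMRs) is
+ * unmapped and reclaimed first; the dirty_list is then processed as
+ * before.
+ */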
+static void ib_fmr_batch_release(struct ib_fmr_pool *pool, int unmap_usedonce)
{
int ret;
struct ib_pool_fmr *fmr;
LIST_HEAD(unmap_list);
LIST_HEAD(fmr_list);
+ if (unmap_usedonce) {
+ /* reclaim FMRs released by users but still mapped */
+ struct ib_pool_fmr *fmr;
+ int already_split = 0;
+ int count = 0;
+ LIST_HEAD(temp_list);
+
+ spin_lock_irq(&pool->used_pool_lock);
+ list_splice_init(&pool->used_list, &temp_list);
+ spin_unlock_irq(&pool->used_pool_lock);
+ list_for_each_entry(fmr, &temp_list, list) {
+ /* find first fmr that is not mapped yet */
+ if (fmr->remap_count == 0 ||
+ (count > (pool->pool_size / FMR_SPLIT_COUNT))) {
+ /* split the list in two */
+ list_cut_position(&unmap_list, &temp_list,
+ &fmr->list);
+ spin_lock_irq(&pool->used_pool_lock);
+ list_splice(&temp_list, &pool->used_list);
+ spin_unlock_irq(&pool->used_pool_lock);
+ already_split = 1;
+ break;
+ } else {
+ hlist_del_init(&fmr->cache_node);
+ fmr->remap_count = 0;
+ list_add_tail(&fmr->fmr->list, &fmr_list);
+ count++;
+ }
+ }
+
+ if (!already_split) {
+ /* All are mapped once */
+ list_splice_tail(&temp_list, &unmap_list);
+ }
+ if (!list_empty(&unmap_list)) {
+ ret = ib_unmap_fmr(&fmr_list);
+ if (ret)
+ pr_warn(PFX "ib_unmap_fmr returned %d\n", ret);
+
+ if (pool->relaxed) {
+ list_for_each_entry(fmr, &unmap_list, list) {
+ fmr_teardown_mr(fmr);
+ }
+ }
+ spin_lock_irq(&pool->pool_lock);
+ list_splice(&unmap_list, &pool->free_list);
+ spin_unlock_irq(&pool->pool_lock);
+ }
+ INIT_LIST_HEAD(&unmap_list);
+ INIT_LIST_HEAD(&fmr_list);
+
+ }
+
spin_lock_irq(&pool->pool_lock);
list_for_each_entry(fmr, &pool->dirty_list, list) {
#ifdef DEBUG
if (fmr->ref_count !=0) {
- printk(KERN_WARNING PFX "Unmapping FMR 0x%08x with ref count %d\n",
- fmr, fmr->ref_count);
+ pr_warn(PFX "Unmapping FMR 0x%08x with ref count %d\n",
+ fmr, fmr->ref_count);
}
#endif
}
ret = ib_unmap_fmr(&fmr_list);
if (ret)
- printk(KERN_WARNING PFX "ib_unmap_fmr returned %d\n", ret);
+ pr_warn(PFX "ib_unmap_fmr returned %d\n", ret);
+
+ if (pool->relaxed) {
+ list_for_each_entry(fmr, &unmap_list, list) {
+ fmr_teardown_mr(fmr);
+ }
+ }
spin_lock_irq(&pool->pool_lock);
list_splice(&unmap_list, &pool->free_list);
static int ib_fmr_cleanup_thread(void *pool_ptr)
{
struct ib_fmr_pool *pool = pool_ptr;
+ int time_left = 1;
do {
if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {
- ib_fmr_batch_release(pool);
+ ib_fmr_batch_release(pool, 0);
+ time_left = 1;
atomic_inc(&pool->flush_ser);
wake_up_interruptible(&pool->force_wait);
pool->flush_function(pool, pool->flush_arg);
}
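+ /* Sleep timed out with no flush request: lazily reclaim used FMRs. */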
+ if (!time_left && pool->relaxed) {
+ ib_fmr_batch_release(pool, 1);
+
+ if (pool->flush_function)
+ pool->flush_function(pool, pool->flush_arg);
+ }
+
set_current_state(TASK_INTERRUPTIBLE);
if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&
- !kthread_should_stop())
- schedule();
+ !kthread_should_stop()) {
+ /* sleep briefly so used FMRs are reclaimed periodically */
+ time_left = schedule_timeout((HZ/FMR_SPLIT_COUNT)/20);
+ }
__set_current_state(TASK_RUNNING);
} while (!kthread_should_stop());
return 0;
}
+
/**
* ib_create_fmr_pool - Create an FMR pool
* @pd:Protection domain for FMRs
if (!params)
return ERR_PTR(-EINVAL);
+ if (params->cache && params->relaxed)
+ return ERR_PTR(-EINVAL);
+
device = pd->device;
if (!device->alloc_fmr || !device->dealloc_fmr ||
!device->map_phys_fmr || !device->unmap_fmr) {
- printk(KERN_INFO PFX "Device %s does not support FMRs\n",
- device->name);
+ pr_warn(PFX "Device %s does not support FMRs\n",
+ device->name);
+ return ERR_PTR(-ENOSYS);
+ }
+
+ if (params->relaxed && !device->set_fmr_pd) {
+ pr_warn(PFX "Device %s does not support relaxed FMRs\n",
+ device->name);
return ERR_PTR(-ENOSYS);
}
+
attr = kmalloc(sizeof *attr, GFP_KERNEL);
if (!attr) {
- printk(KERN_WARNING PFX "couldn't allocate device attr struct\n");
+ pr_warn(PFX "couldn't allocate device attr struct\n");
return ERR_PTR(-ENOMEM);
}
ret = ib_query_device(device, attr);
if (ret) {
- printk(KERN_WARNING PFX "couldn't query device: %d\n", ret);
+ pr_warn(PFX "couldn't query device: %d\n", ret);
kfree(attr);
return ERR_PTR(ret);
}
pool = kmalloc(sizeof *pool, GFP_KERNEL);
if (!pool) {
- printk(KERN_WARNING PFX "couldn't allocate pool struct\n");
+ pr_warn(PFX "couldn't allocate pool struct\n");
return ERR_PTR(-ENOMEM);
}
pool->flush_arg = params->flush_arg;
INIT_LIST_HEAD(&pool->free_list);
+ INIT_LIST_HEAD(&pool->used_list);
INIT_LIST_HEAD(&pool->dirty_list);
if (params->cache) {
kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,
GFP_KERNEL);
if (!pool->cache_bucket) {
- printk(KERN_WARNING PFX "Failed to allocate cache in pool\n");
+ pr_warn(PFX "Failed to allocate cache in pool\n");
ret = -ENOMEM;
goto out_free_pool;
}
pool->dirty_watermark = params->dirty_watermark;
pool->dirty_len = 0;
spin_lock_init(&pool->pool_lock);
+ spin_lock_init(&pool->used_pool_lock);
atomic_set(&pool->req_ser, 0);
atomic_set(&pool->flush_ser, 0);
init_waitqueue_head(&pool->force_wait);
+ pool->pd = pd;
+ pool->relaxed = params->relaxed;
pool->thread = kthread_run(ib_fmr_cleanup_thread,
pool,
"ib_fmr(%s)",
device->name);
if (IS_ERR(pool->thread)) {
- printk(KERN_WARNING PFX "couldn't start cleanup thread\n");
+ pr_warn(PFX "couldn't start cleanup thread\n");
ret = PTR_ERR(pool->thread);
goto out_free_pool;
}
for (i = 0; i < params->pool_size; ++i) {
fmr = kmalloc(bytes_per_fmr, GFP_KERNEL);
if (!fmr) {
- printk(KERN_WARNING PFX "failed to allocate fmr "
+ pr_warn(PFX "failed to allocate fmr "
"struct for FMR %d\n", i);
goto out_fail;
}
fmr->pool = pool;
fmr->remap_count = 0;
fmr->ref_count = 0;
+ fmr->pd = pd;
+ fmr->page_list_len = 0;
+ fmr->sg = NULL;
+ fmr->sg_len = 0;
INIT_HLIST_NODE(&fmr->cache_node);
fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
if (IS_ERR(fmr->fmr)) {
- printk(KERN_WARNING PFX "fmr_create failed "
+ pr_warn(PFX "fmr_create failed "
"for FMR %d\n", i);
kfree(fmr);
goto out_fail;
int i;
kthread_stop(pool->thread);
- ib_fmr_batch_release(pool);
+ ib_fmr_batch_release(pool, 0);
i = 0;
list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) {
+ ib_set_fmr_pd(fmr->fmr, pool->pd);
+ ib_dealloc_fmr(fmr->fmr);
+ list_del(&fmr->list);
+ kfree(fmr);
+ ++i;
+ }
+ list_for_each_entry_safe(fmr, tmp, &pool->used_list, list) {
if (fmr->remap_count) {
INIT_LIST_HEAD(&fmr_list);
list_add_tail(&fmr->fmr->list, &fmr_list);
ib_unmap_fmr(&fmr_list);
+ if (pool->relaxed)
+ fmr_teardown_mr(fmr);
}
+ ib_set_fmr_pd(fmr->fmr, pool->pd);
ib_dealloc_fmr(fmr->fmr);
list_del(&fmr->list);
kfree(fmr);
}
if (i < pool->pool_size)
- printk(KERN_WARNING PFX "pool still has %d regions registered\n",
+ pr_warn(PFX "pool still has %d regions registered\n",
pool->pool_size - i);
kfree(pool->cache_bucket);
int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
{
int serial;
- struct ib_pool_fmr *fmr, *next;
/*
* The free_list holds FMRs that may have been used
* Put them on the dirty list now so that the cleanup
* thread will reap them too.
*/
- spin_lock_irq(&pool->pool_lock);
- list_for_each_entry_safe(fmr, next, &pool->free_list, list) {
- if (fmr->remap_count > 0)
- list_move(&fmr->list, &pool->dirty_list);
- }
- spin_unlock_irq(&pool->pool_lock);
+ spin_lock_irq(&pool->used_pool_lock);
+ list_splice_init(&pool->used_list, &pool->dirty_list);
+ spin_unlock_irq(&pool->used_pool_lock);
serial = atomic_inc_return(&pool->req_ser);
wake_up_process(pool->thread);
* @page_list:List of pages to map
* @list_len:Number of pages in @page_list
* @io_virtual_address:I/O virtual address for new FMR
+ * @rargs: Relaxed-mapping arguments; required when the pool was created with the relaxed option, NULL otherwise.
*
* Map an FMR from an FMR pool.
*/
struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
u64 *page_list,
int list_len,
- u64 io_virtual_address)
+ u64 io_virtual_address,
+ struct ib_fmr_args_relaxed *rargs)
{
struct ib_fmr_pool *pool = pool_handle;
struct ib_pool_fmr *fmr;
if (list_len < 1 || list_len > pool->max_pages)
return ERR_PTR(-EINVAL);
+ if (pool->relaxed && rargs == NULL)
+ return ERR_PTR(-EINVAL);
+
spin_lock_irqsave(&pool->pool_lock, flags);
fmr = ib_fmr_cache_lookup(pool,
page_list,
list_len,
- io_virtual_address);
+ io_virtual_address, rargs ? rargs->pd : NULL);
if (fmr) {
/* found in cache */
++fmr->ref_count;
if (list_empty(&pool->free_list)) {
spin_unlock_irqrestore(&pool->pool_lock, flags);
- return ERR_PTR(-EAGAIN);
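+ /* free_list is empty: reclaim the least recently used FMR from the used_list */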
+ spin_lock_irqsave(&pool->used_pool_lock, flags);
+ if (list_empty(&pool->used_list)) {
+ spin_unlock_irqrestore(&pool->used_pool_lock, flags);
+ return ERR_PTR(-EAGAIN);
+ }
+ fmr = list_entry(pool->used_list.next, struct ib_pool_fmr,
+ list);
+ list_del(&fmr->list);
+ hlist_del_init(&fmr->cache_node);
+ spin_unlock_irqrestore(&pool->used_pool_lock, flags);
+ } else {
+ fmr = list_entry(pool->free_list.next, struct ib_pool_fmr,
+ list);
+ list_del(&fmr->list);
+ hlist_del_init(&fmr->cache_node);
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
}
- fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list);
- list_del(&fmr->list);
- hlist_del_init(&fmr->cache_node);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
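+ /* Relaxed pool: rebind the FMR to the caller's PD if it differs. */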
+ if (pool->relaxed && fmr->pd != rargs->pd) {
+ result = ib_set_fmr_pd(fmr->fmr, rargs->pd);
+ if (result) {
+ spin_lock_irqsave(&pool->used_pool_lock, flags);
+ list_add(&fmr->list, &pool->used_list);
+ spin_unlock_irqrestore(&pool->used_pool_lock, flags);
+
+ pr_warn(PFX "set_fmr_pd returns %d\n", result);
+
+ return ERR_PTR(result);
+ }
+ }
result = ib_map_phys_fmr(fmr->fmr, page_list, list_len,
io_virtual_address);
if (result) {
- spin_lock_irqsave(&pool->pool_lock, flags);
- list_add(&fmr->list, &pool->free_list);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ spin_lock_irqsave(&pool->used_pool_lock, flags);
+ list_add(&fmr->list, &pool->used_list);
+ spin_unlock_irqrestore(&pool->used_pool_lock, flags);
- printk(KERN_WARNING PFX "fmr_map returns %d\n", result);
+ pr_warn(PFX "fmr_map returns %d\n", result);
return ERR_PTR(result);
}
spin_unlock_irqrestore(&pool->pool_lock, flags);
}
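+ /* Record the relaxed mapping's PD and s/g list, releasing any earlier mapping's pages. */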
+ if (pool->relaxed) {
+ fmr->pd = rargs->pd;
+ /* if it was mapped earlier */
+ if (fmr->remap_count > 1)
+ fmr_teardown_mr(fmr);
+
+ fmr->sg = rargs->sg;
+ fmr->sg_len = rargs->sg_len;
+ }
+
return fmr;
}
EXPORT_SYMBOL(ib_fmr_pool_map_phys);
pool = fmr->pool;
- spin_lock_irqsave(&pool->pool_lock, flags);
+ spin_lock_irqsave(&pool->used_pool_lock, flags);
--fmr->ref_count;
if (!fmr->ref_count) {
if (fmr->remap_count < pool->max_remaps) {
- list_add_tail(&fmr->list, &pool->free_list);
+ list_add_tail(&fmr->list, &pool->used_list);
} else {
list_add_tail(&fmr->list, &pool->dirty_list);
if (++pool->dirty_len >= pool->dirty_watermark) {
#ifdef DEBUG
if (fmr->ref_count < 0)
- printk(KERN_WARNING PFX "FMR %p has ref count %d < 0\n",
+ pr_warn(PFX "FMR %p has ref count %d < 0\n",
fmr, fmr->ref_count);
#endif
- spin_unlock_irqrestore(&pool->pool_lock, flags);
+ spin_unlock_irqrestore(&pool->used_pool_lock, flags);
return 0;
}
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_fmr_pool.h>
#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
do { \
extern struct idr ib_uverbs_pd_idr;
extern struct idr ib_uverbs_shpd_idr;
extern struct idr ib_uverbs_mr_idr;
+extern struct idr ib_uverbs_fmr_idr;
extern struct idr ib_uverbs_mw_idr;
extern struct idr ib_uverbs_ah_idr;
extern struct idr ib_uverbs_cq_idr;
IB_UVERBS_DECLARE_CMD(close_xrcd);
IB_UVERBS_DECLARE_CMD(alloc_shpd);
IB_UVERBS_DECLARE_CMD(share_pd);
+IB_UVERBS_DECLARE_CMD(reg_mr_relaxed);
+IB_UVERBS_DECLARE_CMD(dereg_mr_relaxed);
+IB_UVERBS_DECLARE_CMD(flush_relaxed_mr);
#define IB_UVERBS_DECLARE_EX_CMD(name) \
int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \
static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" };
static struct uverbs_lock_class shpd_lock_class = { .name = "SHPD-uobj" };
static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" };
+static struct uverbs_lock_class fmr_lock_class = { .name = "FMR-uobj"};
static struct uverbs_lock_class mw_lock_class = { .name = "MW-uobj" };
static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" };
static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" };
put_uobj_read(uobj);
}
+/*
+ * get the number of pages by looking at the page indices that the start and
+ * end addresses fall in.
+ *
+ * Returns 0 if the vec is invalid. It is invalid if the number of bytes
+ * causes the address to wrap or overflows an unsigned int. This comes
+ * from being stored in the 'length' member of 'struct scatterlist'.
+ */
+static unsigned int get_pages_in_range(u64 addr, u64 bytes)
+{
+ if ((addr + bytes <= addr) ||
+ (bytes > (u64)UINT_MAX))
+ return 0;
+
+ return ((addr + bytes + PAGE_SIZE - 1) >> PAGE_SHIFT) -
+ (addr >> PAGE_SHIFT);
+}
+
+/* Pin user pages; on a partial pin, release the pages and return -EFAULT */
+static int fmr_pin_pages(unsigned long user_addr, unsigned int nr_pages,
+ struct page **pages, int write)
+{
+ int ret;
+
+ down_read(&current->mm->mmap_sem);
+ ret = get_user_pages(current, current->mm, user_addr,
+ nr_pages, write, 0, pages, NULL);
+ up_read(&current->mm->mmap_sem);
+
+ if (0 <= ret && (unsigned) ret < nr_pages) {
+ while (ret--)
+ put_page(pages[ret]);
+ ret = -EFAULT;
+ }
+
+ return ret;
+}
+
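+/*
+ * Create a relaxed FMR pool for buffers of up to @pages pages with the
+ * given access flags and insert it into the device's relaxed_pool_list,
+ * which is kept sorted by max_pages so lookups can pick the smallest
+ * pool that fits a request.
+ */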
+static int create_fmr_pool(struct ib_pd *pd, int pages, u32 access)
+{
+
+ int ret = 0;
+ struct ib_fmr_pool_param fmr_param;
+ struct ib_fmr_pool *fmr_pool;
+ struct ib_relaxed_pool_data *pool_data;
+ struct ib_relaxed_pool_data *pos;
+ int found = 0;
+
+ /* create pools: 32k FMRs for small (8KB) buffers, 8k FMRs for larger (up to 1MB) buffers */
+ memset(&fmr_param, 0, sizeof(fmr_param));
+ fmr_param.pool_size = (pages > 20) ? 8 * 1024 : 32*1024;
+ fmr_param.dirty_watermark = 512;
+ fmr_param.cache = 0;
+ fmr_param.relaxed = 1;
+ fmr_param.max_pages_per_fmr = pages;
+ fmr_param.page_shift = PAGE_SHIFT;
+ fmr_param.access = access;
+
+ fmr_pool = ib_create_fmr_pool(pd, &fmr_param);
+
+ if (IS_ERR(fmr_pool)) {
+ ret = PTR_ERR(fmr_pool);
+ goto err_exit;
+ }
+
+ pool_data = kmalloc(sizeof(*pool_data), GFP_KERNEL);
+
+ if (!pool_data) {
+ ret = -ENOMEM;
+ (void)ib_destroy_fmr_pool(fmr_pool);
+ goto err_exit;
+ }
+
+ pool_data->fmr_pool = fmr_pool;
+ pool_data->access_flags = access;
+ pool_data->max_pages = pages;
+ list_for_each_entry(pos, &pd->device->relaxed_pool_list, pool_list) {
+ if (pages <= pos->max_pages) {
+ list_add_tail(&pool_data->pool_list, &pos->pool_list);
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ list_add_tail(&pool_data->pool_list,
+ &pd->device->relaxed_pool_list);
+
+#ifdef DEBUG
+ pr_info("FMR POOLS :\n");
+ list_for_each_entry(pos, &pd->device->relaxed_pool_list, pool_list) {
+ pr_info("\t pos -> %p, pages = %d, access = %x, pool = %p\n",
+ pos, pos->max_pages, pos->access_flags,
+ pos->fmr_pool);
+ }
+#endif
+
+ return 0;
+
+err_exit:
+ return ret;
+}
+
ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
const char __user *buf,
int in_len, int out_len)
ucontext->device = ibdev;
INIT_LIST_HEAD(&ucontext->pd_list);
INIT_LIST_HEAD(&ucontext->mr_list);
+ INIT_LIST_HEAD(&ucontext->fmr_list);
INIT_LIST_HEAD(&ucontext->mw_list);
INIT_LIST_HEAD(&ucontext->cq_list);
INIT_LIST_HEAD(&ucontext->qp_list);
struct ib_uobject *shuobj = 0;
struct ib_pd *pd = NULL;
struct ib_shpd *shpd = NULL;
+ struct ib_relaxed_pool_data *pos;
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
pd = uobj->object;
+ /* flush all references to this PD from the HCA (relaxed FMRs) */
+ list_for_each_entry(pos, &pd->device->relaxed_pool_list, pool_list) {
+ ib_flush_fmr_pool(pos->fmr_pool);
+ }
+
/* is pd shared ?*/
if (pd->shpd) {
shpd = pd->shpd;
return ret;
}
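+/*
+ * Register a user memory region through a relaxed FMR: pin the user
+ * pages, build a DMA-mapped scatterlist, and map it via a per-device
+ * FMR pool matching the requested size and access flags, creating the
+ * pools on first use.
+ */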
+ssize_t ib_uverbs_reg_mr_relaxed(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_reg_mr cmd;
+ struct ib_uverbs_reg_mr_resp resp;
+ struct ib_udata udata;
+ struct ib_uobject *uobj;
+ struct ib_pd *pd;
+ int ret;
+
+ struct ib_relaxed_pool_data *pos;
+ struct ib_fmr_args_relaxed rel_args;
+ unsigned int n;
+ int found = 0;
+ struct page **pages;
+ int page_cnt;
+ u64 *dma_pages;
+ struct scatterlist *sg;
+ struct ib_pool_fmr *fmr;
+ int fmr_mapped = 0;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof(cmd),
+ (unsigned long) cmd.response + sizeof(resp),
+ in_len - sizeof(cmd), out_len - sizeof(resp));
+
+ if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
+ return -EINVAL;
+
+ /*
+ * Local write permission is required if remote write or
+ * remote atomic permission is also requested.
+ */
+ if (cmd.access_flags &
+ (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
+ !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
+ return -EINVAL;
+
+ /* FMRs are limited to less than 1M for now */
+ if (cmd.length >= (1*1024*1024 + PAGE_SIZE - 1))
+ return -EINVAL;
+
+ uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
+ if (!uobj)
+ return -ENOMEM;
+
+ init_uobj(uobj, 0, file->ucontext, &fmr_lock_class);
+ down_write(&uobj->mutex);
+
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ if (!pd) {
+ ret = -EINVAL;
+ goto err_free;
+ }
+
+ /*
+ * Relaxed MR: pd->device keeps a list of FMR pools sorted by size and
+ * access flags. If a matching pool already exists, map through it;
+ * otherwise create new pools and then map through the matching one.
+ */
+ {
+
+ n = get_pages_in_range(cmd.start, cmd.length);
+ if (n == 0) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+
+ found = 0;
+
+ list_for_each_entry(pos, &pd->device->relaxed_pool_list, pool_list) {
+ if (cmd.access_flags == pos->access_flags
+ && n <= pos->max_pages){
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ int pagesin8K = (8*1024 + PAGE_SIZE) >> PAGE_SHIFT;
+ int pagesin1M = (1024*1024 + PAGE_SIZE) >> PAGE_SHIFT;
+ struct ib_pd *pool_pd = file->device->ib_dev->relaxed_pd;
+
+ /* Create pool for 8kb buffers */
+ ret = create_fmr_pool(pool_pd, pagesin8K, cmd.access_flags);
+ if (ret < 0)
+ goto err_put;
+
+ /* Create pool for 1mb buffers */
+ ret = create_fmr_pool(pool_pd, pagesin1M, cmd.access_flags);
+ if (ret < 0)
+ goto err_put;
+
+ list_for_each_entry(pos, &pd->device->relaxed_pool_list,
+ pool_list) {
+ if (cmd.access_flags == pos->access_flags
+ && n <= pos->max_pages){
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+ }
+
+
+ pages = kcalloc(n, sizeof(struct page *), GFP_KERNEL);
+ if (!pages) {
+ ret = -ENOMEM;
+ goto err_put;
+ }
+
+ ret = fmr_pin_pages(cmd.start & PAGE_MASK, n, pages,
+ cmd.access_flags & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
+ if (ret < 0)
+ goto err_pages_alloc;
+
+
+ /* TODO: factor the following block out into a separate function */
+ if (1) {
+ u32 len = 0;
+ int sg_dma_len;
+ int i, j;
+
+ page_cnt = 0;
+
+ sg = kcalloc(n, sizeof(*sg), GFP_KERNEL);
+ if (sg == NULL) {
+ ret = -ENOMEM;
+ goto err_unpin;
+ }
+ sg_init_table(sg, n);
+ /* Stick all pages into the scatterlist */
+ for (i = 0 ; i < n; i++)
+ sg_set_page(&sg[i], pages[i], PAGE_SIZE, 0);
+
+ sg_dma_len = ib_dma_map_sg(pd->device, sg, n,
+ DMA_BIDIRECTIONAL);
+ if (unlikely(!sg_dma_len)) {
+ pr_warn("RFMR/IB: dma_map_sg failed!\n");
+ ret = -EBUSY;
+ goto err_free_sg;
+ }
+
+
+ for (i = 0; i < sg_dma_len; ++i) {
+ unsigned int dma_len = ib_sg_dma_len(pd->device,
+ &sg[i]);
+ u64 dma_addr = ib_sg_dma_address(pd->device, &sg[i]);
+
+ if (dma_addr & ~PAGE_MASK) {
+ if (i > 0) {
+ ret = -EINVAL;
+ goto err_free_sg;
+ } else
+ ++page_cnt;
+ }
+ if ((dma_addr + dma_len) & ~PAGE_MASK) {
+ if (i < sg_dma_len - 1) {
+ ret = -EINVAL;
+ goto err_free_sg;
+ } else
+ ++page_cnt;
+ }
+
+ len += dma_len;
+ }
+
+ page_cnt += len >> PAGE_SHIFT;
+
+ dma_pages = kmalloc_array(page_cnt, sizeof(u64), GFP_ATOMIC);
+ if (!dma_pages) {
+ ret = -ENOMEM;
+ goto err_free_sg;
+ }
+
+ page_cnt = 0;
+ for (i = 0; i < sg_dma_len; ++i) {
+ unsigned int dma_len = ib_sg_dma_len(pd->device,
+ &sg[i]);
+ u64 dma_addr = ib_sg_dma_address(pd->device, &sg[i]);
+
+ for (j = 0; j < dma_len; j += PAGE_SIZE) {
+ dma_pages[page_cnt++] =
+ (dma_addr & PAGE_MASK) + j;
+ }
+ }
+ }
+
+
+ rel_args.pd = pd;
+ rel_args.sg = sg;
+ rel_args.sg_len = n;
+
+ fmr = ib_fmr_pool_map_phys(pos->fmr_pool, dma_pages, page_cnt,
+ cmd.hca_va & PAGE_MASK, &rel_args);
+
+ kfree(dma_pages);
+
+ if (IS_ERR(fmr)) {
+ ret = PTR_ERR(fmr);
+ goto err_free_sg;
+ }
+
+ fmr_mapped = 1;
+
+ kfree(pages);
+
+ }
+
+ fmr->fmr->device = pd->device;
+ fmr->fmr->pd = pd;
+ atomic_inc(&pd->usecnt);
+
+ uobj->object = fmr;
+ ret = idr_add_uobj(&ib_uverbs_fmr_idr, uobj);
+ if (ret)
+ goto err_unreg;
+
+ memset(&resp, 0, sizeof(resp));
+ resp.lkey = fmr->fmr->lkey;
+ resp.rkey = fmr->fmr->rkey;
+ resp.mr_handle = uobj->id;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof(resp))) {
+ ret = -EFAULT;
+ goto err_copy;
+ }
+
+ put_pd_read(pd);
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->fmr_list);
+ mutex_unlock(&file->mutex);
+
+ uobj->live = 1;
+
+ up_write(&uobj->mutex);
+
+ return in_len;
+
+err_copy:
+ idr_remove_uobj(&ib_uverbs_fmr_idr, uobj);
+
+err_unreg:
+ ib_fmr_pool_unmap(fmr);
+
+err_free_sg:
+ /* if mapped already, this will be freed while flushing */
+ if (!fmr_mapped)
+ kfree(sg);
+
+err_unpin:
+ /* if mapped already, pages will be unpinned during flushing */
+ if (!fmr_mapped)
+ while (n--)
+ put_page(pages[n]);
+
+err_pages_alloc:
+ kfree(pages);
+
+
+err_put:
+ put_pd_read(pd);
+
+err_free:
+ put_uobj_write(uobj);
+ return ret;
+}
+
ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
return in_len;
}
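+/*
+ * Deregister a relaxed MR. The FMR is only returned to its pool's
+ * used_list here; unmapping, page release and PD rebinding happen later
+ * when the pool is flushed or the FMR is reused.
+ */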
+ssize_t ib_uverbs_dereg_mr_relaxed(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_dereg_mr cmd;
+ struct ib_uobject *uobj;
+ int ret = -EINVAL;
+ struct ib_pool_fmr *fmr;
+ struct ib_pd *pd;
+
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+
+ uobj = idr_write_uobj(&ib_uverbs_fmr_idr, cmd.mr_handle,
+ file->ucontext);
+ if (!uobj)
+ return -EINVAL;
+
+ fmr = uobj->object;
+ pd = fmr->fmr->pd;
+
+ ret = ib_fmr_pool_unmap(fmr);
+ if (!ret)
+ uobj->live = 0;
+
+ put_uobj_write(uobj);
+
+ if (ret)
+ return ret;
+
+ atomic_dec(&pd->usecnt);
+
+ idr_remove_uobj(&ib_uverbs_fmr_idr, uobj);
+
+ mutex_lock(&file->mutex);
+ list_del(&uobj->list);
+ mutex_unlock(&file->mutex);
+
+ put_uobj(uobj);
+
+ return in_len;
+}
+
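+/*
+ * Flush every relaxed FMR pool on the device owning the given PD,
+ * forcing used FMRs to be unmapped and their pinned pages released.
+ */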
+ssize_t ib_uverbs_flush_relaxed_mr(struct ib_uverbs_file *file,
+ const char __user *buf,
+ int in_len, int out_len)
+{
+ struct ib_uverbs_flush_relaxed_mr cmd;
+ struct ib_uobject *uobj;
+ struct ib_pd *pd;
+ struct ib_relaxed_pool_data *pos;
+
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+
+ uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext);
+ if (!uobj)
+ return -EINVAL;
+
+ /* flush all the pools associated with the pd */
+ pd = uobj->object;
+ list_for_each_entry(pos, &pd->device->relaxed_pool_list, pool_list) {
+ ib_flush_fmr_pool(pos->fmr_pool);
+ }
+
+ put_uobj_write(uobj);
+
+ return in_len;
+}
+
ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
DEFINE_IDR(ib_uverbs_pd_idr);
DEFINE_IDR(ib_uverbs_shpd_idr);
DEFINE_IDR(ib_uverbs_mr_idr);
+DEFINE_IDR(ib_uverbs_fmr_idr);
DEFINE_IDR(ib_uverbs_mw_idr);
DEFINE_IDR(ib_uverbs_ah_idr);
DEFINE_IDR(ib_uverbs_cq_idr);
*/
[IB_USER_VERBS_CMD_ALLOC_SHPD] = ib_uverbs_alloc_shpd,
[IB_USER_VERBS_CMD_SHARE_PD] = ib_uverbs_share_pd,
+ [IB_USER_VERBS_CMD_REG_MR_RELAXED] = ib_uverbs_reg_mr_relaxed,
+ [IB_USER_VERBS_CMD_DEREG_MR_RELAXED] = ib_uverbs_dereg_mr_relaxed,
+ [IB_USER_VERBS_CMD_FLUSH_RELAXED_MR] = ib_uverbs_flush_relaxed_mr,
};
static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
kfree(uobj);
}
+ list_for_each_entry_safe(uobj, tmp, &context->fmr_list, list) {
+ struct ib_pool_fmr *fmr = uobj->object;
+
+ idr_remove_uobj(&ib_uverbs_fmr_idr, uobj);
+ ib_fmr_pool_unmap(fmr);
+ kfree(uobj);
+ }
+
mutex_lock(&file->device->xrcd_tree_mutex);
list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
struct ib_xrcd *xrcd = uobj->object;
struct ib_pd *pd = uobj->object;
struct ib_uobject *shuobj = NULL;
struct ib_shpd *shpd = NULL;
+ struct ib_relaxed_pool_data *pos;
+
+ /* flush fmr pool associated with this pd */
+ list_for_each_entry(pos, &pd->device->relaxed_pool_list,
+ pool_list) {
+ ib_flush_fmr_pool(pos->fmr_pool);
+ }
idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
goto err_class;
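+ /* Allocate a device-level PD; relaxed FMR pools are created against it. */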
+ device->relaxed_pd = ib_alloc_pd(device);
+ if (IS_ERR(device->relaxed_pd)) {
+ device->relaxed_pd = NULL;
+ goto err_class;
+ }
+
ib_set_client_data(device, &uverbs_client, uverbs_dev);
return;
static void ib_uverbs_remove_one(struct ib_device *device)
{
struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client);
+ struct ib_relaxed_pool_data *pos;
+ struct ib_relaxed_pool_data *tmp;
+ int ret = 0;
if (!uverbs_dev)
return;
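+ /* Destroy all relaxed FMR pools and the backing PD before the device goes away. */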
+ list_for_each_entry_safe(pos, tmp, &device->relaxed_pool_list,
+ pool_list) {
+ ib_destroy_fmr_pool(pos->fmr_pool);
+ list_del(&pos->pool_list);
+ kfree(pos);
+ }
+
+ ret = ib_dealloc_pd(device->relaxed_pd);
+ WARN_ON(ret);
+ device->relaxed_pd = NULL;
dev_set_drvdata(uverbs_dev->dev, NULL);
device_destroy(uverbs_class, uverbs_dev->cdev.dev);
idr_destroy(&ib_uverbs_pd_idr);
idr_destroy(&ib_uverbs_shpd_idr);
idr_destroy(&ib_uverbs_mr_idr);
+ idr_destroy(&ib_uverbs_fmr_idr);
idr_destroy(&ib_uverbs_mw_idr);
idr_destroy(&ib_uverbs_ah_idr);
idr_destroy(&ib_uverbs_cq_idr);
}
EXPORT_SYMBOL(ib_alloc_fmr);
+int ib_set_fmr_pd(struct ib_fmr *fmr, struct ib_pd *pd)
+{
+ int ret;
+
+ if (!fmr->device->set_fmr_pd)
+ return -ENOSYS;
+
+ ret = fmr->device->set_fmr_pd(fmr, pd);
+ if (!ret)
+ fmr->pd = pd;
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_set_fmr_pd);
+
int ib_unmap_fmr(struct list_head *fmr_list)
{
struct ib_fmr *fmr;
fmr = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
page_vec->pages,
page_vec->length,
- page_vec->pages[0]);
+ page_vec->pages[0],
+ NULL);
if (IS_ERR(fmr)) {
ret = PTR_ERR(fmr);
iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
u64 io_addr = 0;
fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
- state->npages, io_addr);
+ state->npages, io_addr, NULL);
if (IS_ERR(fmr))
return PTR_ERR(fmr);
void *arg);
void *flush_arg;
unsigned cache:1;
+ unsigned relaxed:1;
};
struct ib_pool_fmr {
int ref_count;
int remap_count;
u64 io_virtual_address;
+ struct ib_pd *pd; /* PD currently bound to this FMR */
+ int list_id;
+ struct scatterlist *sg; /* pinned s/g list for relaxed mappings */
+ int sg_len;
int page_list_len;
u64 page_list[0];
};
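+/*
+ * Per-mapping arguments for relaxed FMR pools: the PD to bind the FMR
+ * to and the pinned, DMA-mapped scatterlist backing the mapping.
+ */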
+struct ib_fmr_args_relaxed {
+ struct ib_pd *pd;
+ struct scatterlist *sg;
+ int sg_len;
+};
+
struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
struct ib_fmr_pool_param *params);
struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
u64 *page_list,
int list_len,
- u64 io_virtual_address);
+ u64 io_virtual_address,
+ struct ib_fmr_args_relaxed *rargs);
int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr);
+
#endif /* IB_FMR_POOL_H */
struct ib_device *device;
struct list_head pd_list;
struct list_head mr_list;
+ struct list_head fmr_list;
struct list_head mw_list;
struct list_head cq_list;
struct list_head qp_list;
int (*remove_shpd)(struct ib_device *ibdev,
struct ib_shpd *shpd,
int atinit);
+ int (*set_fmr_pd)(struct ib_fmr *fmr,
+ struct ib_pd *pd);
struct ib_dma_mapping_ops *dma_ops;
u32 local_dma_lkey;
u8 node_type;
u8 phys_port_cnt;
+ struct ib_pd *relaxed_pd; /* device-level PD backing relaxed FMR pools */
+ struct list_head relaxed_pool_list; /* relaxed FMR pools, sorted by max_pages */
+};
+
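+/* One relaxed FMR pool on a device, keyed by access flags and max pages. */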
+struct ib_relaxed_pool_data {
+ struct ib_fmr_pool *fmr_pool;
+ u32 access_flags;
+ int max_pages;
+ struct list_head pool_list;
};
struct ib_client {
int mr_access_flags,
struct ib_fmr_attr *fmr_attr);
+/**
+ * ib_set_fmr_pd - set new PD for an FMR
+ * @fmr: The fast memory region to associate with the pd.
+ * @pd: The new protection domain to associate with the FMR.
+ */
+int ib_set_fmr_pd(struct ib_fmr *fmr, struct ib_pd *pd);
+
/**
* ib_map_phys_fmr - Maps a list of physical pages to a fast memory region.
* @fmr: The fast memory region to associate with the pages.
IB_USER_VERBS_CMD_ALLOC_SHPD = IB_USER_VERBS_CMD_ORACLE_ADDS_START,
/* =46 */
IB_USER_VERBS_CMD_SHARE_PD, /* =47 */
+ IB_USER_VERBS_CMD_REG_MR_RELAXED, /* =48 */
+ IB_USER_VERBS_CMD_DEREG_MR_RELAXED, /* =49 */
+ IB_USER_VERBS_CMD_FLUSH_RELAXED_MR, /* =50 */
};
enum {
__u32 mw_handle;
};
+struct ib_uverbs_flush_relaxed_mr {
+ __u32 pd_handle;
+};
+
+
struct ib_uverbs_create_comp_channel {
__u64 response;
};