From 3f02ea4f2e305b6f1ce8eab0bd92a0949dc5463f Mon Sep 17 00:00:00 2001 From: Knut Omang Date: Fri, 14 Oct 2016 13:27:26 +0200 Subject: [PATCH] sif: pt: Add support for single thread modified page tables Modifications to the page tables in sif_pt are protected by a lock to allow multiple threads to add and remove regions to/from the page table in parallel. This functionality is currently only needed/used by the special sq_cmpl page table handling. In the future we might however need this also for other cases, for instance to optimize further on page table memory usage. The kernel documentation for infiniband midlayer locking requires that map_phys_fmr should be callable from any context. This prevents us from blocking on a lock, something that happens if there is contention for the lock (e.g. more than one thread involved in modifying the page table). Implement another flag, thread_safe, in a pt that determines whether a page table needs to be modifiable from multiple threads simultaneously. For now keep a BUG_ON that triggers if parallel access is attempted for memory types that should never see parallel access. 
Orabug: 24836269 Signed-off-by: Knut Omang Reviewed-by: Francisco Trivino-Garcia --- drivers/infiniband/hw/sif/sif_mmu.c | 6 +-- drivers/infiniband/hw/sif/sif_pt.c | 62 +++++++++++++++++++++-------- drivers/infiniband/hw/sif/sif_pt.h | 10 ++++- 3 files changed, 57 insertions(+), 21 deletions(-) diff --git a/drivers/infiniband/hw/sif/sif_mmu.c b/drivers/infiniband/hw/sif/sif_mmu.c index 32ba74cabb96..224ba352e0b0 100644 --- a/drivers/infiniband/hw/sif/sif_mmu.c +++ b/drivers/infiniband/hw/sif/sif_mmu.c @@ -277,7 +277,7 @@ static int sif_map_gva_ctx(struct sif_dev *sdev, if (multipage) { ctx->pt = sif_pt_create(sdev, sif_mem_get_sgl(mem), - ctx->base, ctx->size, page_shift, false, false); + ctx->base, ctx->size, page_shift, false, false, false); if (!ctx->pt) return -ENOMEM; set_ctx(sdev, ctx, sif_pt_root_table_level(ctx->pt), sif_pt_dma_root(ctx->pt)); @@ -324,7 +324,7 @@ static int sif_map_special_ctx(struct sif_dev *sdev, hw_ctx->translation_type = MMU_GVA2GPA_MODE; if (multipage) { - ctx->pt = sif_pt_create_for_mem(mem, ctx->base, 12, true, true); + ctx->pt = sif_pt_create_for_mem(mem, ctx->base, 12, true, true, false); if (!ctx->pt) return -ENOMEM; set_ctx(sdev, ctx, sif_pt_root_table_level(ctx->pt), sif_pt_dma_root(ctx->pt)); @@ -404,7 +404,7 @@ int sif_map_fmr_ctx(struct sif_dev *sdev, set_ctx(sdev, ctx, leaf_level, aligned_dma_addr); } else if (!ctx->pt) { - ctx->pt = sif_pt_create_for_mem(mem, ctx->base, page_shift, true, false); + ctx->pt = sif_pt_create_for_mem(mem, ctx->base, page_shift, true, false, false); if (!ctx->pt) return -ENOMEM; set_ctx(sdev, ctx, sif_pt_root_table_level(ctx->pt), sif_pt_dma_root(ctx->pt)); diff --git a/drivers/infiniband/hw/sif/sif_pt.c b/drivers/infiniband/hw/sif/sif_pt.c index 23d76ef2c093..c7a1d8a5c3d4 100644 --- a/drivers/infiniband/hw/sif/sif_pt.c +++ b/drivers/infiniband/hw/sif/sif_pt.c @@ -58,6 +58,30 @@ void sif_pt_exit(void) /* some utilities: */ + +/* Abstract locking functions: + * TBD: In principle this allows 
locking to be skipped if the page table + * does not need to be enforcing thread safety. + * Right now use locking to trap if two threads are trying to modify a + * table that is assumed to be accessed serialized: + * This is a measure to avoid memory overwrites or corruption + * if used like we assume is the case in Orabug: 24655978. + * + */ +static inline void pt_lock(struct sif_pt *pt) +{ + if (pt->thread_safe) + mutex_lock(&pt->lock); + else + BUG_ON(!mutex_trylock(&pt->lock)); +} + +static inline void pt_unlock(struct sif_pt *pt) +{ + mutex_unlock(&pt->lock); +} + + /* Find the optimal page size (represented by the leaf level) * to use based on device capabilities, configuration and a max_shift * value (typically based on continuousness of memory. @@ -347,7 +371,7 @@ static void sif_pt_release(struct kref *kref) */ struct sif_pt *sif_pt_create(struct sif_dev *sdev, struct scatterlist *sg, u64 vstart, size_t size, u32 page_shift, - bool modifiable, bool fixed_top) + bool modifiable, bool fixed_top, bool thread_safe) { int ret = 0; int i; @@ -367,6 +391,7 @@ struct sif_pt *sif_pt_create(struct sif_dev *sdev, struct scatterlist *sg, pt->sdev = sdev; pt->fixed_top = fixed_top; pt->modifiable = modifiable; + pt->thread_safe = thread_safe; ret = find_optimal_leaf_level(sdev, page_shift, vstart, dma_start, size, @@ -395,7 +420,8 @@ extend_failed: struct sif_pt *sif_pt_create_for_mem(struct sif_mem *mem, - u64 vstart, u32 page_shift, bool modifiable, bool fixed_top) + u64 vstart, u32 page_shift, + bool modifiable, bool fixed_top, bool thread_safe) { int ret = 0; int i; @@ -411,6 +437,7 @@ struct sif_pt *sif_pt_create_for_mem(struct sif_mem *mem, pt->sdev = sdev; pt->fixed_top = fixed_top; pt->modifiable = modifiable; + pt->thread_safe = thread_safe; ret = find_optimal_leaf_level(sdev, page_shift, vstart, sif_mem_dma(mem, 0), size, &pt->leaf_level, &pt->pte_ext_shift); @@ -447,7 +474,8 @@ struct sif_pt *sif_pt_create_empty(struct sif_dev *sdev, u64 vstart, enum 
sif_me if (map_mt == SIFMT_2M) page_shift += sdev->mi.level_shift; - pt = sif_pt_create(sdev, NULL, vstart, 0, page_shift, true, map_mt == SIFMT_CS); + pt = sif_pt_create(sdev, NULL, vstart, 0, page_shift, true, map_mt == SIFMT_CS, + map_mt == SIFMT_CS); if (!pt) return NULL; @@ -953,7 +981,7 @@ int sif_pt_entry(struct sif_pt *pt, u64 vaddr, dma_addr_t *entry, dma_addr_t *va u8 level; int i, ip; - mutex_lock(&pt->lock); + pt_lock(pt); level = pt->leaf_level; va_up = vaddr & ~level_to_pagemask(pt, level); pt_shift = level_to_pageshift(pt, level-1); @@ -972,7 +1000,7 @@ int sif_pt_entry(struct sif_pt *pt, u64 vaddr, dma_addr_t *entry, dma_addr_t *va sif_log(sdev, SIF_MMU_V, "Page at vaddr %llx not found", va_up); ret = -EINVAL; } - mutex_unlock(&pt->lock); + pt_unlock(pt); return ret; } @@ -1141,7 +1169,8 @@ int sif_pt_extend(struct sif_pt *pt, struct scatterlist *sg, u64 vstart, size_t sif_log(pt->sdev, SIF_MMU, "** vstart %llx size %lx page size %llx leaf_level %d **", vstart, size, page_mask + 1, pt->leaf_level); - mutex_lock(&pt->lock); + + pt_lock(pt); /* Calculate a good size of each sg table in the kmem object: */ if (!pt->top) { @@ -1182,13 +1211,13 @@ int sif_pt_extend(struct sif_pt *pt, struct scatterlist *sg, u64 vstart, size_t pt->vstart = new_start; pt->vsize = new_size; - mutex_unlock(&pt->lock); + pt_unlock(pt); return ret; populate_failed: kref_put(&pt->refcnt, sif_pt_release); kmem_ext_failed: sif_kmem_free(pt->sdev, &pt->m); - mutex_unlock(&pt->lock); + pt_unlock(pt); return ret; } @@ -1209,8 +1238,8 @@ int sif_pt_extend_with_mem(struct sif_pt *pt, struct sif_mem *mem, u64 vstart) sif_log(pt->sdev, SIF_MMU, "** vstart %llx size %lx page size %llx leaf level %d **", vstart, size, page_mask + 1, pt->leaf_level); - mutex_lock(&pt->lock); + pt_lock(pt); /* Calculate a good size of each sg table in the kmem object: */ if (!pt->top) { /* This is a blank pt - allocate and set up the initial structures */ @@ -1247,12 +1276,12 @@ int 
sif_pt_extend_with_mem(struct sif_pt *pt, struct sif_mem *mem, u64 vstart) pt->vstart = new_start; pt->vsize = new_size; - mutex_unlock(&pt->lock); + pt_unlock(pt); return ret; kmem_ext_failed: sif_kmem_free(pt->sdev, &pt->m); - mutex_unlock(&pt->lock); + pt_unlock(pt); return ret; } @@ -1271,7 +1300,8 @@ int sif_pt_free_part(struct sif_pt *pt, u64 vstart, size_t size) sif_log(pt->sdev, SIF_PT_V, "** vstart %llx -> %llx, size %lx **", vstart, va, size); page_size = level_to_pagesize(pt, level - 1); - mutex_lock(&pt->lock); + + pt_lock(pt); p = find_page(pt, level, va_up); if (!p) { sif_log(pt->sdev, SIF_INFO, "vaddr %llx not found at level %d", @@ -1311,11 +1341,11 @@ int sif_pt_free_part(struct sif_pt *pt, u64 vstart, size_t size) */ dma_sync_sg_for_device(pt->sdev->ib_dev.dma_device, pt->m.sg, pt->m.sg_max, DMA_TO_DEVICE); - mutex_unlock(&pt->lock); + pt_unlock(pt); return kref_put(&pt->refcnt, sif_pt_release); failed: - mutex_unlock(&pt->lock); + pt_unlock(pt); return ret; } @@ -1368,8 +1398,8 @@ int sif_pt_remap_for_mem(struct sif_pt *pt, struct sif_mem *mem, u32 page_shift, } sif_log(pt->sdev, SIF_MMU_V, "** vstart %llx size %llx **", vstart, mem->size); - mutex_lock(&pt->lock); + pt_lock(pt); /* Fast path: Repopulate ptes directly - all ref.cnts are kept as is: */ ret = populate_pt_from_mem(pt, mem, vstart, true); @@ -1380,7 +1410,7 @@ int sif_pt_remap_for_mem(struct sif_pt *pt, struct sif_mem *mem, u32 page_shift, */ if (!ret) dma_sync_sg_for_device(pt->sdev->ib_dev.dma_device, pt->m.sg, pt->m.sg_max, DMA_TO_DEVICE); - mutex_unlock(&pt->lock); + pt_unlock(pt); return ret; } diff --git a/drivers/infiniband/hw/sif/sif_pt.h b/drivers/infiniband/hw/sif/sif_pt.h index e62a91e9fb14..c199148e26e3 100644 --- a/drivers/infiniband/hw/sif/sif_pt.h +++ b/drivers/infiniband/hw/sif/sif_pt.h @@ -62,6 +62,7 @@ struct sif_pt { struct sif_dev *sdev; /* Device this mapping is valid for */ bool fixed_top; /* If set, pt guarantees that the top node remains constant */ bool 
modifiable; /* Set if this page table should support modification */ + bool thread_safe; /* Set if multiple threads may modify the page table */ u8 top_level; /* Page table level of top node, 0 means no table */ u8 leaf_level; /* Page table level of leaf node */ u8 pte_ext_shift; /* Only populate every (1 << pte_ext_shift) pte */ @@ -96,17 +97,22 @@ struct sif_pt *sif_pt_create_empty(struct sif_dev *sdev, u64 vstart, enum sif_me * Set @modifiable to allow the table to be extended and shrinked * Set @fixed_top to have pt guarantee that the top node remains constant * in which case it will always be a level 4 tree. + * set @thread_safe if the page table can be modified in parallel by multiple threads, + * which requires the mutex lock to be held. Such page tables cannot be + * called from interrupt context due to locking needs. */ struct sif_pt *sif_pt_create(struct sif_dev *sdev, struct scatterlist *sg, u64 vstart, size_t mapsize, - u32 page_shift, bool modifiable, bool fixed_top); + u32 page_shift, bool modifiable, bool fixed_top, + bool thread_safe); /* Create a sif page table from a mem object: * Set @fixed_top to prepare for a table where the top node is fixed: * (will always be a level 4 tree) */ struct sif_pt *sif_pt_create_for_mem(struct sif_mem *mem, u64 vstart, - u32 page_shift, bool modifiable, bool fixed_top); + u32 page_shift, bool modifiable, + bool fixed_top, bool thread_safe); /* Remap the (remappable) page table to be used starting at vstart for the range of mem * eg. replace the current mapping with a new one, preserving the top node -- 2.50.1