* this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <drm/drm_mm.h>
+
 #include "etnaviv_cmdbuf.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_mmu.h"
 
-struct etnaviv_cmdbuf *etnaviv_cmdbuf_new(struct etnaviv_gpu *gpu, u32 size,
-       size_t nr_bos)
+#define SUBALLOC_SIZE          SZ_256K
+#define SUBALLOC_GRANULE       SZ_4K
+#define SUBALLOC_GRANULES      (SUBALLOC_SIZE / SUBALLOC_GRANULE)
+
+struct etnaviv_cmdbuf_suballoc {
+       /* suballocated DMA buffer properties */
+       struct etnaviv_gpu *gpu;
+       void *vaddr;
+       dma_addr_t paddr;
+
+       /* GPU mapping */
+       u32 iova;
+       struct drm_mm_node vram_node; /* only used on MMUv2 */
+
+       /* allocation management */
+       struct mutex lock;
+       DECLARE_BITMAP(granule_map, SUBALLOC_GRANULES);
+       int free_space;
+       wait_queue_head_t free_event;
+};
+
+struct etnaviv_cmdbuf_suballoc *
+etnaviv_cmdbuf_suballoc_new(struct etnaviv_gpu *gpu)
+{
+       struct etnaviv_cmdbuf_suballoc *suballoc;
+       int ret;
+
+       suballoc = kzalloc(sizeof(*suballoc), GFP_KERNEL);
+       if (!suballoc)
+               return ERR_PTR(-ENOMEM);
+
+       suballoc->gpu = gpu;
+       mutex_init(&suballoc->lock);
+       init_waitqueue_head(&suballoc->free_event);
+
+       suballoc->vaddr = dma_alloc_wc(gpu->dev, SUBALLOC_SIZE,
+                                      &suballoc->paddr, GFP_KERNEL);
+       if (!suballoc->vaddr) {
+               ret = -ENOMEM;
+               goto free_suballoc;
+       }
+
+       ret = etnaviv_iommu_get_suballoc_va(gpu, suballoc->paddr,
+                                           &suballoc->vram_node, SUBALLOC_SIZE,
+                                           &suballoc->iova);
+       if (ret)
+               goto free_dma;
+
+       return suballoc;
+
+free_dma:
+       dma_free_wc(gpu->dev, SUBALLOC_SIZE, suballoc->vaddr, suballoc->paddr);
+free_suballoc:
+       kfree(suballoc);
+
+       return ERR_PTR(ret);
+}
+
+void etnaviv_cmdbuf_suballoc_destroy(struct etnaviv_cmdbuf_suballoc *suballoc)
+{
+       etnaviv_iommu_put_suballoc_va(suballoc->gpu, &suballoc->vram_node,
+                                     SUBALLOC_SIZE, suballoc->iova);
+       dma_free_wc(suballoc->gpu->dev, SUBALLOC_SIZE, suballoc->vaddr,
+                   suballoc->paddr);
+       kfree(suballoc);
+}
+
+struct etnaviv_cmdbuf *
+etnaviv_cmdbuf_new(struct etnaviv_cmdbuf_suballoc *suballoc, u32 size,
+                  size_t nr_bos)
 {
        struct etnaviv_cmdbuf *cmdbuf;
        size_t sz = size_vstruct(nr_bos, sizeof(cmdbuf->bo_map[0]),
                                 sizeof(*cmdbuf));
+       int granule_offs, order, ret;
 
        cmdbuf = kzalloc(sz, GFP_KERNEL);
        if (!cmdbuf)
                return NULL;
 
-       if (gpu->mmu->version == ETNAVIV_IOMMU_V2)
-               size = ALIGN(size, SZ_4K);
+       cmdbuf->suballoc = suballoc;
+       cmdbuf->size = size;
 
-       cmdbuf->vaddr = dma_alloc_wc(gpu->dev, size, &cmdbuf->paddr,
-                                    GFP_KERNEL);
-       if (!cmdbuf->vaddr) {
-               kfree(cmdbuf);
-               return NULL;
+       order = order_base_2(ALIGN(size, SUBALLOC_GRANULE) / SUBALLOC_GRANULE);
+retry:
+       mutex_lock(&suballoc->lock);
+       granule_offs = bitmap_find_free_region(suballoc->granule_map,
+                                              SUBALLOC_GRANULES, order);
+       if (granule_offs < 0) {
+               suballoc->free_space = 0;
+               mutex_unlock(&suballoc->lock);
+               ret = wait_event_interruptible_timeout(suballoc->free_event,
+                                                      suballoc->free_space,
+                                                      msecs_to_jiffies(10 * 1000));
+               if (!ret) {
+                       dev_err(suballoc->gpu->dev,
+                               "Timeout waiting for cmdbuf space\n");
+                       kfree(cmdbuf);
+                       return NULL;
+               }
+               goto retry;
        }
-
-       cmdbuf->gpu = gpu;
-       cmdbuf->size = size;
+       mutex_unlock(&suballoc->lock);
+       cmdbuf->suballoc_offset = granule_offs * SUBALLOC_GRANULE;
+       cmdbuf->vaddr = suballoc->vaddr + cmdbuf->suballoc_offset;
 
        return cmdbuf;
 }
 
 void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf)
 {
-       etnaviv_iommu_put_cmdbuf_va(cmdbuf->gpu, cmdbuf);
-       dma_free_wc(cmdbuf->gpu->dev, cmdbuf->size, cmdbuf->vaddr,
-                   cmdbuf->paddr);
+       struct etnaviv_cmdbuf_suballoc *suballoc = cmdbuf->suballoc;
+       int order = order_base_2(ALIGN(cmdbuf->size, SUBALLOC_GRANULE) /
+                                SUBALLOC_GRANULE);
+
+       mutex_lock(&suballoc->lock);
+       bitmap_release_region(suballoc->granule_map,
+                             cmdbuf->suballoc_offset / SUBALLOC_GRANULE,
+                             order);
+       suballoc->free_space = 1;
+       mutex_unlock(&suballoc->lock);
+       wake_up_all(&suballoc->free_event);
        kfree(cmdbuf);
 }
 
 u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf)
 {
-       return etnaviv_iommu_get_cmdbuf_va(buf->gpu, buf);
+       return buf->suballoc->iova + buf->suballoc_offset;
 }
 
 dma_addr_t etnaviv_cmdbuf_get_pa(struct etnaviv_cmdbuf *buf)
 {
-       return buf->paddr;
+       return buf->suballoc->paddr + buf->suballoc_offset;
 }
 
 #ifndef __ETNAVIV_CMDBUF_H__
 #define __ETNAVIV_CMDBUF_H__
 
-#include <drm/drm_mm.h>
 #include <linux/types.h>
 
+struct etnaviv_gpu;
+struct etnaviv_cmdbuf_suballoc;
+
 struct etnaviv_cmdbuf {
-       /* device this cmdbuf is allocated for */
-       struct etnaviv_gpu *gpu;
+       /* suballocator this cmdbuf is allocated from */
+       struct etnaviv_cmdbuf_suballoc *suballoc;
        /* user context key, must be unique between all active users */
        struct etnaviv_file_private *ctx;
        /* cmdbuf properties */
+       int suballoc_offset;
        void *vaddr;
-       dma_addr_t paddr;
        u32 size;
        u32 user_size;
-       /* vram node used if the cmdbuf is mapped through the MMUv2 */
-       struct drm_mm_node vram_node;
        /* fence after which this buffer is to be disposed */
        struct dma_fence *fence;
        /* target exec state */
        u32 exec_state;
        struct etnaviv_vram_mapping *bo_map[0];
 };
 
+struct etnaviv_cmdbuf_suballoc *
+etnaviv_cmdbuf_suballoc_new(struct etnaviv_gpu *gpu);
+void etnaviv_cmdbuf_suballoc_destroy(struct etnaviv_cmdbuf_suballoc *suballoc);
+
+struct etnaviv_cmdbuf *
+etnaviv_cmdbuf_new(struct etnaviv_cmdbuf_suballoc *suballoc, u32 size,
+                  size_t nr_bos);
+void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf);
+
 u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf);
 dma_addr_t etnaviv_cmdbuf_get_pa(struct etnaviv_cmdbuf *buf);
 
 
        bos = drm_malloc_ab(args->nr_bos, sizeof(*bos));
        relocs = drm_malloc_ab(args->nr_relocs, sizeof(*relocs));
        stream = drm_malloc_ab(1, args->stream_size);
-       cmdbuf = etnaviv_cmdbuf_new(gpu, ALIGN(args->stream_size, 8) + 8,
-                                       args->nr_bos);
+       cmdbuf = etnaviv_cmdbuf_new(gpu->cmdbuf_suballoc,
+                                   ALIGN(args->stream_size, 8) + 8,
+                                   args->nr_bos);
        if (!bos || !relocs || !stream || !cmdbuf) {
                ret = -ENOMEM;
                goto err_submit_cmds;
 
                goto fail;
        }
 
+       gpu->cmdbuf_suballoc = etnaviv_cmdbuf_suballoc_new(gpu);
+       if (IS_ERR(gpu->cmdbuf_suballoc)) {
+               dev_err(gpu->dev, "Failed to create cmdbuf suballocator\n");
+               ret = PTR_ERR(gpu->cmdbuf_suballoc);
+               goto fail;
+       }
+
        /* Create buffer: */
-       gpu->buffer = etnaviv_cmdbuf_new(gpu, PAGE_SIZE, 0);
+       gpu->buffer = etnaviv_cmdbuf_new(gpu->cmdbuf_suballoc, PAGE_SIZE, 0);
        if (!gpu->buffer) {
                ret = -ENOMEM;
                dev_err(gpu->dev, "could not create command buffer\n");
                gpu->buffer = NULL;
        }
 
+       if (gpu->cmdbuf_suballoc) {
+               etnaviv_cmdbuf_suballoc_destroy(gpu->cmdbuf_suballoc);
+               gpu->cmdbuf_suballoc = NULL;
+       }
+
        if (gpu->mmu) {
                etnaviv_iommu_destroy(gpu->mmu);
                gpu->mmu = NULL;
 
        struct dma_fence *fence;
 };
 
+struct etnaviv_cmdbuf_suballoc;
 struct etnaviv_cmdbuf;
 
 struct etnaviv_gpu {
        int irq;
 
        struct etnaviv_iommu *mmu;
+       struct etnaviv_cmdbuf_suballoc *cmdbuf_suballoc;
 
        /* Power Control: */
        struct clk *clk_bus;
        struct etnaviv_gem_object *etnaviv_obj, struct timespec *timeout);
 int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
        struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf);
-struct etnaviv_cmdbuf *etnaviv_cmdbuf_new(struct etnaviv_gpu *gpu,
-                                             u32 size, size_t nr_bos);
-void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf);
 int etnaviv_gpu_pm_get_sync(struct etnaviv_gpu *gpu);
 void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu);
 int etnaviv_gpu_wait_idle(struct etnaviv_gpu *gpu, unsigned int timeout_ms);
 
                etnaviv_iommuv2_restore(gpu);
 }
 
-u32 etnaviv_iommu_get_cmdbuf_va(struct etnaviv_gpu *gpu,
-                               struct etnaviv_cmdbuf *buf)
+int etnaviv_iommu_get_suballoc_va(struct etnaviv_gpu *gpu, dma_addr_t paddr,
+                                 struct drm_mm_node *vram_node, size_t size,
+                                 u32 *iova)
 {
        struct etnaviv_iommu *mmu = gpu->mmu;
 
        if (mmu->version == ETNAVIV_IOMMU_V1) {
-               return buf->paddr - gpu->memory_base;
+               *iova = paddr - gpu->memory_base;
+               return 0;
        } else {
                int ret;
 
-               if (buf->vram_node.allocated)
-                       return (u32)buf->vram_node.start;
-
                mutex_lock(&mmu->lock);
-               ret = etnaviv_iommu_find_iova(mmu, &buf->vram_node,
-                                             buf->size + SZ_64K);
+               ret = etnaviv_iommu_find_iova(mmu, vram_node, size);
                if (ret < 0) {
                        mutex_unlock(&mmu->lock);
-                       return 0;
+                       return ret;
                }
-               ret = iommu_map(mmu->domain, buf->vram_node.start, buf->paddr,
-                               buf->size, IOMMU_READ);
+               ret = iommu_map(mmu->domain, vram_node->start, paddr, size,
+                               IOMMU_READ);
                if (ret < 0) {
-                       drm_mm_remove_node(&buf->vram_node);
+                       drm_mm_remove_node(vram_node);
                        mutex_unlock(&mmu->lock);
-                       return 0;
+                       return ret;
                }
-               /*
-                * At least on GC3000 the FE MMU doesn't properly flush old TLB
-                * entries. Make sure to space the command buffers out in a way
-                * that the FE MMU prefetch won't load invalid entries.
-                */
-               mmu->last_iova = buf->vram_node.start + buf->size + SZ_64K;
+               mmu->last_iova = vram_node->start + size;
                gpu->mmu->need_flush = true;
                mutex_unlock(&mmu->lock);
 
-               return (u32)buf->vram_node.start;
+               *iova = (u32)vram_node->start;
+               return 0;
        }
 }
 
-void etnaviv_iommu_put_cmdbuf_va(struct etnaviv_gpu *gpu,
-                                struct etnaviv_cmdbuf *buf)
+void etnaviv_iommu_put_suballoc_va(struct etnaviv_gpu *gpu,
+                                  struct drm_mm_node *vram_node, size_t size,
+                                  u32 iova)
 {
        struct etnaviv_iommu *mmu = gpu->mmu;
 
-       if (mmu->version == ETNAVIV_IOMMU_V2 && buf->vram_node.allocated) {
+       if (mmu->version == ETNAVIV_IOMMU_V2) {
                mutex_lock(&mmu->lock);
-               iommu_unmap(mmu->domain, buf->vram_node.start, buf->size);
-               drm_mm_remove_node(&buf->vram_node);
+               iommu_unmap(mmu->domain, iova, size);
+               drm_mm_remove_node(vram_node);
                mutex_unlock(&mmu->lock);
        }
 }
 
        struct etnaviv_vram_mapping *mapping);
 void etnaviv_iommu_destroy(struct etnaviv_iommu *iommu);
 
-u32 etnaviv_iommu_get_cmdbuf_va(struct etnaviv_gpu *gpu,
-                               struct etnaviv_cmdbuf *buf);
-void etnaviv_iommu_put_cmdbuf_va(struct etnaviv_gpu *gpu,
-                                struct etnaviv_cmdbuf *buf);
+int etnaviv_iommu_get_suballoc_va(struct etnaviv_gpu *gpu, dma_addr_t paddr,
+                                 struct drm_mm_node *vram_node, size_t size,
+                                 u32 *iova);
+void etnaviv_iommu_put_suballoc_va(struct etnaviv_gpu *gpu,
+                                  struct drm_mm_node *vram_node, size_t size,
+                                  u32 iova);
 
 size_t etnaviv_iommu_dump_size(struct etnaviv_iommu *iommu);
 void etnaviv_iommu_dump(struct etnaviv_iommu *iommu, void *buf);