// SPDX-License-Identifier: GPL-2.0
 
 /*
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2021 HabanaLabs, Ltd.
  * All Rights Reserved.
  */
 
 
 #include <linux/uaccess.h>
 #include <linux/slab.h>
+#include <linux/pci-p2pdma.h>
 
 #define HL_MMU_DEBUG   0
 
                        return -EINVAL;
                }
 
+               if (phys_pg_pack->exporting_cnt) {
+                       dev_dbg(hdev->dev, "handle %u is exported, cannot free\n", handle);
+                       spin_unlock(&vm->idr_lock);
+                       return -EINVAL;
+               }
+
                /*
                 * must remove from idr before the freeing of the physical
                 * pages as the refcount of the pool is also the trigger of the
        return 0;
 }
 
+static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size,
+                       struct device *dev, enum dma_data_direction dir)
+{
+       dma_addr_t addr;
+       int rc;
+
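+       /* Map the physical PCI BAR address for access by the importer's
+        * device. DMA_ATTR_SKIP_CPU_SYNC is used because this memory sits
+        * behind a PCI BAR and is never in the CPU's cache.
+        */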
+       addr = dma_map_resource(dev, bar_address, chunk_size, dir,
+                               DMA_ATTR_SKIP_CPU_SYNC);
+       rc = dma_mapping_error(dev, addr);
+       if (rc)
+               return rc;
+
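+       /* There is no struct page behind this memory, so only the DMA
+        * address and length of the scatterlist entry are meaningful.
+        */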
+       sg_set_page(sg, NULL, chunk_size, 0);
+       sg_dma_address(sg) = addr;
+       sg_dma_len(sg) = chunk_size;
+
+       return 0;
+}
+
+static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages,
+                                               u64 page_size, struct device *dev,
+                                               enum dma_data_direction dir)
+{
+       u64 chunk_size, bar_address, dma_max_seg_size;
+       struct asic_fixed_properties *prop;
+       int rc, i, j, nents, cur_page;
+       struct scatterlist *sg;
+       struct sg_table *sgt;
+
+       prop = &hdev->asic_prop;
+
+       dma_max_seg_size = dma_get_max_seg_size(dev);
+
+       /* Align the max segment size down to PAGE_SIZE, so the SGL will
+        * contain aligned addresses that can be easily mapped into an MMU.
+        */
+       dma_max_seg_size = ALIGN_DOWN(dma_max_seg_size, PAGE_SIZE);
+       if (dma_max_seg_size < PAGE_SIZE) {
+               dev_err_ratelimited(hdev->dev,
+                               "dma_max_seg_size %llu can't be smaller than PAGE_SIZE\n",
+                               dma_max_seg_size);
+               return ERR_PTR(-EINVAL);
+       }
+
+       sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+       if (!sgt)
+               return ERR_PTR(-ENOMEM);
+
+       /* If the size of each page is larger than the DMA max segment size,
+        * then we can't combine pages and the number of entries in the SGL
+        * is simply <number of pages> * <chunks of max segment size per page>.
+        */
+       if (page_size > dma_max_seg_size)
+               nents = npages * DIV_ROUND_UP_ULL(page_size, dma_max_seg_size);
+       else
+               /* Get number of non-contiguous chunks */
+               for (i = 1, nents = 1, chunk_size = page_size ; i < npages ; i++) {
+                       if (pages[i - 1] + page_size != pages[i] ||
+                                       chunk_size + page_size > dma_max_seg_size) {
+                               nents++;
+                               chunk_size = page_size;
+                               continue;
+                       }
+
+                       chunk_size += page_size;
+               }
+
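+       /* __GFP_ZERO leaves not-yet-mapped entries with a zero DMA length,
+        * which the error flow below relies on to skip them.
+        */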
+       rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO);
+       if (rc)
+               goto error_free;
+
+       cur_page = 0;
+
+       if (page_size > dma_max_seg_size) {
+               u64 size_left, cur_device_address = 0;
+
+               size_left = page_size;
+
+               /* Each page is larger than the DMA max segment size, so it
+                * must be split into chunks of at most dma_max_seg_size.
+                */
+               for_each_sgtable_dma_sg(sgt, sg, i) {
+                       if (size_left == page_size)
+                               cur_device_address =
+                                       pages[cur_page] - prop->dram_base_address;
+                       else
+                               cur_device_address += dma_max_seg_size;
+
+                       chunk_size = min(size_left, dma_max_seg_size);
+
+                       bar_address = hdev->dram_pci_bar_start + cur_device_address;
+
+                       rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
+                       if (rc)
+                               goto error_unmap;
+
+                       if (size_left > dma_max_seg_size) {
+                               size_left -= dma_max_seg_size;
+                       } else {
+                               cur_page++;
+                               size_left = page_size;
+                       }
+               }
+       } else {
+               /* Merge pages and put them into the scatterlist */
+               for_each_sgtable_dma_sg(sgt, sg, i) {
+                       chunk_size = page_size;
+                       for (j = cur_page + 1 ; j < npages ; j++) {
+                               if (pages[j - 1] + page_size != pages[j] ||
+                                               chunk_size + page_size > dma_max_seg_size)
+                                       break;
+
+                               chunk_size += page_size;
+                       }
+
+                       bar_address = hdev->dram_pci_bar_start +
+                                       (pages[cur_page] - prop->dram_base_address);
+
+                       rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
+                       if (rc)
+                               goto error_unmap;
+
+                       cur_page = j;
+               }
+       }
+
+       /* This sg table has no CPU list. Set orig_nents to 0 to give users a
+        * chance to detect that, so they will use only nents (the length of
+        * the DMA list) when going over the sgl.
+        */
+       sgt->orig_nents = 0;
+
+       return sgt;
+
+error_unmap:
+       for_each_sgtable_dma_sg(sgt, sg, i) {
+               if (!sg_dma_len(sg))
+                       continue;
+
+               dma_unmap_resource(dev, sg_dma_address(sg),
+                                       sg_dma_len(sg), dir,
+                                       DMA_ATTR_SKIP_CPU_SYNC);
+       }
+
+       sg_free_table(sgt);
+
+error_free:
+       kfree(sgt);
+       return ERR_PTR(rc);
+}
+
+static int hl_dmabuf_attach(struct dma_buf *dmabuf,
+                               struct dma_buf_attachment *attachment)
+{
+       struct hl_dmabuf_priv *hl_dmabuf;
+       struct hl_device *hdev;
+       int rc;
+
+       hl_dmabuf = dmabuf->priv;
+       hdev = hl_dmabuf->ctx->hdev;
+
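+       /* Check if peer-to-peer DMA between the exporter and the importer's
+        * device is possible. A negative return value means it isn't, so only
+        * clear the peer2peer flag; the failure is reported at map time.
+        */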
+       rc = pci_p2pdma_distance_many(hdev->pdev, &attachment->dev, 1, true);
+       if (rc < 0)
+               attachment->peer2peer = false;
+
+       return 0;
+}
+
+static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
+                                       enum dma_data_direction dir)
+{
+       struct dma_buf *dma_buf = attachment->dmabuf;
+       struct hl_vm_phys_pg_pack *phys_pg_pack;
+       struct hl_dmabuf_priv *hl_dmabuf;
+       struct hl_device *hdev;
+       struct sg_table *sgt;
+
+       hl_dmabuf = dma_buf->priv;
+       hdev = hl_dmabuf->ctx->hdev;
+       phys_pg_pack = hl_dmabuf->phys_pg_pack;
+
+       if (!attachment->peer2peer) {
+               dev_dbg(hdev->dev, "Failed to map dmabuf because p2p is disabled\n");
+               return ERR_PTR(-EPERM);
+       }
+
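+       /* phys_pg_pack is set only when exporting by allocation handle.
+        * Otherwise the export was done by address and covers a single
+        * contiguous block starting at device_address.
+        */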
+       if (phys_pg_pack)
+               sgt = alloc_sgt_from_device_pages(hdev,
+                                               phys_pg_pack->pages,
+                                               phys_pg_pack->npages,
+                                               phys_pg_pack->page_size,
+                                               attachment->dev,
+                                               dir);
+       else
+               sgt = alloc_sgt_from_device_pages(hdev,
+                                               &hl_dmabuf->device_address,
+                                               1,
+                                               hl_dmabuf->dmabuf->size,
+                                               attachment->dev,
+                                               dir);
+
+       if (IS_ERR(sgt))
+               dev_err(hdev->dev, "failed (%ld) to initialize sgt for dmabuf\n", PTR_ERR(sgt));
+
+       return sgt;
+}
+
+static void hl_unmap_dmabuf(struct dma_buf_attachment *attachment,
+                                 struct sg_table *sgt,
+                                 enum dma_data_direction dir)
+{
+       struct scatterlist *sg;
+       int i;
+
+       /* The memory behind the dma-buf has *always* resided on the device itself, i.e. it lives
+        * only in the 'device' domain (after all, it maps a PCI bar address which points to the
+        * device memory).
+        *
+        * Therefore, it was never in the 'CPU' domain and hence, there is no need to perform
+        * a sync of the memory to the CPU's cache, as it never resided inside that cache.
+        */
+       for_each_sgtable_dma_sg(sgt, sg, i)
+               dma_unmap_resource(attachment->dev, sg_dma_address(sg),
+                                       sg_dma_len(sg), dir,
+                                       DMA_ATTR_SKIP_CPU_SYNC);
+
+       /* Restore orig_nents because sg_free_table() uses that field */
+       sgt->orig_nents = sgt->nents;
+       sg_free_table(sgt);
+       kfree(sgt);
+}
+
+static void hl_release_dmabuf(struct dma_buf *dmabuf)
+{
+       struct hl_dmabuf_priv *hl_dmabuf = dmabuf->priv;
+       struct hl_vm *vm;
+
+       /* priv is cleared in the error flow of export_dmabuf_common() when a
+        * FD could not be obtained for the dma-buf file, and in that case the
+        * caller does the cleanup, so there is nothing to release here.
+        */
+       if (!hl_dmabuf)
+               return;
+
+       vm = &hl_dmabuf->ctx->hdev->vm;
+
+       if (hl_dmabuf->phys_pg_pack) {
+               spin_lock(&vm->idr_lock);
+               hl_dmabuf->phys_pg_pack->exporting_cnt--;
+               spin_unlock(&vm->idr_lock);
+       }
+
+       hl_ctx_put(hl_dmabuf->ctx);
+
+       kfree(hl_dmabuf);
+}
+
+static const struct dma_buf_ops habanalabs_dmabuf_ops = {
+       .attach = hl_dmabuf_attach,
+       .map_dma_buf = hl_map_dmabuf,
+       .unmap_dma_buf = hl_unmap_dmabuf,
+       .release = hl_release_dmabuf,
+};
+
+static int export_dmabuf_common(struct hl_ctx *ctx,
+                               struct hl_dmabuf_priv *hl_dmabuf,
+                               u64 total_size, int flags, int *dmabuf_fd)
+{
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+       struct hl_device *hdev = ctx->hdev;
+       int rc, fd;
+
+       exp_info.ops = &habanalabs_dmabuf_ops;
+       exp_info.size = total_size;
+       exp_info.flags = flags;
+       exp_info.priv = hl_dmabuf;
+
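+       /* The exported dma-buf stores hl_dmabuf in its priv field; it is
+        * freed in hl_release_dmabuf() when the last reference is put.
+        */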
+       hl_dmabuf->dmabuf = dma_buf_export(&exp_info);
+       if (IS_ERR(hl_dmabuf->dmabuf)) {
+               dev_err(hdev->dev, "failed to export dma-buf\n");
+               return PTR_ERR(hl_dmabuf->dmabuf);
+       }
+
+       fd = dma_buf_fd(hl_dmabuf->dmabuf, flags);
+       if (fd < 0) {
+               dev_err(hdev->dev, "failed to get a file descriptor for a dma-buf\n");
+               rc = fd;
+               goto err_dma_buf_put;
+       }
+
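+       /* Hold a context reference for the lifetime of the dma-buf object,
+        * so the context can't be destroyed while the FD is alive. The
+        * reference is released in hl_release_dmabuf().
+        */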
+       hl_dmabuf->ctx = ctx;
+       hl_ctx_get(hdev, hl_dmabuf->ctx);
+
+       *dmabuf_fd = fd;
+
+       return 0;
+
+err_dma_buf_put:
+       /* Clear priv so that hl_release_dmabuf(), triggered by the final
+        * dma_buf_put(), doesn't free hl_dmabuf or drop references that the
+        * caller still owns and will clean up itself.
+        */
+       hl_dmabuf->dmabuf->priv = NULL;
+       dma_buf_put(hl_dmabuf->dmabuf);
+       return rc;
+}
+
+/**
+ * export_dmabuf_from_addr() - export a dma-buf object for the given memory
+ *                             address and size.
+ * @ctx: pointer to the context structure.
+ * @device_addr:  device memory physical address.
+ * @size: size of device memory.
+ * @flags: DMA-BUF file/FD flags.
+ * @dmabuf_fd: pointer to result FD that represents the dma-buf object.
+ *
+ * Create and export a dma-buf object for an existing memory allocation inside
+ * the device memory, and return a FD which is associated with the dma-buf
+ * object.
+ *
+ * Return: 0 on success, non-zero for failure.
+ */
+static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 device_addr,
+                                       u64 size, int flags, int *dmabuf_fd)
+{
+       struct hl_dmabuf_priv *hl_dmabuf;
+       struct hl_device *hdev = ctx->hdev;
+       struct asic_fixed_properties *prop;
+       u64 bar_address;
+       int rc;
+
+       prop = &hdev->asic_prop;
+
+       if (!IS_ALIGNED(device_addr, PAGE_SIZE)) {
+               dev_dbg(hdev->dev,
+                       "exported device memory address 0x%llx should be aligned to 0x%lx\n",
+                       device_addr, PAGE_SIZE);
+               return -EINVAL;
+       }
+
+       if (size < PAGE_SIZE) {
+               dev_dbg(hdev->dev,
+                       "exported device memory size %llu should be equal to or greater than %lu\n",
+                       size, PAGE_SIZE);
+               return -EINVAL;
+       }
+
+       if (device_addr < prop->dram_user_base_address ||
+                               device_addr + size > prop->dram_end_address ||
+                               device_addr + size < device_addr) {
+               dev_dbg(hdev->dev,
+                       "DRAM memory range 0x%llx (+0x%llx) is outside of DRAM boundaries\n",
+                       device_addr, size);
+               return -EINVAL;
+       }
+
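+       /* Translate the device physical address to its offset within the
+        * exposed PCI DRAM BAR, and verify the entire range fits in the BAR.
+        */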
+       bar_address = hdev->dram_pci_bar_start +
+                       (device_addr - prop->dram_base_address);
+
+       if (bar_address + size >
+                       hdev->dram_pci_bar_start + prop->dram_pci_bar_size ||
+                       bar_address + size < bar_address) {
+               dev_dbg(hdev->dev,
+                       "DRAM memory range 0x%llx (+0x%llx) is outside of PCI BAR boundaries\n",
+                       device_addr, size);
+               return -EINVAL;
+       }
+
+       hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL);
+       if (!hl_dmabuf)
+               return -ENOMEM;
+
+       hl_dmabuf->device_address = device_addr;
+
+       rc = export_dmabuf_common(ctx, hl_dmabuf, size, flags, dmabuf_fd);
+       if (rc)
+               goto err_free_dmabuf_wrapper;
+
+       return 0;
+
+err_free_dmabuf_wrapper:
+       kfree(hl_dmabuf);
+       return rc;
+}
+
+/**
+ * export_dmabuf_from_handle() - export a dma-buf object for the given memory
+ *                               handle.
+ * @ctx: pointer to the context structure.
+ * @handle: device memory allocation handle.
+ * @flags: DMA-BUF file/FD flags.
+ * @dmabuf_fd: pointer to result FD that represents the dma-buf object.
+ *
+ * Create and export a dma-buf object for an existing memory allocation inside
+ * the device memory, and return a FD which is associated with the dma-buf
+ * object.
+ *
+ * Return: 0 on success, non-zero for failure.
+ */
+static int export_dmabuf_from_handle(struct hl_ctx *ctx, u64 handle, int flags,
+                                       int *dmabuf_fd)
+{
+       struct hl_vm_phys_pg_pack *phys_pg_pack;
+       struct hl_dmabuf_priv *hl_dmabuf;
+       struct hl_device *hdev = ctx->hdev;
+       struct asic_fixed_properties *prop;
+       struct hl_vm *vm = &hdev->vm;
+       u64 bar_address;
+       int rc, i;
+
+       prop = &hdev->asic_prop;
+
+       if (upper_32_bits(handle)) {
+               dev_dbg(hdev->dev, "no match for handle 0x%llx\n", handle);
+               return -EINVAL;
+       }
+
+       spin_lock(&vm->idr_lock);
+
+       phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, (u32) handle);
+       if (!phys_pg_pack) {
+               spin_unlock(&vm->idr_lock);
+               dev_dbg(hdev->dev, "no match for handle 0x%x\n", (u32) handle);
+               return -EINVAL;
+       }
+
+       /* increment now to avoid freeing device memory while exporting */
+       phys_pg_pack->exporting_cnt++;
+
+       spin_unlock(&vm->idr_lock);
+
+       if (phys_pg_pack->vm_type != VM_TYPE_PHYS_PACK) {
+               dev_dbg(hdev->dev, "handle 0x%llx does not represent DRAM memory\n", handle);
+               rc = -EINVAL;
+               goto err_dec_exporting_cnt;
+       }
+
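+       /* Verify that every physical page of the pack is reachable through
+        * the exposed PCI DRAM BAR.
+        */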
+       for (i = 0 ; i < phys_pg_pack->npages ; i++) {
+               bar_address = hdev->dram_pci_bar_start +
+                                               (phys_pg_pack->pages[i] -
+                                               prop->dram_base_address);
+
+               if (bar_address + phys_pg_pack->page_size >
+                       hdev->dram_pci_bar_start + prop->dram_pci_bar_size ||
+                       bar_address + phys_pg_pack->page_size < bar_address) {
+                       dev_dbg(hdev->dev,
+                               "DRAM memory range 0x%llx (+0x%x) is outside of PCI BAR boundaries\n",
+                               phys_pg_pack->pages[i],
+                               phys_pg_pack->page_size);
+
+                       rc = -EINVAL;
+                       goto err_dec_exporting_cnt;
+               }
+       }
+
+       hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL);
+       if (!hl_dmabuf) {
+               rc = -ENOMEM;
+               goto err_dec_exporting_cnt;
+       }
+
+       hl_dmabuf->phys_pg_pack = phys_pg_pack;
+
+       rc = export_dmabuf_common(ctx, hl_dmabuf, phys_pg_pack->total_size,
+                               flags, dmabuf_fd);
+       if (rc)
+               goto err_free_dmabuf_wrapper;
+
+       return 0;
+
+err_free_dmabuf_wrapper:
+       kfree(hl_dmabuf);
+
+err_dec_exporting_cnt:
+       spin_lock(&vm->idr_lock);
+       phys_pg_pack->exporting_cnt--;
+       spin_unlock(&vm->idr_lock);
+
+       return rc;
+}
+
 static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
 {
        struct hl_device *hdev = hpriv->hdev;
        struct hl_ctx *ctx = hpriv->ctx;
        u64 block_handle, device_addr = 0;
        u32 handle = 0, block_size;
-       int rc;
+       int rc, dmabuf_fd = -EBADF;
 
        switch (args->in.op) {
        case HL_MEM_OP_ALLOC:
                args->out.block_size = block_size;
                break;
 
+       case HL_MEM_OP_EXPORT_DMABUF_FD:
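+               /* In the no-MMU flow the handle field carries the device
+                * physical address to export.
+                */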
+               rc = export_dmabuf_from_addr(ctx,
+                               args->in.export_dmabuf_fd.handle,
+                               args->in.export_dmabuf_fd.mem_size,
+                               args->in.flags,
+                               &dmabuf_fd);
+               memset(args, 0, sizeof(*args));
+               args->out.fd = dmabuf_fd;
+               break;
+
        default:
                dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
                rc = -ENOTTY;
        struct hl_ctx *ctx = hpriv->ctx;
        u64 block_handle, device_addr = 0;
        u32 handle = 0, block_size;
-       int rc;
+       int rc, dmabuf_fd = -EBADF;
 
        if (!hl_device_operational(hdev, &status)) {
                dev_warn_ratelimited(hdev->dev,
                args->out.block_size = block_size;
                break;
 
+       case HL_MEM_OP_EXPORT_DMABUF_FD:
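+               /* When DRAM is managed by virtual memory, the handle refers
+                * to an allocation (phys_pg_pack); otherwise it carries the
+                * device physical address.
+                */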
+               if (hdev->asic_prop.dram_supports_virtual_memory)
+                       rc = export_dmabuf_from_handle(ctx,
+                                       args->in.export_dmabuf_fd.handle,
+                                       args->in.flags,
+                                       &dmabuf_fd);
+               else
+                       rc = export_dmabuf_from_addr(ctx,
+                                       args->in.export_dmabuf_fd.handle,
+                                       args->in.export_dmabuf_fd.mem_size,
+                                       args->in.flags,
+                                       &dmabuf_fd);
+               memset(args, 0, sizeof(*args));
+               args->out.fd = dmabuf_fd;
+               break;
+
        default:
                dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
                rc = -ENOTTY;