crypto: caam - add Queue Interface (QI) backend support

author Horia Geantă <horia.geanta@nxp.com>

Fri, 17 Mar 2017 10:06:01 +0000 (12:06 +0200)

committer Herbert Xu <herbert@gondor.apana.org.au>

Fri, 24 Mar 2017 14:02:59 +0000 (22:02 +0800)
author Horia Geantă <horia.geanta@nxp.com>
Fri, 17 Mar 2017 10:06:01 +0000 (12:06 +0200)
committer Herbert Xu <herbert@gondor.apana.org.au>
Fri, 24 Mar 2017 14:02:59 +0000 (22:02 +0800)
diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile

index 6554742f357e01de8a01fe23a2b50b63d0532655..2e60e45c2bf1feb2f17ad11149d248223d07e9f2 100644 (file)
--- a/drivers/crypto/caam/Makefile
+++ b/drivers/crypto/caam/Makefile
@@ -16,3 +16,7 @@ obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_PKC_API) += caam_pkc.o
  caam-objs := ctrl.o
  caam_jr-objs := jr.o key_gen.o error.o
  caam_pkc-y := caampkc.o pkc_desc.o
+ifneq ($(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI),)
+       ccflags-y += -DCONFIG_CAAM_QI
+       caam-objs += qi.o
+endif
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c

index fef39f9f41ee200c5ed7138edae76beaefdff3cb..b3a94d5eff260ae2fc8b1a9346d1057d7bbe1f5b 100644 (file)
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -18,6 +18,10 @@
  bool caam_little_end;
  EXPORT_SYMBOL(caam_little_end);
  
+#ifdef CONFIG_CAAM_QI
+#include "qi.h"
+#endif
+
  /*
   * i.MX targets tend to have clock control subsystems that can
   * enable/disable clocking to our device.
@@ -311,6 +315,11 @@ static int caam_remove(struct platform_device *pdev)
         for (ring = 0; ring < ctrlpriv->total_jobrs; ring++)
                 of_device_unregister(ctrlpriv->jrpdev[ring]);
  
+#ifdef CONFIG_CAAM_QI
+       if (ctrlpriv->qidev)
+               caam_qi_shutdown(ctrlpriv->qidev);
+#endif
+
         /* De-initialize RNG state handles initialized by this driver. */
         if (ctrlpriv->rng4_sh_init)
                 deinstantiate_rng(ctrldev, ctrlpriv->rng4_sh_init);
@@ -401,23 +410,6 @@ int caam_get_era(void)
  }
  EXPORT_SYMBOL(caam_get_era);
  
-#ifdef CONFIG_DEBUG_FS
-static int caam_debugfs_u64_get(void *data, u64 *val)
-{
-       *val = caam64_to_cpu(*(u64 *)data);
-       return 0;
-}
-
-static int caam_debugfs_u32_get(void *data, u64 *val)
-{
-       *val = caam32_to_cpu(*(u32 *)data);
-       return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u32_ro, caam_debugfs_u32_get, NULL, "%llu\n");
-DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u64_ro, caam_debugfs_u64_get, NULL, "%llu\n");
-#endif
-
  /* Probe routine for CAAM top (controller) level */
  static int caam_probe(struct platform_device *pdev)
  {
@@ -615,6 +607,17 @@ static int caam_probe(struct platform_device *pdev)
                 goto iounmap_ctrl;
         }
  
+#ifdef CONFIG_DEBUG_FS
+       /*
+        * FIXME: needs better naming distinction, as some amalgamation of
+        * "caam" and nprop->full_name. The OF name isn't distinctive,
+        * but does separate instances
+        */
+       perfmon = (struct caam_perfmon __force *)&ctrl->perfmon;
+
+       ctrlpriv->dfs_root = debugfs_create_dir(dev_name(dev), NULL);
+       ctrlpriv->ctl = debugfs_create_dir("ctl", ctrlpriv->dfs_root);
+#endif
         ring = 0;
         ridx = 0;
         ctrlpriv->total_jobrs = 0;
@@ -650,6 +653,13 @@ static int caam_probe(struct platform_device *pdev)
                                );
                 /* This is all that's required to physically enable QI */
                 wr_reg32(&ctrlpriv->qi->qi_control_lo, QICTL_DQEN);
+
+               /* If QMAN driver is present, init CAAM-QI backend */
+#ifdef CONFIG_CAAM_QI
+               ret = caam_qi_init(pdev);
+               if (ret)
+                       dev_err(dev, "caam qi i/f init failed: %d\n", ret);
+#endif
         }
  
         /* If no QI and no rings specified, quit and go home */
@@ -737,17 +747,6 @@ static int caam_probe(struct platform_device *pdev)
                  ctrlpriv->total_jobrs, ctrlpriv->qi_present);
  
  #ifdef CONFIG_DEBUG_FS
-       /*
-        * FIXME: needs better naming distinction, as some amalgamation of
-        * "caam" and nprop->full_name. The OF name isn't distinctive,
-        * but does separate instances
-        */
-       perfmon = (struct caam_perfmon __force *)&ctrl->perfmon;
-
-       ctrlpriv->dfs_root = debugfs_create_dir(dev_name(dev), NULL);
-       ctrlpriv->ctl = debugfs_create_dir("ctl", ctrlpriv->dfs_root);
-
-       /* Controller-level - performance monitor counters */
  
         ctrlpriv->ctl_rq_dequeued =
                 debugfs_create_file("rq_dequeued",
@@ -830,6 +829,9 @@ static int caam_probe(struct platform_device *pdev)
         return 0;
  
  caam_remove:
+#ifdef CONFIG_DEBUG_FS
+       debugfs_remove_recursive(ctrlpriv->dfs_root);
+#endif
         caam_remove(pdev);
         return ret;
  
diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h

index e2bcacc1a921675cf30f70a40816e1306a8c3ef9..c334df638ff67d78399c0fab8e6a3d6b38db44ac 100644 (file)
--- a/drivers/crypto/caam/intern.h
+++ b/drivers/crypto/caam/intern.h
@@ -67,6 +67,9 @@ struct caam_drv_private {
  
         struct device *dev;
         struct platform_device **jrpdev; /* Alloc'ed array per sub-device */
+#ifdef CONFIG_CAAM_QI
+       struct device *qidev;
+#endif
         struct platform_device *pdev;
  
         /* Physical-presence section */
@@ -110,9 +113,30 @@ struct caam_drv_private {
  
         struct debugfs_blob_wrapper ctl_kek_wrap, ctl_tkek_wrap, ctl_tdsk_wrap;
         struct dentry *ctl_kek, *ctl_tkek, *ctl_tdsk;
+#ifdef CONFIG_CAAM_QI
+       struct dentry *qi_congested;
+#endif
  #endif
  };
  
  void caam_jr_algapi_init(struct device *dev);
  void caam_jr_algapi_remove(struct device *dev);
+
+#ifdef CONFIG_DEBUG_FS
+/* debugfs getter: report a 64-bit CAAM-endian value in CPU byte order */
+static int caam_debugfs_u64_get(void *data, u64 *val)
+{
+       u64 raw = *(u64 *)data;
+
+       *val = caam64_to_cpu(raw);
+       return 0;
+}
+
+/* debugfs getter: report a 32-bit CAAM-endian value in CPU byte order */
+static int caam_debugfs_u32_get(void *data, u64 *val)
+{
+       u32 raw = *(u32 *)data;
+
+       *val = caam32_to_cpu(raw);
+       return 0;
+}
+
+/* Read-only attributes (no setter) printing the value as "%llu" */
+DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u32_ro, caam_debugfs_u32_get, NULL, "%llu\n");
+DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u64_ro, caam_debugfs_u64_get, NULL, "%llu\n");
+#endif
+
  #endif /* INTERN_H */
diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c

new file mode 100644 (file)

index 0000000..45de8fd
--- /dev/null
+++ b/drivers/crypto/caam/qi.c
@@ -0,0 +1,805 @@
+/*
+ * CAAM/SEC 4.x QI transport/backend driver
+ * Queue Interface backend functionality
+ *
+ * Copyright 2013-2016 Freescale Semiconductor, Inc.
+ * Copyright 2016-2017 NXP
+ */
+
+#include <linux/cpumask.h>
+#include <linux/kthread.h>
+#include <soc/fsl/qman.h>
+
+#include "regs.h"
+#include "qi.h"
+#include "desc.h"
+#include "intern.h"
+#include "desc_constr.h"
+
+#define PREHDR_RSLS_SHIFT      31
+
+/*
+ * Use a reasonable backlog of frames (per CPU) as congestion threshold,
+ * so that resources used by the in-flight buffers do not become a memory hog.
+ */
+#define MAX_RSP_FQ_BACKLOG_PER_CPU     256
+
+/* Length of a single buffer in the QI driver memory cache */
+#define CAAM_QI_MEMCACHE_SIZE  512
+
+#define CAAM_QI_ENQUEUE_RETRIES        10000
+
+#define CAAM_NAPI_WEIGHT       63
+
+/*
+ * caam_napi - struct holding CAAM NAPI-related params
+ * @irqtask: NAPI context for the QI backend; scheduled by
+ *           caam_qi_napi_schedule() and polled by caam_qi_poll()
+ * @p: QMan portal recorded by caam_qi_napi_schedule() and later polled /
+ *     re-armed by caam_qi_poll()
+ */
+struct caam_napi {
+       struct napi_struct irqtask;
+       struct qman_portal *p;
+};
+
+/*
+ * caam_qi_pcpu_priv - per-CPU private data of the QI backend: the NAPI
+ *                     context, the netdev it is attached to and the response
+ *                     FQ serviced on that CPU.
+ * @caam_napi: CAAM NAPI params
+ * @net_dev: netdev used by NAPI (only its dev and napi_list members are set
+ *           up by caam_qi_init())
+ * @rsp_fq: response FQ from CAAM
+ */
+struct caam_qi_pcpu_priv {
+       struct caam_napi caam_napi;
+       struct net_device net_dev;
+       struct qman_fq *rsp_fq;
+} ____cacheline_aligned;
+
+static DEFINE_PER_CPU(struct caam_qi_pcpu_priv, pcpu_qipriv);
+
+/*
+ * caam_qi_priv - CAAM QI backend private params
+ * @cgr: QMan congestion group; its cgrid is programmed into both the request
+ *       and the response FQs
+ * @qi_pdev: platform device for QI backend
+ */
+struct caam_qi_priv {
+       struct qman_cgr cgr;
+       struct platform_device *qi_pdev;
+};
+
+static struct caam_qi_priv qipriv ____cacheline_aligned;
+
+/*
+ * This is written by only one core - the one that initialized the CGR - and
+ * read by multiple cores (all the others).
+ */
+bool caam_congested __read_mostly;
+EXPORT_SYMBOL(caam_congested);
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * This is a counter for the number of times the congestion group (where all
+ * the request and response queues are) reached congestion. Incremented
+ * each time the congestion callback is called with congested == true.
+ */
+static u64 times_congested;
+#endif
+
+/*
+ * CPU from where the module initialised. This is required because QMan driver
+ * requires CGRs to be removed from same CPU from where they were originally
+ * allocated.
+ */
+static int mod_init_cpu;
+
+/*
+ * This is a cache of buffers, from which the users of CAAM QI driver
+ * can allocate short (CAAM_QI_MEMCACHE_SIZE) buffers. It's faster than
+ * doing malloc on the hotpath.
+ * NOTE: A more elegant solution would be to have some headroom in the frames
+ *       being processed. This could be added by the dpaa-ethernet driver.
+ *       This would pose a problem for userspace application processing which
+ *       cannot know of this limitation. So for now, this will work.
+ * NOTE: The memcache is SMP-safe. No need to handle spinlocks in-here
+ */
+static struct kmem_cache *qi_cache;
+
+/*
+ * caam_qi_enqueue - submit a request to CAAM through its request FQ
+ * @qidev: device used for DMA-mapping the request's S/G table
+ * @req: request to enqueue; req->fd_sgt and req->drv_ctx must be set up
+ *
+ * DMA-maps req->fd_sgt, builds a compound frame descriptor pointing to it and
+ * enqueues it on req->drv_ctx->req_fq, retrying up to
+ * CAAM_QI_ENQUEUE_RETRIES times while qman_enqueue() returns -EBUSY.
+ *
+ * Returns 0 on success, -EIO if the DMA mapping fails, or the qman_enqueue()
+ * error code otherwise. On enqueue failure the mapping is released here,
+ * because the completion callbacks (which normally unmap it) will never run
+ * for a frame that was not enqueued.
+ */
+int caam_qi_enqueue(struct device *qidev, struct caam_drv_req *req)
+{
+       struct qm_fd fd;
+       dma_addr_t addr;
+       int ret;
+       int num_retries = 0;
+
+       qm_fd_clear_fd(&fd);
+       qm_fd_set_compound(&fd, qm_sg_entry_get_len(&req->fd_sgt[1]));
+
+       addr = dma_map_single(qidev, req->fd_sgt, sizeof(req->fd_sgt),
+                             DMA_BIDIRECTIONAL);
+       if (dma_mapping_error(qidev, addr)) {
+               dev_err(qidev, "DMA mapping error for QI enqueue request\n");
+               return -EIO;
+       }
+       qm_fd_addr_set64(&fd, addr);
+
+       do {
+               ret = qman_enqueue(req->drv_ctx->req_fq, &fd);
+               if (likely(!ret))
+                       return 0;
+
+               /* Only a busy enqueue ring is worth retrying */
+               if (ret != -EBUSY)
+                       break;
+               num_retries++;
+       } while (num_retries < CAAM_QI_ENQUEUE_RETRIES);
+
+       dev_err(qidev, "qman_enqueue failed: %d\n", ret);
+
+       /* The frame never reached QMan: undo the mapping done above */
+       dma_unmap_single(qidev, addr, sizeof(req->fd_sgt), DMA_BIDIRECTIONAL);
+
+       return ret;
+}
+EXPORT_SYMBOL(caam_qi_enqueue);
+
+/*
+ * Enqueue rejection callback of the request FQs: recover the originating
+ * request from the rejected frame descriptor, unmap its S/G table and
+ * complete it with -EIO.
+ */
+static void caam_fq_ern_cb(struct qman_portal *qm, struct qman_fq *fq,
+                          const union qm_mr_entry *msg)
+{
+       struct device *qidev = &(raw_cpu_ptr(&pcpu_qipriv)->net_dev.dev);
+       const struct qm_fd *fd = &msg->ern.fd;
+       struct caam_drv_req *req;
+
+       if (qm_fd_get_format(fd) != qm_fd_compound) {
+               dev_err(qidev, "Non-compound FD from CAAM\n");
+               return;
+       }
+
+       /* The FD address is the DMA address of the request's fd_sgt */
+       req = phys_to_virt(qm_fd_addr_get64(fd));
+       if (!req) {
+               dev_err(qidev,
+                       "Can't find original request for CAAM response\n");
+               return;
+       }
+
+       dma_unmap_single(req->drv_ctx->qidev, qm_fd_addr(fd),
+                        sizeof(req->fd_sgt), DMA_BIDIRECTIONAL);
+
+       req->cbk(req, -EIO);
+}
+
+/*
+ * create_caam_req_fq - allocate and initialise a request FQ towards CAAM
+ * @qidev: device used for log messages
+ * @rsp_fq: response FQ; its FQID is written into the new FQ's context_b
+ * @hwdesc: DMA address written into the new FQ's context_a
+ * @fq_sched_flag: flags passed to qman_init_fq() (callers in this file pass
+ *                 QMAN_INITFQ_FLAG_SCHED or 0)
+ *
+ * Returns the new FQ, or an ERR_PTR() on failure (never NULL). The FQ object
+ * is allocated with kzalloc() here.
+ */
+static struct qman_fq *create_caam_req_fq(struct device *qidev,
+                                         struct qman_fq *rsp_fq,
+                                         dma_addr_t hwdesc,
+                                         int fq_sched_flag)
+{
+       int ret;
+       struct qman_fq *req_fq;
+       struct qm_mcc_initfq opts;
+
+       req_fq = kzalloc(sizeof(*req_fq), GFP_ATOMIC);
+       if (!req_fq)
+               return ERR_PTR(-ENOMEM);
+
+       /* Only enqueue rejections are handled on request FQs */
+       req_fq->cb.ern = caam_fq_ern_cb;
+       req_fq->cb.fqs = NULL;
+
+       ret = qman_create_fq(0, QMAN_FQ_FLAG_DYNAMIC_FQID |
+                               QMAN_FQ_FLAG_TO_DCPORTAL, req_fq);
+       if (ret) {
+               dev_err(qidev, "Failed to create session req FQ\n");
+               goto create_req_fq_fail;
+       }
+
+       /*
+        * FQ descriptor: destination is work queue 2 of the CAAM channel,
+        * context_b carries the response FQID, context_a carries hwdesc and
+        * the FQ is placed in the driver's congestion group.
+        */
+       memset(&opts, 0, sizeof(opts));
+       opts.we_mask = cpu_to_be16(QM_INITFQ_WE_FQCTRL | QM_INITFQ_WE_DESTWQ |
+                                  QM_INITFQ_WE_CONTEXTB |
+                                  QM_INITFQ_WE_CONTEXTA | QM_INITFQ_WE_CGID);
+       opts.fqd.fq_ctrl = cpu_to_be16(QM_FQCTRL_CPCSTASH | QM_FQCTRL_CGE);
+       qm_fqd_set_destwq(&opts.fqd, qm_channel_caam, 2);
+       opts.fqd.context_b = cpu_to_be32(qman_fq_fqid(rsp_fq));
+       qm_fqd_context_a_set64(&opts.fqd, hwdesc);
+       opts.fqd.cgid = qipriv.cgr.cgrid;
+
+       ret = qman_init_fq(req_fq, fq_sched_flag, &opts);
+       if (ret) {
+               dev_err(qidev, "Failed to init session req FQ\n");
+               goto init_req_fq_fail;
+       }
+
+       dev_info(qidev, "Allocated request FQ %u for CPU %u\n", req_fq->fqid,
+                smp_processor_id());
+       return req_fq;
+
+       /* Unwind in reverse order of acquisition */
+init_req_fq_fail:
+       qman_destroy_fq(req_fq);
+create_req_fq_fail:
+       kfree(req_fq);
+       return ERR_PTR(ret);
+}
+
+/*
+ * empty_retired_fq - drain the frames still held by a FQ
+ * @qidev: device used for log messages
+ * @fq: FQ to drain
+ *
+ * Issues a volatile dequeue command for all remaining frames, then polls the
+ * DQRR of the current CPU's affine portal until the FQ no longer has
+ * QMAN_FQ_STATE_NE set in its flags.
+ *
+ * Returns 0 on success or the qman_volatile_dequeue() error code.
+ */
+static int empty_retired_fq(struct device *qidev, struct qman_fq *fq)
+{
+       int ret;
+
+       ret = qman_volatile_dequeue(fq, QMAN_VOLATILE_FLAG_WAIT_INT |
+                                   QMAN_VOLATILE_FLAG_FINISH,
+                                   QM_VDQCR_PRECEDENCE_VDQCR |
+                                   QM_VDQCR_NUMFRAMES_TILLEMPTY);
+       if (ret) {
+               dev_err(qidev, "Volatile dequeue fail for FQ: %u\n", fq->fqid);
+               return ret;
+       }
+
+       /* Process dequeued entries (up to 16 per poll) until the FQ is empty */
+       do {
+               struct qman_portal *p;
+
+               p = qman_get_affine_portal(smp_processor_id());
+               qman_p_poll_dqrr(p, 16);
+       } while (fq->flags & QMAN_FQ_STATE_NE);
+
+       return 0;
+}
+
+/*
+ * kill_fq - retire a FQ, drain it, take it out of service and destroy it
+ * @qidev: device used for log messages
+ * @fq: FQ to remove
+ *
+ * Returns a negative error code if retiring or draining fails (in which case
+ * the FQ has NOT been destroyed), otherwise the result of qman_oos_fq() (the
+ * FQ is destroyed regardless of that result).
+ *
+ * This function does not kfree() @fq; releasing the FQ object's memory is
+ * left to the caller.
+ */
+static int kill_fq(struct device *qidev, struct qman_fq *fq)
+{
+       u32 flags;
+       int ret;
+
+       ret = qman_retire_fq(fq, &flags);
+       if (ret < 0) {
+               dev_err(qidev, "qman_retire_fq failed: %d\n", ret);
+               return ret;
+       }
+
+       /* 0: retirement already completed, go straight to draining */
+       if (!ret)
+               goto empty_fq;
+
+       /* Async FQ retirement condition */
+       if (ret == 1) {
+               /* Retry till FQ gets in retired state */
+               do {
+                       msleep(20);
+               } while (fq->state != qman_fq_state_retired);
+
+               WARN_ON(fq->flags & QMAN_FQ_STATE_BLOCKOOS);
+               WARN_ON(fq->flags & QMAN_FQ_STATE_ORL);
+       }
+
+empty_fq:
+       /* Frames may still be queued on the retired FQ; dequeue them first */
+       if (fq->flags & QMAN_FQ_STATE_NE) {
+               ret = empty_retired_fq(qidev, fq);
+               if (ret) {
+                       dev_err(qidev, "empty_retired_fq fail for FQ: %u\n",
+                               fq->fqid);
+                       return ret;
+               }
+       }
+
+       ret = qman_oos_fq(fq);
+       if (ret)
+               dev_err(qidev, "OOS of FQID: %u failed\n", fq->fqid);
+
+       /* Destroyed even if putting the FQ out of service failed */
+       qman_destroy_fq(fq);
+
+       return ret;
+}
+
+/*
+ * Wait (sleeping in 20 ms steps) until QMan reports no frames left on @fq.
+ * Returns 0 once the FQ is empty, or the qman_query_fq_np() error code.
+ */
+static int empty_caam_fq(struct qman_fq *fq)
+{
+       struct qm_mcr_queryfq_np np;
+       int ret;
+
+       /* Wait till the older CAAM FQ get empty */
+       for (;;) {
+               ret = qman_query_fq_np(fq, &np);
+               if (ret)
+                       return ret;
+
+               if (qm_mcr_np_get(&np, frm_cnt) == 0)
+                       break;
+
+               msleep(20);
+       }
+
+       /*
+        * Jobs from this FQ may still sit in holding tanks; allow them
+        * some extra time to be processed.
+        */
+       msleep(20);
+       return 0;
+}
+
+/*
+ * caam_drv_ctx_update - replace the shared descriptor of a driver context
+ * @drv_ctx: driver context to update
+ * @sh_desc: new shared descriptor
+ *
+ * A new request FQ is created without being scheduled and hooked into
+ * @drv_ctx, the old request FQ is drained, the pre-header and shared
+ * descriptor are rewritten and synced to the device, and only then is the new
+ * FQ scheduled and the old one removed. If scheduling the new FQ fails, the
+ * context falls back to the old FQ, which by then uses the new descriptor.
+ *
+ * A FQ object is freed only after kill_fq() succeeded on it; kill_fq() may
+ * fail before destroying the FQ, in which case QMan can still reference it.
+ *
+ * Returns 0 on success, -EINVAL if @sh_desc exceeds MAX_SDLEN words, or the
+ * error from creating the new FQ or from draining the old one.
+ */
+int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc)
+{
+       int ret;
+       u32 num_words;
+       struct qman_fq *new_fq, *old_fq;
+       struct device *qidev = drv_ctx->qidev;
+
+       num_words = desc_len(sh_desc);
+       if (num_words > MAX_SDLEN) {
+               dev_err(qidev, "Invalid descriptor len: %u words\n", num_words);
+               return -EINVAL;
+       }
+
+       /* Note down older req FQ */
+       old_fq = drv_ctx->req_fq;
+
+       /* Create a new req FQ in parked state */
+       new_fq = create_caam_req_fq(drv_ctx->qidev, drv_ctx->rsp_fq,
+                                   drv_ctx->context_a, 0);
+       /* create_caam_req_fq() reports failure via ERR_PTR(), never NULL */
+       if (IS_ERR(new_fq)) {
+               dev_err(qidev, "FQ allocation for shdesc update failed\n");
+               return PTR_ERR(new_fq);
+       }
+
+       /* Hook up new FQ to context so that new requests keep queuing */
+       drv_ctx->req_fq = new_fq;
+
+       /* Empty and remove the older FQ */
+       ret = empty_caam_fq(old_fq);
+       if (ret) {
+               dev_err(qidev, "Old CAAM FQ empty failed: %d\n", ret);
+
+               /* We can revert to older FQ */
+               drv_ctx->req_fq = old_fq;
+
+               if (kill_fq(qidev, new_fq))
+                       dev_warn(qidev, "New CAAM FQ: %u kill failed\n",
+                                new_fq->fqid);
+               else
+                       kfree(new_fq);
+
+               return ret;
+       }
+
+       /*
+        * Re-initialise pre-header. Set RSLS and SDLEN.
+        * Update the shared descriptor for driver context.
+        */
+       drv_ctx->prehdr[0] = cpu_to_caam32((1 << PREHDR_RSLS_SHIFT) |
+                                          num_words);
+       memcpy(drv_ctx->sh_desc, sh_desc, desc_bytes(sh_desc));
+       dma_sync_single_for_device(qidev, drv_ctx->context_a,
+                                  sizeof(drv_ctx->sh_desc) +
+                                  sizeof(drv_ctx->prehdr),
+                                  DMA_BIDIRECTIONAL);
+
+       /* Put the new FQ in scheduled state */
+       ret = qman_schedule_fq(new_fq);
+       if (ret) {
+               dev_err(qidev, "Fail to sched new CAAM FQ, ecode = %d\n", ret);
+
+               /*
+                * We can kill new FQ and revert to old FQ.
+                * Since the desc is already modified, it is success case
+                */
+
+               drv_ctx->req_fq = old_fq;
+
+               if (kill_fq(qidev, new_fq))
+                       dev_warn(qidev, "New CAAM FQ: %u kill failed\n",
+                                new_fq->fqid);
+               else
+                       kfree(new_fq);
+       } else if (kill_fq(qidev, old_fq)) {
+               dev_warn(qidev, "Old CAAM FQ: %u kill failed\n", old_fq->fqid);
+       } else {
+               /* Old FQ is gone from QMan; release its object as well */
+               kfree(old_fq);
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(caam_drv_ctx_update);
+
+/*
+ * caam_drv_ctx_init - create a driver context for a shared descriptor
+ * @qidev: device used for DMA mapping and log messages
+ * @cpu: in: preferred CPU for responses; out: the CPU actually chosen (it is
+ *       replaced when the preferred CPU is not in qman_affine_cpus())
+ * @sh_desc: shared descriptor to copy into the context
+ *
+ * Returns the new context or an ERR_PTR(): -EINVAL when @sh_desc exceeds
+ * MAX_SDLEN words, -ENOMEM for allocation, DMA mapping or request FQ
+ * creation failures.
+ */
+struct caam_drv_ctx *caam_drv_ctx_init(struct device *qidev,
+                                      int *cpu,
+                                      u32 *sh_desc)
+{
+       size_t size;
+       u32 num_words;
+       dma_addr_t hwdesc;
+       struct caam_drv_ctx *drv_ctx;
+       const cpumask_t *cpus = qman_affine_cpus();
+       /* Per-CPU cursor for the round-robin CPU selection below */
+       static DEFINE_PER_CPU(int, last_cpu);
+
+       num_words = desc_len(sh_desc);
+       if (num_words > MAX_SDLEN) {
+               dev_err(qidev, "Invalid descriptor len: %d words\n",
+                       num_words);
+               return ERR_PTR(-EINVAL);
+       }
+
+       drv_ctx = kzalloc(sizeof(*drv_ctx), GFP_ATOMIC);
+       if (!drv_ctx)
+               return ERR_PTR(-ENOMEM);
+
+       /*
+        * Initialise pre-header - set RSLS and SDLEN - and shared descriptor
+        * and dma-map them.
+        */
+       drv_ctx->prehdr[0] = cpu_to_caam32((1 << PREHDR_RSLS_SHIFT) |
+                                          num_words);
+       memcpy(drv_ctx->sh_desc, sh_desc, desc_bytes(sh_desc));
+       /* A single mapping starting at prehdr covers prehdr + sh_desc */
+       size = sizeof(drv_ctx->prehdr) + sizeof(drv_ctx->sh_desc);
+       hwdesc = dma_map_single(qidev, drv_ctx->prehdr, size,
+                               DMA_BIDIRECTIONAL);
+       if (dma_mapping_error(qidev, hwdesc)) {
+               dev_err(qidev, "DMA map error for preheader + shdesc\n");
+               kfree(drv_ctx);
+               return ERR_PTR(-ENOMEM);
+       }
+       drv_ctx->context_a = hwdesc;
+
+       /* If given CPU does not own the portal, choose another one that does */
+       if (!cpumask_test_cpu(*cpu, cpus)) {
+               int *pcpu = &get_cpu_var(last_cpu);
+
+               /* Advance the cursor, wrapping to the first portal owner */
+               *pcpu = cpumask_next(*pcpu, cpus);
+               if (*pcpu >= nr_cpu_ids)
+                       *pcpu = cpumask_first(cpus);
+               *cpu = *pcpu;
+
+               put_cpu_var(last_cpu);
+       }
+       drv_ctx->cpu = *cpu;
+
+       /* Find response FQ hooked with this CPU */
+       drv_ctx->rsp_fq = per_cpu(pcpu_qipriv.rsp_fq, drv_ctx->cpu);
+
+       /* Attach request FQ */
+       drv_ctx->req_fq = create_caam_req_fq(qidev, drv_ctx->rsp_fq, hwdesc,
+                                            QMAN_INITFQ_FLAG_SCHED);
+       if (unlikely(IS_ERR_OR_NULL(drv_ctx->req_fq))) {
+               dev_err(qidev, "create_caam_req_fq failed\n");
+               dma_unmap_single(qidev, hwdesc, size, DMA_BIDIRECTIONAL);
+               kfree(drv_ctx);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       drv_ctx->qidev = qidev;
+       return drv_ctx;
+}
+EXPORT_SYMBOL(caam_drv_ctx_init);
+
+/* Allocate one buffer from the QI driver memory cache using @flags */
+void *qi_cache_alloc(gfp_t flags)
+{
+       void *buf = kmem_cache_alloc(qi_cache, flags);
+
+       return buf;
+}
+EXPORT_SYMBOL(qi_cache_alloc);
+
+/* Return @obj, obtained from qi_cache_alloc(), to the QI driver memory cache */
+void qi_cache_free(void *obj)
+{
+       kmem_cache_free(qi_cache, obj);
+}
+EXPORT_SYMBOL(qi_cache_free);
+
+/*
+ * NAPI poll handler: process up to @budget DQRR entries of the recorded
+ * portal. When less than @budget was consumed, complete NAPI and re-enable
+ * the portal's dequeue interrupt source. Returns the number of entries
+ * processed.
+ */
+static int caam_qi_poll(struct napi_struct *napi, int budget)
+{
+       struct caam_napi *cnapi = container_of(napi, struct caam_napi, irqtask);
+       int done;
+
+       done = qman_p_poll_dqrr(cnapi->p, budget);
+       if (done >= budget)
+               return done;
+
+       napi_complete(napi);
+       qman_p_irqsource_add(cnapi->p, QM_PIRQ_DQRI);
+
+       return done;
+}
+
+/*
+ * caam_drv_ctx_rel - release a driver context created by caam_drv_ctx_init()
+ * @drv_ctx: context to release; NULL and ERR_PTR() values are ignored
+ *
+ * Removes the request FQ, unmaps the pre-header + shared descriptor and frees
+ * the context. The request FQ object is freed only when kill_fq() succeeded:
+ * kill_fq() can fail before the FQ is destroyed, and QMan may then still
+ * reference the object.
+ */
+void caam_drv_ctx_rel(struct caam_drv_ctx *drv_ctx)
+{
+       if (IS_ERR_OR_NULL(drv_ctx))
+               return;
+
+       /* Remove request FQ */
+       if (kill_fq(drv_ctx->qidev, drv_ctx->req_fq))
+               dev_err(drv_ctx->qidev, "Crypto session req FQ kill failed\n");
+       else
+               kfree(drv_ctx->req_fq);
+
+       dma_unmap_single(drv_ctx->qidev, drv_ctx->context_a,
+                        sizeof(drv_ctx->sh_desc) + sizeof(drv_ctx->prehdr),
+                        DMA_BIDIRECTIONAL);
+       kfree(drv_ctx);
+}
+EXPORT_SYMBOL(caam_drv_ctx_rel);
+
+/*
+ * caam_qi_shutdown - tear down the QI backend set up by caam_qi_init()
+ * @qidev: QI device; its drvdata is the backend's struct caam_qi_priv
+ *
+ * For every CPU in qman_affine_cpus() the NAPI context is disabled and
+ * removed and the response FQ is killed and freed. The CGR is then deleted
+ * while the current task is pinned to the CPU recorded at init time, the
+ * buffer cache is destroyed and the QI platform device is unregistered.
+ *
+ * Returns the result of qman_delete_cgr().
+ */
+int caam_qi_shutdown(struct device *qidev)
+{
+       int i, ret;
+       struct caam_qi_priv *priv = dev_get_drvdata(qidev);
+       const cpumask_t *cpus = qman_affine_cpus();
+       /* Saved so the caller's CPU affinity can be restored further down */
+       struct cpumask old_cpumask = current->cpus_allowed;
+
+       for_each_cpu(i, cpus) {
+               struct napi_struct *irqtask;
+
+               irqtask = &per_cpu_ptr(&pcpu_qipriv.caam_napi, i)->irqtask;
+               napi_disable(irqtask);
+               netif_napi_del(irqtask);
+
+               if (kill_fq(qidev, per_cpu(pcpu_qipriv.rsp_fq, i)))
+                       dev_err(qidev, "Rsp FQ kill failed, cpu: %d\n", i);
+               /* The FQ object is freed whether or not kill_fq() succeeded */
+               kfree(per_cpu(pcpu_qipriv.rsp_fq, i));
+       }
+
+       /*
+        * QMan driver requires CGRs to be deleted from same CPU from where they
+        * were instantiated. Hence we get the module removal execute from the
+        * same CPU from where it was originally inserted.
+        */
+       set_cpus_allowed_ptr(current, get_cpu_mask(mod_init_cpu));
+
+       ret = qman_delete_cgr(&priv->cgr);
+       if (ret)
+               dev_err(qidev, "Deletion of CGR failed: %d\n", ret);
+       else
+               qman_release_cgrid(priv->cgr.cgrid);
+
+       kmem_cache_destroy(qi_cache);
+
+       /* Now that we're done with the CGRs, restore the cpus allowed mask */
+       set_cpus_allowed_ptr(current, &old_cpumask);
+
+       platform_device_unregister(priv->qi_pdev);
+       return ret;
+}
+
+/*
+ * Congestion state change callback of the CGR: publish the new state in
+ * caam_congested and, with debugfs enabled, count every entry into
+ * congestion.
+ */
+static void cgr_cb(struct qman_portal *qm, struct qman_cgr *cgr, int congested)
+{
+       caam_congested = congested;
+
+       if (!congested) {
+               pr_debug_ratelimited("CAAM exited congestion\n");
+               return;
+       }
+
+#ifdef CONFIG_DEBUG_FS
+       times_congested++;
+#endif
+       pr_debug_ratelimited("CAAM entered congestion\n");
+}
+
+/*
+ * Hand response processing over to NAPI unless we already run in softirq
+ * context. Returns 1 when NAPI was scheduled (the portal's dequeue interrupt
+ * source is disabled first), 0 when the caller should process the entry
+ * itself.
+ */
+static int caam_qi_napi_schedule(struct qman_portal *p, struct caam_napi *np)
+{
+       /*
+        * in_irq() is not reliable with threaded ISRs on RT kernels, hence
+        * in_serving_softirq() is checked as well to tell softirq and irq
+        * contexts apart.
+        */
+       if (likely(!in_irq() && in_serving_softirq()))
+               return 0;
+
+       /* Disable QMan IRQ source and invoke NAPI */
+       qman_p_irqsource_remove(p, QM_PIRQ_DQRI);
+       np->p = p;
+       napi_schedule(&np->irqtask);
+
+       return 1;
+}
+
+/*
+ * Dequeue callback of the response FQs. Outside softirq context it only
+ * schedules NAPI and stops dequeuing; otherwise it recovers the originating
+ * request from the frame descriptor, unmaps its S/G table and invokes the
+ * request's callback with the FD status.
+ */
+static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p,
+                                                   struct qman_fq *rsp_fq,
+                                                   const struct qm_dqrr_entry *dqrr)
+{
+       struct device *qidev = &(raw_cpu_ptr(&pcpu_qipriv)->net_dev.dev);
+       const struct qm_fd *fd = &dqrr->fd;
+       struct caam_drv_req *req;
+       u32 fd_status;
+
+       if (caam_qi_napi_schedule(p, raw_cpu_ptr(&pcpu_qipriv.caam_napi)))
+               return qman_cb_dqrr_stop;
+
+       fd_status = be32_to_cpu(fd->status);
+       if (unlikely(fd_status))
+               dev_err(qidev, "Error: %#x in CAAM response FD\n", fd_status);
+
+       if (unlikely(qm_fd_get_format(fd) != qm_fd_compound)) {
+               dev_err(qidev, "Non-compound FD from CAAM\n");
+               return qman_cb_dqrr_consume;
+       }
+
+       /* The FD address is the DMA address of the request's fd_sgt */
+       req = phys_to_virt(qm_fd_addr_get64(fd));
+       if (unlikely(!req)) {
+               dev_err(qidev,
+                       "Can't find original request for caam response\n");
+               return qman_cb_dqrr_consume;
+       }
+
+       dma_unmap_single(req->drv_ctx->qidev, qm_fd_addr(fd),
+                        sizeof(req->fd_sgt), DMA_BIDIRECTIONAL);
+
+       req->cbk(req, fd_status);
+       return qman_cb_dqrr_consume;
+}
+
+/*
+ * alloc_rsp_fq_cpu - create and schedule the response FQ of one CPU
+ * @qidev: device used for log messages
+ * @cpu: CPU whose affine QMan channel the FQ is directed to
+ *
+ * On success the FQ is stored in the per-CPU pcpu_qipriv.rsp_fq of @cpu.
+ * Returns 0 on success, -ENOMEM if the FQ object cannot be allocated, or
+ * -ENODEV if QMan fails to create or initialise the FQ.
+ */
+static int alloc_rsp_fq_cpu(struct device *qidev, unsigned int cpu)
+{
+       struct qm_mcc_initfq opts;
+       struct qman_fq *fq;
+       int ret;
+
+       fq = kzalloc(sizeof(*fq), GFP_KERNEL | GFP_DMA);
+       if (!fq)
+               return -ENOMEM;
+
+       fq->cb.dqrr = caam_rsp_fq_dqrr_cb;
+
+       ret = qman_create_fq(0, QMAN_FQ_FLAG_NO_ENQUEUE |
+                            QMAN_FQ_FLAG_DYNAMIC_FQID, fq);
+       if (ret) {
+               dev_err(qidev, "Rsp FQ create failed\n");
+               kfree(fq);
+               return -ENODEV;
+       }
+
+       /*
+        * FQ descriptor: destination is work queue 3 of the CPU's affine
+        * channel, stashing is enabled and the FQ is placed in the driver's
+        * congestion group.
+        */
+       memset(&opts, 0, sizeof(opts));
+       opts.we_mask = cpu_to_be16(QM_INITFQ_WE_FQCTRL | QM_INITFQ_WE_DESTWQ |
+                                  QM_INITFQ_WE_CONTEXTB |
+                                  QM_INITFQ_WE_CONTEXTA | QM_INITFQ_WE_CGID);
+       opts.fqd.fq_ctrl = cpu_to_be16(QM_FQCTRL_CTXASTASHING |
+                                      QM_FQCTRL_CPCSTASH | QM_FQCTRL_CGE);
+       qm_fqd_set_destwq(&opts.fqd, qman_affine_channel(cpu), 3);
+       opts.fqd.cgid = qipriv.cgr.cgrid;
+       opts.fqd.context_a.stashing.exclusive = QM_STASHING_EXCL_CTX |
+                                               QM_STASHING_EXCL_DATA;
+       qm_fqd_set_stashing(&opts.fqd, 0, 1, 1);
+
+       ret = qman_init_fq(fq, QMAN_INITFQ_FLAG_SCHED, &opts);
+       if (ret) {
+               dev_err(qidev, "Rsp FQ init failed\n");
+               /* Undo qman_create_fq() before releasing the FQ object */
+               qman_destroy_fq(fq);
+               kfree(fq);
+               return -ENODEV;
+       }
+
+       per_cpu(pcpu_qipriv.rsp_fq, cpu) = fq;
+
+       dev_info(qidev, "Allocated response FQ %u for CPU %u\n", fq->fqid, cpu);
+       return 0;
+}
+
+/*
+ * init_cgr - allocate and create the congestion group used by the QI backend
+ * @qidev: device used for log messages
+ *
+ * The congestion threshold is MAX_RSP_FQ_BACKLOG_PER_CPU frames for each CPU
+ * in qman_affine_cpus(). The CPU count is taken with cpumask_weight() so
+ * that only the bits that belong to the cpumask are read, whatever its size.
+ *
+ * Returns 0 on success or the QMan error code; on failure no CGRID stays
+ * allocated.
+ */
+static int init_cgr(struct device *qidev)
+{
+       int ret;
+       struct qm_mcc_initcgr opts;
+       const u64 val = (u64)cpumask_weight(qman_affine_cpus()) *
+                       MAX_RSP_FQ_BACKLOG_PER_CPU;
+
+       ret = qman_alloc_cgrid(&qipriv.cgr.cgrid);
+       if (ret) {
+               dev_err(qidev, "CGR alloc failed for rsp FQs: %d\n", ret);
+               return ret;
+       }
+
+       qipriv.cgr.cb = cgr_cb;
+       memset(&opts, 0, sizeof(opts));
+       opts.we_mask = cpu_to_be16(QM_CGR_WE_CSCN_EN | QM_CGR_WE_CS_THRES |
+                                  QM_CGR_WE_MODE);
+       opts.cgr.cscn_en = QM_CGR_EN;
+       opts.cgr.mode = QMAN_CGR_MODE_FRAME;
+       qm_cgr_cs_thres_set64(&opts.cgr.cs_thres, val, 1);
+
+       ret = qman_create_cgr(&qipriv.cgr, QMAN_CGR_FLAG_USE_INIT, &opts);
+       if (ret) {
+               dev_err(qidev, "Error %d creating CAAM CGRID: %u\n", ret,
+                       qipriv.cgr.cgrid);
+               /* Do not leak the CGRID allocated above */
+               qman_release_cgrid(qipriv.cgr.cgrid);
+               return ret;
+       }
+
+       dev_info(qidev, "Congestion threshold set to %llu\n", val);
+       return 0;
+}
+
+/*
+ * alloc_rsp_fqs - create a response FQ for every CPU in qman_affine_cpus()
+ * @qidev: device used for log messages
+ *
+ * Stops at the first failure and returns its error code; FQs created for
+ * earlier CPUs are left in place for the caller to release. Returns 0 when
+ * all FQs were created.
+ */
+static int alloc_rsp_fqs(struct device *qidev)
+{
+       int ret, i;
+       const cpumask_t *cpus = qman_affine_cpus();
+
+       /* Now create response FQs */
+       for_each_cpu(i, cpus) {
+               ret = alloc_rsp_fq_cpu(qidev, i);
+               if (ret) {
+                       dev_err(qidev, "CAAM rsp FQ alloc failed, cpu: %u\n",
+                               i);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * free_rsp_fqs - free the response FQ objects of all CPUs in
+ * qman_affine_cpus()
+ *
+ * Only the memory is released; the FQs are not retired or destroyed in QMan
+ * here. Each per-CPU pointer is cleared after the free so that no dangling
+ * pointer is left behind (CPUs that never got a FQ hold NULL, which kfree()
+ * accepts).
+ */
+static void free_rsp_fqs(void)
+{
+       int i;
+       const cpumask_t *cpus = qman_affine_cpus();
+
+       for_each_cpu(i, cpus) {
+               kfree(per_cpu(pcpu_qipriv.rsp_fq, i));
+               per_cpu(pcpu_qipriv.rsp_fq, i) = NULL;
+       }
+}
+
+/*
+ * caam_qi_init - initialise the CAAM QI backend
+ * @caam_pdev: platform device of the CAAM controller
+ *
+ * Registers the "caam_qi" platform device as a child of the controller,
+ * creates the congestion group and the per-CPU response FQs, creates the
+ * buffer cache, enables a NAPI context on every CPU owning a QMan portal and
+ * finally publishes the QI device in the controller's private data.
+ *
+ * The calling task is pinned to the first portal-owning CPU for the duration
+ * of the initialisation; its original CPU affinity is restored on every exit
+ * path, including failures.
+ *
+ * Returns 0 on success or a negative error code. On failure
+ * ctrlpriv->qidev is left untouched, so the controller never sees a QI
+ * device that has already been unregistered.
+ */
+int caam_qi_init(struct platform_device *caam_pdev)
+{
+       int err, i;
+       struct platform_device *qi_pdev;
+       struct device *ctrldev = &caam_pdev->dev, *qidev;
+       struct caam_drv_private *ctrlpriv;
+       const cpumask_t *cpus = qman_affine_cpus();
+       struct cpumask old_cpumask = current->cpus_allowed;
+       static struct platform_device_info qi_pdev_info = {
+               .name = "caam_qi",
+               .id = PLATFORM_DEVID_NONE
+       };
+
+       /*
+        * QMAN requires CGRs to be removed from same CPU+portal from where it
+        * was originally allocated. Hence we need to note down the
+        * initialisation CPU and use the same CPU for module exit.
+        * We select the first CPU from the list of portal owning CPUs.
+        * Then we pin module init to this CPU.
+        */
+       mod_init_cpu = cpumask_first(cpus);
+       set_cpus_allowed_ptr(current, get_cpu_mask(mod_init_cpu));
+
+       qi_pdev_info.parent = ctrldev;
+       qi_pdev_info.dma_mask = dma_get_mask(ctrldev);
+       qi_pdev = platform_device_register_full(&qi_pdev_info);
+       if (IS_ERR(qi_pdev)) {
+               err = PTR_ERR(qi_pdev);
+               goto restore_cpumask;
+       }
+
+       ctrlpriv = dev_get_drvdata(ctrldev);
+       qidev = &qi_pdev->dev;
+
+       qipriv.qi_pdev = qi_pdev;
+       dev_set_drvdata(qidev, &qipriv);
+
+       /* Initialize the congestion detection */
+       err = init_cgr(qidev);
+       if (err) {
+               dev_err(qidev, "CGR initialization failed: %d\n", err);
+               goto unregister_pdev;
+       }
+
+       /* Initialise response FQs */
+       err = alloc_rsp_fqs(qidev);
+       if (err) {
+               dev_err(qidev, "Can't allocate CAAM response FQs: %d\n", err);
+               goto free_fqs;
+       }
+
+       /*
+        * Create the buffer cache before NAPI is enabled, so that a failure
+        * here does not leave enabled NAPI contexts behind.
+        */
+       qi_cache = kmem_cache_create("caamqicache", CAAM_QI_MEMCACHE_SIZE, 0,
+                                    SLAB_CACHE_DMA, NULL);
+       if (!qi_cache) {
+               dev_err(qidev, "Can't allocate CAAM cache\n");
+               err = -ENOMEM;
+               goto free_fqs;
+       }
+
+       /*
+        * Enable the NAPI contexts on each of the core which has an affine
+        * portal.
+        */
+       for_each_cpu(i, cpus) {
+               struct caam_qi_pcpu_priv *priv = per_cpu_ptr(&pcpu_qipriv, i);
+               struct caam_napi *caam_napi = &priv->caam_napi;
+               struct napi_struct *irqtask = &caam_napi->irqtask;
+               struct net_device *net_dev = &priv->net_dev;
+
+               net_dev->dev = *qidev;
+               INIT_LIST_HEAD(&net_dev->napi_list);
+
+               netif_napi_add(net_dev, irqtask, caam_qi_poll,
+                              CAAM_NAPI_WEIGHT);
+
+               napi_enable(irqtask);
+       }
+
+       /* Hook up QI device to parent controlling caam device */
+       ctrlpriv->qidev = qidev;
+
+       /* Done with the CGRs; restore the cpus allowed mask */
+       set_cpus_allowed_ptr(current, &old_cpumask);
+#ifdef CONFIG_DEBUG_FS
+       ctrlpriv->qi_congested = debugfs_create_file("qi_congested", 0444,
+                                                    ctrlpriv->ctl,
+                                                    &times_congested,
+                                                    &caam_fops_u64_ro);
+#endif
+       dev_info(qidev, "Linux CAAM Queue I/F driver initialised\n");
+       return 0;
+
+       /*
+        * NOTE(review): as before this change, the CGR created by init_cgr()
+        * is not deleted and the response FQs are only kfree()d on these
+        * paths - confirm whether a full QMan teardown is wanted here.
+        */
+free_fqs:
+       free_rsp_fqs();
+unregister_pdev:
+       platform_device_unregister(qi_pdev);
+restore_cpumask:
+       set_cpus_allowed_ptr(current, &old_cpumask);
+       return err;
+}
diff --git a/drivers/crypto/caam/qi.h b/drivers/crypto/caam/qi.h

new file mode 100644 (file)

index 0000000..33b0433
--- /dev/null
+++ b/drivers/crypto/caam/qi.h
@@ -0,0 +1,201 @@
+/*
+ * Public definitions for the CAAM/QI (Queue Interface) backend.
+ *
+ * Copyright 2013-2016 Freescale Semiconductor, Inc.
+ * Copyright 2016-2017 NXP
+ */
+
+#ifndef __QI_H__
+#define __QI_H__
+
+#include <soc/fsl/qman.h>
+#include "compat.h"
+#include "desc.h"
+#include "desc_constr.h"
+
+/*
+ * CAAM hardware constructs a job descriptor which points to a shared descriptor
+ * (the one pointed to by context_a of the to-CAAM FQ).
+ * When the job descriptor is executed by DECO, the whole job descriptor
+ * together with the shared descriptor gets loaded into the DECO buffer, which
+ * is 64 words (each 32-bit) long.
+ *
+ * The job descriptor constructed by CAAM hardware has the following layout:
+ *
+ *     HEADER          (1 word)
+ *     Shdesc ptr      (1 or 2 words)
+ *     SEQ_OUT_PTR     (1 word)
+ *     Out ptr         (1 or 2 words)
+ *     Out length      (1 word)
+ *     SEQ_IN_PTR      (1 word)
+ *     In ptr          (1 or 2 words)
+ *     In length       (1 word)
+ *
+ * The shdesc ptr is used to fetch shared descriptor contents into DECO buffer.
+ *
+ * Apart from the shdesc contents, the words loaded into the DECO buffer total
+ * 8 (when each pointer takes 1 word) or 11 (when each pointer takes 2 words).
+ * The remaining words in the DECO buffer can be used for storing the shared
+ * descriptor; MAX_SDLEN expresses that limit in units of CAAM_CMD_SZ.
+ */
+#define MAX_SDLEN      ((CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN) / CAAM_CMD_SZ)
+
+extern bool caam_congested __read_mostly;
+
+/*
+ * This is the request structure the driver application should fill while
+ * submitting a job to driver.
+ */
+struct caam_drv_req;
+
+/*
+ * caam_qi_cbk - application's callback function invoked by the driver when
+ *               processing of the request has completed; the outcome is
+ *               reported through @status.
+ * @drv_req: original request that was submitted
+ * @status: completion status of request (0 - success, non-zero - error code)
+ */
+typedef void (*caam_qi_cbk)(struct caam_drv_req *drv_req, u32 status);
+
+enum optype {          /* type of caam_drv_ctx.op_type */
+       ENCRYPT,
+       DECRYPT,
+       GIVENCRYPT,     /* presumably encrypt with IV generation - confirm */
+       NUM_OP          /* equals the number of enumerators listed above */
+};
+
+/**
+ * struct caam_drv_ctx - CAAM/QI backend driver context
+ *
+ * The jobs are processed by the driver against a driver context.
+ * A driver context is attached to every cryptographic context.
+ * The driver context contains data for private use by the driver.
+ * For the applications, this is an opaque structure.
+ *
+ * @prehdr: preheader placed before the shared descriptor
+ * @sh_desc: shared descriptor (at most MAX_SDLEN 32-bit words)
+ * @context_a: shared descriptor dma address
+ * @req_fq: to-CAAM request frame queue
+ * @rsp_fq: from-CAAM response frame queue
+ * @cpu: cpu on which to receive CAAM response
+ * @op_type: operation type (see enum optype)
+ * @qidev: device pointer for CAAM/QI backend
+ */
+struct caam_drv_ctx {
+       u32 prehdr[2];
+       u32 sh_desc[MAX_SDLEN];
+       dma_addr_t context_a;
+       struct qman_fq *req_fq;
+       struct qman_fq *rsp_fq;
+       int cpu;
+       enum optype op_type;
+       struct device *qidev;
+} ____cacheline_aligned;
+
+/**
+ * struct caam_drv_req - The request structure the driver application should
+ *                       fill while submitting a job to driver.
+ * @fd_sgt: QMan S/G pointing to output (fd_sgt[0]) and input (fd_sgt[1])
+ *          buffers.
+ * @drv_ctx: driver context the request is submitted against; presumably the
+ *           handle returned by caam_drv_ctx_init() - confirm against callers
+ * @cbk: callback function to invoke when job is completed
+ * @app_ctx: arbitrary context attached with request by the application
+ */
+struct caam_drv_req {
+       struct qm_sg_entry fd_sgt[2];
+       struct caam_drv_ctx *drv_ctx;
+       caam_qi_cbk cbk;
+       void *app_ctx;
+} ____cacheline_aligned;
+
+/**
+ * caam_drv_ctx_init - Initialise a CAAM/QI driver context
+ *
+ * A CAAM/QI driver context must be attached to each cryptographic context.
+ * This function allocates memory for the CAAM/QI context and returns a handle
+ * to the application. This handle must be submitted along with each enqueue
+ * request to the driver by the application.
+ *
+ * @qidev: device pointer for QI backend
+ * @cpu: CPU where the application prefers the driver to receive CAAM
+ *       responses. The request completion callback would be issued from this
+ *       CPU. Passed by pointer; NOTE(review): whether the driver writes back
+ *       the CPU actually chosen is not visible here - confirm in qi.c.
+ * @sh_desc: shared descriptor pointer to be attached to the CAAM/QI driver
+ *           context.
+ *
+ * Return: a driver context on success or a negative error code on failure
+ * (the return type is a pointer, so the error is presumably ERR_PTR()-encoded
+ * - confirm in qi.c).
+ */
+struct caam_drv_ctx *caam_drv_ctx_init(struct device *qidev, int *cpu,
+                                      u32 *sh_desc);
+
+/**
+ * caam_qi_enqueue - Submit a request to QI backend driver.
+ *
+ * The request structure must be properly filled, as described in the
+ * struct caam_drv_req documentation.
+ *
+ * @qidev: device pointer for QI backend
+ * @req: CAAM QI request structure
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int caam_qi_enqueue(struct device *qidev, struct caam_drv_req *req);
+
+/**
+ * caam_drv_ctx_busy - Check if there are too many jobs pending with CAAM
+ *                    or too many CAAM responses are pending to be processed.
+ * @drv_ctx: driver context for which job is to be submitted
+ *
+ * Return: the CAAM congestion status as a bool (true/false).
+ */
+bool caam_drv_ctx_busy(struct caam_drv_ctx *drv_ctx);
+
+/**
+ * caam_drv_ctx_update - Update QI driver context
+ *
+ * Invoked when the shared descriptor needs to be changed in a driver context.
+ *
+ * @drv_ctx: driver context to be updated
+ * @sh_desc: new shared descriptor pointer to be updated in QI driver context
+ *
+ * Return: 0 on success or negative error code on failure.
+ */
+int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc);
+
+/**
+ * caam_drv_ctx_rel - Release a QI driver context
+ * @drv_ctx: driver context to be released
+ */
+void caam_drv_ctx_rel(struct caam_drv_ctx *drv_ctx);
+
+int caam_qi_init(struct platform_device *pdev);        /* returns 0 on success */
+int caam_qi_shutdown(struct device *dev);      /* invoked by caam_remove() */
+
+/**
+ * qi_cache_alloc - Allocate buffers from CAAM-QI cache
+ *
+ * Invoked when a user of the CAAM-QI (i.e. caamalg-qi) needs data which has
+ * to be allocated on the hotpath. Instead of using kmalloc(), one can use the
+ * services of the CAAM QI memory cache (backed by kmem_cache). The buffers
+ * will have a size of 256B, which is sufficient for hosting 16 SG entries.
+ *
+ * @flags: GFP flags that would be used for the equivalent kmalloc() call
+ *
+ * Return: a pointer to a retrieved buffer on success or NULL on failure.
+ */
+void *qi_cache_alloc(gfp_t flags);
+
+/**
+ * qi_cache_free - Free a buffer allocated from the CAAM-QI cache
+ *
+ * Invoked when a user of the CAAM-QI (i.e. caamalg-qi) no longer needs
+ * the buffer previously allocated by a qi_cache_alloc() call.
+ * No checking is done; the call is passed straight through to
+ * kmem_cache_free(...)
+ *
+ * @obj: object previously allocated using qi_cache_alloc()
+ */
+void qi_cache_free(void *obj);
+
+#endif /* __QI_H__ */
author	Horia Geantă <horia.geanta@nxp.com>
	Fri, 17 Mar 2017 10:06:01 +0000 (12:06 +0200)
committer	Herbert Xu <herbert@gondor.apana.org.au>
	Fri, 24 Mar 2017 14:02:59 +0000 (22:02 +0800)
drivers/crypto/caam/Makefile		patch \| blob \| history
drivers/crypto/caam/ctrl.c		patch \| blob \| history
drivers/crypto/caam/intern.h		patch \| blob \| history
drivers/crypto/caam/qi.c	[new file with mode: 0644]	patch \| blob
drivers/crypto/caam/qi.h	[new file with mode: 0644]	patch \| blob