 #include <linux/crypto.h>
 #include <linux/workqueue.h>
 #include <linux/backing-dev.h>
+#include <linux/percpu.h>
 #include <asm/atomic.h>
 #include <linux/scatterlist.h>
 #include <asm/page.h>
 };
 
 struct iv_essiv_private {
-       struct crypto_cipher *tfm;
        struct crypto_hash *hash_tfm;
        u8 *salt;
 };
  * and encrypts / decrypts at the same time.
  */
 enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID };
+
+/*
+ * Duplicated per-CPU state for cipher.
+ */
+struct crypt_cpu {
+       struct ablkcipher_request *req;
+       struct crypto_ablkcipher *tfm;
+
+       /* ESSIV: struct crypto_cipher *essiv_tfm */
+       void *iv_private;
+};
+
+/*
+ * The fields in here must be read only after initialization;
+ * state that changes at runtime belongs in struct crypt_cpu.
+ */
 struct crypt_config {
        struct dm_dev *dev;
        sector_t start;
        sector_t iv_offset;
        unsigned int iv_size;
 
+       /*
+        * Duplicated per cpu state. Access through
+        * per_cpu_ptr() only.
+        */
+       struct crypt_cpu __percpu *cpu;
+
        /*
         * Layout of each crypto request:
         *
         * correctly aligned.
         */
        unsigned int dmreq_start;
-       struct ablkcipher_request *req;
 
-       struct crypto_ablkcipher *tfm;
        unsigned long flags;
        unsigned int key_size;
        u8 key[0];
 static void clone_init(struct dm_crypt_io *, struct bio *);
 static void kcryptd_queue_crypt(struct dm_crypt_io *io);
 
+static struct crypt_cpu *this_crypt_config(struct crypt_config *cc)
+{
+       return this_cpu_ptr(cc->cpu);
+}
+
+/*
+ * Use this to access cipher attributes that are the same for each CPU.
+ */
+static struct crypto_ablkcipher *any_tfm(struct crypt_config *cc)
+{
+       return __this_cpu_ptr(cc->cpu)->tfm;
+}
+
 /*
  * Different IV generation algorithms:
  *
        struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv;
        struct hash_desc desc;
        struct scatterlist sg;
-       int err;
+       struct crypto_cipher *essiv_tfm;
+       int err, cpu;
 
        sg_init_one(&sg, cc->key, cc->key_size);
        desc.tfm = essiv->hash_tfm;
        if (err)
                return err;
 
-       return crypto_cipher_setkey(essiv->tfm, essiv->salt,
+       for_each_possible_cpu(cpu) {
+               essiv_tfm = per_cpu_ptr(cc->cpu, cpu)->iv_private;
+
+               err = crypto_cipher_setkey(essiv_tfm, essiv->salt,
                                    crypto_hash_digestsize(essiv->hash_tfm));
+               if (err)
+                       return err;
+       }
+
+       return 0;
 }
 
 /* Wipe salt and reset key derived from volume key */
 {
        struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv;
        unsigned salt_size = crypto_hash_digestsize(essiv->hash_tfm);
+       struct crypto_cipher *essiv_tfm;
+       int cpu, r, err = 0;
 
        memset(essiv->salt, 0, salt_size);
 
-       return crypto_cipher_setkey(essiv->tfm, essiv->salt, salt_size);
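+       /* Re-key every per-CPU ESSIV tfm; remember the last error. */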
+       for_each_possible_cpu(cpu) {
+               essiv_tfm = per_cpu_ptr(cc->cpu, cpu)->iv_private;
+               r = crypto_cipher_setkey(essiv_tfm, essiv->salt, salt_size);
+               if (r)
+                       err = r;
+       }
+
+       return err;
+}
+
+/* Allocate and key the ESSIV cipher for one CPU */
+static struct crypto_cipher *setup_essiv_cpu(struct crypt_config *cc,
+                                            struct dm_target *ti,
+                                            u8 *salt, unsigned saltsize)
+{
+       struct crypto_cipher *essiv_tfm;
+       int err;
+
+       /* Setup the essiv_tfm with the given salt */
+       essiv_tfm = crypto_alloc_cipher(cc->cipher, 0, CRYPTO_ALG_ASYNC);
+       if (IS_ERR(essiv_tfm)) {
+               ti->error = "Error allocating crypto tfm for ESSIV";
+               return essiv_tfm;
+       }
+
+       if (crypto_cipher_blocksize(essiv_tfm) !=
+           crypto_ablkcipher_ivsize(any_tfm(cc))) {
+               ti->error = "Block size of ESSIV cipher does "
+                           "not match IV size of block cipher";
+               crypto_free_cipher(essiv_tfm);
+               return ERR_PTR(-EINVAL);
+       }
+
+       err = crypto_cipher_setkey(essiv_tfm, salt, saltsize);
+       if (err) {
+               ti->error = "Failed to set key for ESSIV cipher";
+               crypto_free_cipher(essiv_tfm);
+               return ERR_PTR(err);
+       }
+
+       return essiv_tfm;
 }
 
 static void crypt_iv_essiv_dtr(struct crypt_config *cc)
 {
+       int cpu;
+       struct crypt_cpu *cpu_cc;
+       struct crypto_cipher *essiv_tfm;
        struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv;
 
-       crypto_free_cipher(essiv->tfm);
-       essiv->tfm = NULL;
-
        crypto_free_hash(essiv->hash_tfm);
        essiv->hash_tfm = NULL;
 
        kzfree(essiv->salt);
        essiv->salt = NULL;
+
+       for_each_possible_cpu(cpu) {
+               cpu_cc = per_cpu_ptr(cc->cpu, cpu);
+               essiv_tfm = cpu_cc->iv_private;
+
+               if (essiv_tfm)
+                       crypto_free_cipher(essiv_tfm);
+
+               cpu_cc->iv_private = NULL;
+       }
 }
 
 static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
        struct crypto_cipher *essiv_tfm = NULL;
        struct crypto_hash *hash_tfm = NULL;
        u8 *salt = NULL;
-       int err;
+       int err, cpu;
 
        if (!opts) {
                ti->error = "Digest algorithm missing for ESSIV mode";
                goto bad;
        }
 
-       /* Allocate essiv_tfm */
-       essiv_tfm = crypto_alloc_cipher(cc->cipher, 0, CRYPTO_ALG_ASYNC);
-       if (IS_ERR(essiv_tfm)) {
-               ti->error = "Error allocating crypto tfm for ESSIV";
-               err = PTR_ERR(essiv_tfm);
-               goto bad;
-       }
-       if (crypto_cipher_blocksize(essiv_tfm) !=
-           crypto_ablkcipher_ivsize(cc->tfm)) {
-               ti->error = "Block size of ESSIV cipher does "
-                           "not match IV size of block cipher";
-               err = -EINVAL;
-               goto bad;
-       }
-
        cc->iv_gen_private.essiv.salt = salt;
-       cc->iv_gen_private.essiv.tfm = essiv_tfm;
        cc->iv_gen_private.essiv.hash_tfm = hash_tfm;
 
+       for_each_possible_cpu(cpu) {
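+               /* Allocate and key a separate ESSIV tfm for this CPU. */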
+               essiv_tfm = setup_essiv_cpu(cc, ti, salt,
+                                       crypto_hash_digestsize(hash_tfm));
+               if (IS_ERR(essiv_tfm)) {
+                       crypt_iv_essiv_dtr(cc);
+                       return PTR_ERR(essiv_tfm);
+               }
+               per_cpu_ptr(cc->cpu, cpu)->iv_private = essiv_tfm;
+       }
+
        return 0;
 
 bad:
-       if (essiv_tfm && !IS_ERR(essiv_tfm))
-               crypto_free_cipher(essiv_tfm);
        if (hash_tfm && !IS_ERR(hash_tfm))
                crypto_free_hash(hash_tfm);
        kfree(salt);
 
 static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, sector_t sector)
 {
+       struct crypto_cipher *essiv_tfm = this_crypt_config(cc)->iv_private;
+
        memset(iv, 0, cc->iv_size);
        *(u64 *)iv = cpu_to_le64(sector);
-       crypto_cipher_encrypt_one(cc->iv_gen_private.essiv.tfm, iv, iv);
+       crypto_cipher_encrypt_one(essiv_tfm, iv, iv);
+
        return 0;
 }
 
 static int crypt_iv_benbi_ctr(struct crypt_config *cc, struct dm_target *ti,
                              const char *opts)
 {
-       unsigned bs = crypto_ablkcipher_blocksize(cc->tfm);
+       unsigned bs = crypto_ablkcipher_blocksize(any_tfm(cc));
        int log = ilog2(bs);
 
        /* we need to calculate how far we must shift the sector count
 
        dmreq = dmreq_of_req(cc, req);
        iv = (u8 *)ALIGN((unsigned long)(dmreq + 1),
-                        crypto_ablkcipher_alignmask(cc->tfm) + 1);
+                        crypto_ablkcipher_alignmask(any_tfm(cc)) + 1);
 
        dmreq->ctx = ctx;
        sg_init_table(&dmreq->sg_in, 1);
 
 static void kcryptd_async_done(struct crypto_async_request *async_req,
                               int error);
+
 static void crypt_alloc_req(struct crypt_config *cc,
                            struct convert_context *ctx)
 {
-       if (!cc->req)
-               cc->req = mempool_alloc(cc->req_pool, GFP_NOIO);
-       ablkcipher_request_set_tfm(cc->req, cc->tfm);
-       ablkcipher_request_set_callback(cc->req, CRYPTO_TFM_REQ_MAY_BACKLOG |
-                                       CRYPTO_TFM_REQ_MAY_SLEEP,
-                                       kcryptd_async_done,
-                                       dmreq_of_req(cc, cc->req));
+       struct crypt_cpu *this_cc = this_crypt_config(cc);
+
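+       /*
+        * The request is cached per CPU and reused; it is set back to
+        * NULL once handed over to the async crypto path.
+        */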
+       if (!this_cc->req)
+               this_cc->req = mempool_alloc(cc->req_pool, GFP_NOIO);
+
+       ablkcipher_request_set_tfm(this_cc->req, this_cc->tfm);
+       ablkcipher_request_set_callback(this_cc->req,
+           CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+           kcryptd_async_done, dmreq_of_req(cc, this_cc->req));
 }
 
 /*
 static int crypt_convert(struct crypt_config *cc,
                         struct convert_context *ctx)
 {
+       struct crypt_cpu *this_cc = this_crypt_config(cc);
        int r;
 
        atomic_set(&ctx->pending, 1);
 
                atomic_inc(&ctx->pending);
 
-               r = crypt_convert_block(cc, ctx, cc->req);
+               r = crypt_convert_block(cc, ctx, this_cc->req);
 
                switch (r) {
                /* async */
                        INIT_COMPLETION(ctx->restart);
                        /* fall through*/
                case -EINPROGRESS:
-                       cc->req = NULL;
+                       this_cc->req = NULL;
                        ctx->sector++;
                        continue;
 
  * They must be separated as otherwise the final stages could be
  * starved by new requests which can block in the first stages due
  * to memory allocation.
+ *
+ * The work is done per CPU, globally for all dm-crypt instances.
+ * The work items should not depend on each other and do not block.
  */
 static void crypt_endio(struct bio *clone, int error)
 {
        }
 }
 
+static int crypt_setkey_allcpus(struct crypt_config *cc)
+{
+       int cpu, err = 0, r;
+
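+       /* Set the key on every per-CPU tfm; remember the last error. */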
+       for_each_possible_cpu(cpu) {
+               r = crypto_ablkcipher_setkey(per_cpu_ptr(cc->cpu, cpu)->tfm,
+                                              cc->key, cc->key_size);
+               if (r)
+                       err = r;
+       }
+
+       return err;
+}
+
 static int crypt_set_key(struct crypt_config *cc, char *key)
 {
        /* The key size may not be changed. */
 
        set_bit(DM_CRYPT_KEY_VALID, &cc->flags);
 
-       return crypto_ablkcipher_setkey(cc->tfm, cc->key, cc->key_size);
+       return crypt_setkey_allcpus(cc);
 }
 
 static int crypt_wipe_key(struct crypt_config *cc)
 {
        clear_bit(DM_CRYPT_KEY_VALID, &cc->flags);
        memset(&cc->key, 0, cc->key_size * sizeof(u8));
-       return crypto_ablkcipher_setkey(cc->tfm, cc->key, cc->key_size);
+
+       return crypt_setkey_allcpus(cc);
 }
 
 static void crypt_dtr(struct dm_target *ti)
 {
        struct crypt_config *cc = ti->private;
+       struct crypt_cpu *cpu_cc;
+       int cpu;
 
        ti->private = NULL;
 
        if (cc->crypt_queue)
                destroy_workqueue(cc->crypt_queue);
 
+       if (cc->cpu)
+               for_each_possible_cpu(cpu) {
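+                       /* Release this CPU's cached request and tfm. */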
+                       cpu_cc = per_cpu_ptr(cc->cpu, cpu);
+                       if (cpu_cc->req)
+                               mempool_free(cpu_cc->req, cc->req_pool);
+                       if (cpu_cc->tfm)
+                               crypto_free_ablkcipher(cpu_cc->tfm);
+               }
+
        if (cc->bs)
                bioset_free(cc->bs);
 
        if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
                cc->iv_gen_ops->dtr(cc);
 
-       if (cc->tfm && !IS_ERR(cc->tfm))
-               crypto_free_ablkcipher(cc->tfm);
-
        if (cc->dev)
                dm_put_device(ti, cc->dev);
 
+       if (cc->cpu)
+               free_percpu(cc->cpu);
+
        kzfree(cc->cipher);
        kzfree(cc->cipher_string);
 
                            char *cipher_in, char *key)
 {
        struct crypt_config *cc = ti->private;
+       struct crypto_ablkcipher *tfm;
        char *tmp, *cipher, *chainmode, *ivmode, *ivopts;
        char *cipher_api = NULL;
-       int ret = -EINVAL;
+       int cpu, ret = -EINVAL;
 
        /* Convert to crypto api definition? */
        if (strchr(cipher_in, '(')) {
        if (tmp)
                DMWARN("Ignoring unexpected additional cipher options");
 
+       cc->cpu = alloc_percpu(struct crypt_cpu);
+       if (!cc->cpu) {
+               ti->error = "Cannot allocate per cpu state";
+               goto bad_mem;
+       }
+
        /*
         * For compatibility with the original dm-crypt mapping format, if
         * only the cipher name is supplied, use cbc-plain.
        }
 
        /* Allocate cipher */
-       cc->tfm = crypto_alloc_ablkcipher(cipher_api, 0, 0);
-       if (IS_ERR(cc->tfm)) {
-               ret = PTR_ERR(cc->tfm);
-               ti->error = "Error allocating crypto tfm";
-               goto bad;
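+       /*
+        * Allocate one tfm per possible CPU so that requests can be
+        * processed in parallel without contending on a single tfm.
+        */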
+       for_each_possible_cpu(cpu) {
+               tfm = crypto_alloc_ablkcipher(cipher_api, 0, 0);
+               if (IS_ERR(tfm)) {
+                       ret = PTR_ERR(tfm);
+                       ti->error = "Error allocating crypto tfm";
+                       goto bad;
+               }
+               per_cpu_ptr(cc->cpu, cpu)->tfm = tfm;
        }
 
        /* Initialize and set key */
        }
 
        /* Initialize IV */
-       cc->iv_size = crypto_ablkcipher_ivsize(cc->tfm);
+       cc->iv_size = crypto_ablkcipher_ivsize(any_tfm(cc));
        if (cc->iv_size)
                /* at least a 64 bit sector number should fit in our buffer */
                cc->iv_size = max(cc->iv_size,
        }
 
        cc->dmreq_start = sizeof(struct ablkcipher_request);
-       cc->dmreq_start += crypto_ablkcipher_reqsize(cc->tfm);
+       cc->dmreq_start += crypto_ablkcipher_reqsize(any_tfm(cc));
        cc->dmreq_start = ALIGN(cc->dmreq_start, crypto_tfm_ctx_alignment());
-       cc->dmreq_start += crypto_ablkcipher_alignmask(cc->tfm) &
+       cc->dmreq_start += crypto_ablkcipher_alignmask(any_tfm(cc)) &
                           ~(crypto_tfm_ctx_alignment() - 1);
 
        cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start +
                ti->error = "Cannot allocate crypt request mempool";
                goto bad;
        }
-       cc->req = NULL;
 
        cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
        if (!cc->page_pool) {
        cc->start = tmpll;
 
        ret = -ENOMEM;
-       cc->io_queue = create_singlethread_workqueue("kcryptd_io");
+       cc->io_queue = alloc_workqueue("kcryptd_io",
+                                      WQ_NON_REENTRANT|
+                                      WQ_MEM_RECLAIM,
+                                      1);
        if (!cc->io_queue) {
                ti->error = "Couldn't create kcryptd io queue";
                goto bad;
        }
 
-       cc->crypt_queue = create_singlethread_workqueue("kcryptd");
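+       /*
+        * Encryption is CPU intensive; WQ_MEM_RECLAIM guarantees forward
+        * progress when this queue is used during memory reclaim.
+        */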
+       cc->crypt_queue = alloc_workqueue("kcryptd",
+                                         WQ_NON_REENTRANT|
+                                         WQ_CPU_INTENSIVE|
+                                         WQ_MEM_RECLAIM,
+                                         1);
        if (!cc->crypt_queue) {
                ti->error = "Couldn't create kcryptd queue";
                goto bad;
 
 static struct target_type crypt_target = {
        .name   = "crypt",
-       .version = {1, 8, 0},
+       .version = {1, 9, 0},
        .module = THIS_MODULE,
        .ctr    = crypt_ctr,
        .dtr    = crypt_dtr,