/*
  * aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions
  *
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 #include <crypto/aes.h>
 #include <crypto/scatterwalk.h>
 asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
                                 u32 rounds);
 
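+/*
+ * Scalar AES cipher from the arm64 generic AES module; it leaves the NEON
+ * register file untouched, so it is safe to call when kernel-mode NEON is
+ * unavailable.
+ */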
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
 static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
                      unsigned int key_len)
 {
        return 0;
 }
 
-static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
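+/*
+ * Feed @abytes bytes at @in into the CBC-MAC. @macp tracks how many bytes of
+ * the current MAC block are occupied; when @use_neon is false, the MAC is
+ * updated with the scalar AES cipher instead of the NEON code.
+ */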
+static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
+                          u32 abytes, u32 *macp, bool use_neon)
+{
+       if (likely(use_neon)) {
+               ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc,
+                                    num_rounds(key));
+       } else {
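+               /*
+                * Scalar fallback: maintain the CBC-MAC by hand, topping up
+                * a partially filled MAC block first.
+                */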
+               if (*macp > 0 && *macp < AES_BLOCK_SIZE) {
+                       int added = min(abytes, AES_BLOCK_SIZE - *macp);
+
+                       crypto_xor(&mac[*macp], in, added);
+
+                       *macp += added;
+                       in += added;
+                       abytes -= added;
+               }
+
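+               /*
+                * Absorb full blocks: encrypt the pending MAC state first,
+                * then xor in the next block of input.
+                */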
+               while (abytes > AES_BLOCK_SIZE) {
+                       __aes_arm64_encrypt(key->key_enc, mac, mac,
+                                           num_rounds(key));
+                       crypto_xor(mac, in, AES_BLOCK_SIZE);
+
+                       in += AES_BLOCK_SIZE;
+                       abytes -= AES_BLOCK_SIZE;
+               }
+
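+               /*
+                * Flush the pending MAC state before xor'ing in the tail;
+                * *macp records the fill level for the next call.
+                */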
+               if (abytes > 0) {
+                       __aes_arm64_encrypt(key->key_enc, mac, mac,
+                                           num_rounds(key));
+                       crypto_xor(mac, in, abytes);
+                       *macp = abytes;
+               } else {
+                       *macp = 0;
+               }
+       }
+}
+
+static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[],
+                                  bool use_neon)
 {
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
        struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
                ltag.len = 6;
        }
 
-       ce_aes_ccm_auth_data(mac, (u8 *)&ltag, ltag.len, &macp, ctx->key_enc,
-                            num_rounds(ctx));
+       ccm_update_mac(ctx, mac, (u8 *)&ltag, ltag.len, &macp, use_neon);
        scatterwalk_start(&walk, req->src);
 
        do {
                        n = scatterwalk_clamp(&walk, len);
                }
                p = scatterwalk_map(&walk);
-               ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc,
-                                    num_rounds(ctx));
+               ccm_update_mac(ctx, mac, p, n, &macp, use_neon);
                len -= n;
 
                scatterwalk_unmap(p);
        } while (len);
 }
 
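+/*
+ * CCM en/decryption without the NEON unit: for each block, generate the CTR
+ * keystream and update the CBC-MAC using the scalar AES cipher.
+ */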
+static int ccm_crypt_fallback(struct skcipher_walk *walk, u8 mac[], u8 iv0[],
+                             struct crypto_aes_ctx *ctx, bool enc)
+{
+       u8 buf[AES_BLOCK_SIZE];
+       int err = 0;
+
+       while (walk->nbytes) {
+               int blocks = walk->nbytes / AES_BLOCK_SIZE;
+               u32 tail = walk->nbytes % AES_BLOCK_SIZE;
+               u8 *dst = walk->dst.virt.addr;
+               u8 *src = walk->src.virt.addr;
+               u32 nbytes = walk->nbytes;
+
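+               /*
+                * In the final step of the walk, process the partial tail
+                * block in this pass instead of handing it back.
+                */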
+               if (nbytes == walk->total && tail > 0) {
+                       blocks++;
+                       tail = 0;
+               }
+
+               do {
+                       u32 bsize = AES_BLOCK_SIZE;
+
+                       if (nbytes < AES_BLOCK_SIZE)
+                               bsize = nbytes;
+
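+                       /*
+                        * Compute the next CTR keystream block, and encrypt
+                        * the pending CBC-MAC state in the same pass.
+                        */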
+                       crypto_inc(walk->iv, AES_BLOCK_SIZE);
+                       __aes_arm64_encrypt(ctx->key_enc, buf, walk->iv,
+                                           num_rounds(ctx));
+                       __aes_arm64_encrypt(ctx->key_enc, mac, mac,
+                                           num_rounds(ctx));
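+                       /*
+                        * The CBC-MAC covers the plaintext: the source
+                        * buffer when encrypting, the freshly decrypted
+                        * destination otherwise.
+                        */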
+                       if (enc)
+                               crypto_xor(mac, src, bsize);
+                       crypto_xor_cpy(dst, src, buf, bsize);
+                       if (!enc)
+                               crypto_xor(mac, dst, bsize);
+                       dst += bsize;
+                       src += bsize;
+                       nbytes -= bsize;
+               } while (--blocks);
+
+               err = skcipher_walk_done(walk, tail);
+       }
+
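+       /*
+        * Produce the tag: encrypt the original counter block and the final
+        * MAC state, then combine the two.
+        */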
+       if (!err) {
+               __aes_arm64_encrypt(ctx->key_enc, buf, iv0, num_rounds(ctx));
+               __aes_arm64_encrypt(ctx->key_enc, mac, mac, num_rounds(ctx));
+               crypto_xor(mac, buf, AES_BLOCK_SIZE);
+       }
+       return err;
+}
+
 static int ccm_encrypt(struct aead_request *req)
 {
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
        u8 __aligned(8) mac[AES_BLOCK_SIZE];
        u8 buf[AES_BLOCK_SIZE];
        u32 len = req->cryptlen;
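+       /* check up front whether kernel-mode NEON is usable in this context */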
+       bool use_neon = may_use_simd();
        int err;
 
        err = ccm_init_mac(req, mac, len);
        if (err)
                return err;
 
-       kernel_neon_begin_partial(6);
+       if (likely(use_neon))
+               kernel_neon_begin();
 
        if (req->assoclen)
-               ccm_calculate_auth_mac(req, mac);
+               ccm_calculate_auth_mac(req, mac, use_neon);
 
        /* preserve the original iv for the final round */
        memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
        err = skcipher_walk_aead_encrypt(&walk, req, true);
 
-       while (walk.nbytes) {
-               u32 tail = walk.nbytes % AES_BLOCK_SIZE;
-
-               if (walk.nbytes == walk.total)
-                       tail = 0;
+       if (likely(use_neon)) {
+               while (walk.nbytes) {
+                       u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
-               ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-                                  walk.nbytes - tail, ctx->key_enc,
-                                  num_rounds(ctx), mac, walk.iv);
+                       if (walk.nbytes == walk.total)
+                               tail = 0;
 
-               err = skcipher_walk_done(&walk, tail);
-       }
-       if (!err)
-               ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+                       ce_aes_ccm_encrypt(walk.dst.virt.addr,
+                                          walk.src.virt.addr,
+                                          walk.nbytes - tail, ctx->key_enc,
+                                          num_rounds(ctx), mac, walk.iv);
 
-       kernel_neon_end();
+                       err = skcipher_walk_done(&walk, tail);
+               }
+               if (!err)
+                       ce_aes_ccm_final(mac, buf, ctx->key_enc,
+                                        num_rounds(ctx));
 
+               kernel_neon_end();
+       } else {
+               err = ccm_crypt_fallback(&walk, mac, buf, ctx, true);
+       }
        if (err)
                return err;
 
        u8 __aligned(8) mac[AES_BLOCK_SIZE];
        u8 buf[AES_BLOCK_SIZE];
        u32 len = req->cryptlen - authsize;
+       bool use_neon = may_use_simd();
        int err;
 
        err = ccm_init_mac(req, mac, len);
        if (err)
                return err;
 
-       kernel_neon_begin_partial(6);
+       if (likely(use_neon))
+               kernel_neon_begin();
 
        if (req->assoclen)
-               ccm_calculate_auth_mac(req, mac);
+               ccm_calculate_auth_mac(req, mac, use_neon);
 
        /* preserve the original iv for the final round */
        memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
        err = skcipher_walk_aead_decrypt(&walk, req, true);
 
-       while (walk.nbytes) {
-               u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+       if (likely(use_neon)) {
+               while (walk.nbytes) {
+                       u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
-               if (walk.nbytes == walk.total)
-                       tail = 0;
+                       if (walk.nbytes == walk.total)
+                               tail = 0;
 
-               ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-                                  walk.nbytes - tail, ctx->key_enc,
-                                  num_rounds(ctx), mac, walk.iv);
+                       ce_aes_ccm_decrypt(walk.dst.virt.addr,
+                                          walk.src.virt.addr,
+                                          walk.nbytes - tail, ctx->key_enc,
+                                          num_rounds(ctx), mac, walk.iv);
 
-               err = skcipher_walk_done(&walk, tail);
-       }
-       if (!err)
-               ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+                       err = skcipher_walk_done(&walk, tail);
+               }
+               if (!err)
+                       ce_aes_ccm_final(mac, buf, ctx->key_enc,
+                                        num_rounds(ctx));
 
-       kernel_neon_end();
+               kernel_neon_end();
+       } else {
+               err = ccm_crypt_fallback(&walk, mac, buf, ctx, false);
+       }
 
        if (err)
                return err;