Replace the sequence that loads the tweak mask vector from memory with a
short instruction sequence that composes the vector directly in registers.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
        veor            \out, \out, \tmp
        .endm
 
-       .align          4
-.Lxts_mul_x:
-       .quad           1, 0x87
-
        /*
         * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
         *                   int blocks, u8 iv[])
         */
 __xts_prepare8:
        vld1.8          {q14}, [r7]             // load iv
-       __ldr           q15, .Lxts_mul_x        // load tweak mask
+       vmov.i32        d30, #0x87              // compose tweak mask vector
+       vmovl.u32       q15, d30
+       vshr.u64        d30, d31, #7
        vmov            q12, q14
 
        __adr           ip, 0f