x86/crc32: update prototype for crc_pcl()

author Eric Biggers <ebiggers@google.com>

Mon, 2 Dec 2024 01:08:36 +0000 (17:08 -0800)

committer Eric Biggers <ebiggers@google.com>

Mon, 2 Dec 2024 01:23:01 +0000 (17:23 -0800)
author Eric Biggers <ebiggers@google.com>
Mon, 2 Dec 2024 01:08:36 +0000 (17:08 -0800)
committer Eric Biggers <ebiggers@google.com>
Mon, 2 Dec 2024 01:23:01 +0000 (17:23 -0800)
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c

index 52c5d47ef5a14e97a82ae304a264130a33c5e6ae..603d159de4007ca0fd6772829b276739d9d310a0 100644 (file)
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -41,8 +41,7 @@
   */
  #define CRC32C_PCL_BREAKEVEN   512
  
-asmlinkage unsigned int crc_pcl(const u8 *buffer, unsigned int len,
-                               unsigned int crc_init);
+asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
  #endif /* CONFIG_X86_64 */
  
  static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
@@ -159,7 +158,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
          */
         if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
                 kernel_fpu_begin();
-               *crcp = crc_pcl(data, len, *crcp);
+               *crcp = crc32c_x86_3way(*crcp, data, len);
                 kernel_fpu_end();
         } else
                 *crcp = crc32c_intel_le_hw(*crcp, data, len);
@@ -171,7 +170,7 @@ static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
  {
         if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
                 kernel_fpu_begin();
-               *(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
+               *(__le32 *)out = ~cpu_to_le32(crc32c_x86_3way(*crcp, data, len));
                 kernel_fpu_end();
         } else
                 *(__le32 *)out =
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S

index 752812bc4991df2778c1b9a4639148c0f231e981..9b8770503bbcdf31b0e85ef368c69362cae8785e 100644 (file)
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -52,15 +52,16 @@
  # regular CRC code that does not interleave the CRC instructions.
  #define SMALL_SIZE 200
  
-# unsigned int crc_pcl(const u8 *buffer, unsigned int len, unsigned int crc_init);
+# u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
  
  .text
-SYM_FUNC_START(crc_pcl)
-#define    bufp                  %rdi
-#define    bufp_d        %edi
-#define    len           %esi
-#define    crc_init      %edx
-#define    crc_init_q    %rdx
+SYM_FUNC_START(crc32c_x86_3way)
+#define    crc0                  %edi
+#define    crc0_q        %rdi
+#define    bufp                  %rsi
+#define    bufp_d        %esi
+#define    len           %rdx
+#define    len_dw        %edx
  #define    n_misaligned          %ecx /* overlaps chunk_bytes! */
  #define    n_misaligned_q %rcx
  #define    chunk_bytes   %ecx /* overlaps n_misaligned! */
@@ -85,9 +86,9 @@ SYM_FUNC_START(crc_pcl)
  .Ldo_align:
         movq    (bufp), %rax
         add     n_misaligned_q, bufp
-       sub     n_misaligned, len
+       sub     n_misaligned_q, len
  .Lalign_loop:
-       crc32b  %al, crc_init           # compute crc32 of 1-byte
+       crc32b  %al, crc0               # compute crc32 of 1-byte
         shr     $8, %rax                # get next byte
         dec     n_misaligned
         jne     .Lalign_loop
@@ -102,7 +103,7 @@ SYM_FUNC_START(crc_pcl)
  
  .Lpartial_block:
         # Compute floor(len / 24) to get num qwords to process from each lane.
-       imul    $2731, len, %eax        # 2731 = ceil(2^16 / 24)
+       imul    $2731, len_dw, %eax     # 2731 = ceil(2^16 / 24)
         shr     $16, %eax
         jmp     .Lcrc_3lanes
  
@@ -125,16 +126,16 @@ SYM_FUNC_START(crc_pcl)
         # Unroll the loop by a factor of 4 to reduce the overhead of the loop
         # bookkeeping instructions, which can compete with crc32q for the ALUs.
  .Lcrc_3lanes_4x_loop:
-       crc32q  (bufp), crc_init_q
+       crc32q  (bufp), crc0_q
         crc32q  (bufp,chunk_bytes_q), crc1
         crc32q  (bufp,chunk_bytes_q,2), crc2
-       crc32q  8(bufp), crc_init_q
+       crc32q  8(bufp), crc0_q
         crc32q  8(bufp,chunk_bytes_q), crc1
         crc32q  8(bufp,chunk_bytes_q,2), crc2
-       crc32q  16(bufp), crc_init_q
+       crc32q  16(bufp), crc0_q
         crc32q  16(bufp,chunk_bytes_q), crc1
         crc32q  16(bufp,chunk_bytes_q,2), crc2
-       crc32q  24(bufp), crc_init_q
+       crc32q  24(bufp), crc0_q
         crc32q  24(bufp,chunk_bytes_q), crc1
         crc32q  24(bufp,chunk_bytes_q,2), crc2
         add     $32, bufp
@@ -146,7 +147,7 @@ SYM_FUNC_START(crc_pcl)
         jz      .Lcrc_3lanes_last_qword
  
  .Lcrc_3lanes_1x_loop:
-       crc32q  (bufp), crc_init_q
+       crc32q  (bufp), crc0_q
         crc32q  (bufp,chunk_bytes_q), crc1
         crc32q  (bufp,chunk_bytes_q,2), crc2
         add     $8, bufp
@@ -154,7 +155,7 @@ SYM_FUNC_START(crc_pcl)
         jnz     .Lcrc_3lanes_1x_loop
  
  .Lcrc_3lanes_last_qword:
-       crc32q  (bufp), crc_init_q
+       crc32q  (bufp), crc0_q
         crc32q  (bufp,chunk_bytes_q), crc1
  # SKIP  crc32q (bufp,chunk_bytes_q,2), crc2    ; Don't do this one yet
  
@@ -165,9 +166,9 @@ SYM_FUNC_START(crc_pcl)
         lea     (K_table-8)(%rip), %rax         # first entry is for idx 1
         pmovzxdq (%rax,chunk_bytes_q), %xmm0    # 2 consts: K1:K2
         lea     (chunk_bytes,chunk_bytes,2), %eax # chunk_bytes * 3
-       sub     %eax, len                       # len -= chunk_bytes * 3
+       sub     %rax, len                       # len -= chunk_bytes * 3
  
-       movq    crc_init_q, %xmm1               # CRC for block 1
+       movq    crc0_q, %xmm1                   # CRC for block 1
         pclmulqdq $0x00, %xmm0, %xmm1           # Multiply by K2
  
         movq    crc1, %xmm2                     # CRC for block 2
@@ -176,8 +177,8 @@ SYM_FUNC_START(crc_pcl)
         pxor    %xmm2,%xmm1
         movq    %xmm1, %rax
         xor     (bufp,chunk_bytes_q,2), %rax
-       mov     crc2, crc_init_q
-       crc32   %rax, crc_init_q
+       mov     crc2, crc0_q
+       crc32   %rax, crc0_q
         lea     8(bufp,chunk_bytes_q,2), bufp
  
         ################################################################
@@ -193,34 +194,34 @@ SYM_FUNC_START(crc_pcl)
         ## 6) Process any remainder without interleaving:
         #######################################################################
  .Lsmall:
-       test    len, len
+       test    len_dw, len_dw
         jz      .Ldone
-       mov     len, %eax
+       mov     len_dw, %eax
         shr     $3, %eax
         jz      .Ldo_dword
  .Ldo_qwords:
-       crc32q  (bufp), crc_init_q
+       crc32q  (bufp), crc0_q
         add     $8, bufp
         dec     %eax
         jnz     .Ldo_qwords
  .Ldo_dword:
-       test    $4, len
+       test    $4, len_dw
         jz      .Ldo_word
-       crc32l  (bufp), crc_init
+       crc32l  (bufp), crc0
         add     $4, bufp
  .Ldo_word:
-       test    $2, len
+       test    $2, len_dw
         jz      .Ldo_byte
-       crc32w  (bufp), crc_init
+       crc32w  (bufp), crc0
         add     $2, bufp
  .Ldo_byte:
-       test    $1, len
+       test    $1, len_dw
         jz      .Ldone
-       crc32b  (bufp), crc_init
+       crc32b  (bufp), crc0
  .Ldone:
-       mov     crc_init, %eax
+       mov     crc0, %eax
          RET
-SYM_FUNC_END(crc_pcl)
+SYM_FUNC_END(crc32c_x86_3way)
  
  .section       .rodata, "a", @progbits
         ################################################################
author	Eric Biggers <ebiggers@google.com>
	Mon, 2 Dec 2024 01:08:36 +0000 (17:08 -0800)
committer	Eric Biggers <ebiggers@google.com>
	Mon, 2 Dec 2024 01:23:01 +0000 (17:23 -0800)
arch/x86/crypto/crc32c-intel_glue.c		patch | blob | history
arch/x86/crypto/crc32c-pcl-intel-asm_64.S		patch | blob | history