From bdb86e9c9bcc490d2425290566e83853e4e10f93 Mon Sep 17 00:00:00 2001
From: Matt Sealey
Date: Wed, 5 Dec 2018 11:14:23 +1100
Subject: [PATCH] lib/lzo: clean-up by introducing COPY16

Most compilers should be able to merge adjacent loads/stores of sizes
which are each less than a machine word but which together make up a
multiple of the machine word size (in effect a memcpy() of a constant
amount). However, the semantics of the macro are that it just does the
copy; the pointer increment is in the code, hence we see:

    *a = *b
    a += 8
    b += 8
    *a = *b
    a += 8
    b += 8

This introduces a dependency between the two groups of statements which
seems to defeat said compiler optimizers and generate some very strange
sequences of addition and subtraction of address offsets (i.e. it is
overcomplicated).

Since COPY8 is only ever used to copy amounts of 16 bytes (in pairs), just
define COPY16 as COPY8,COPY8. We leave the definition of COPY8 in place to
preserve the need to do unaligned accesses to machine-sized words per the
original code intent; we just don't use it in the code proper.

COPY16 then gives us code like:

    *a = *b
    *(a+8) = *(b+8)
    a += 16
    b += 16

This seems to allow compilers to generate much better code by using base
register writeback or simply positively incrementing offsets, which seems
to positively affect performance. It is, at least, fewer instructions to
do the same job.

Link: http://lkml.kernel.org/r/20181127161913.23863-3-dave.rodgman@arm.com
Signed-off-by: Matt Sealey
Signed-off-by: Dave Rodgman
Cc: David S. Miller
Cc: Greg Kroah-Hartman
Cc: Herbert Xu
Cc: Markus F.X.J.
Oberhumer Cc: Minchan Kim Cc: Nitin Gupta Cc: Richard Purdie Cc: Sergey Senozhatsky Cc: Sonny Rao Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- lib/lzo/lzo1x_compress.c | 9 +++------ lib/lzo/lzo1x_decompress_safe.c | 18 ++++++------------ lib/lzo/lzodefs.h | 3 +++ 3 files changed, 12 insertions(+), 18 deletions(-) diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c index 236eb21167b5..82fb5571ce5e 100644 --- a/lib/lzo/lzo1x_compress.c +++ b/lib/lzo/lzo1x_compress.c @@ -60,8 +60,7 @@ next: op += t; } else if (t <= 16) { *op++ = (t - 3); - COPY8(op, ii); - COPY8(op + 8, ii + 8); + COPY16(op, ii); op += t; } else { if (t <= 18) { @@ -76,8 +75,7 @@ next: *op++ = tt; } do { - COPY8(op, ii); - COPY8(op + 8, ii + 8); + COPY16(op, ii); op += 16; ii += 16; t -= 16; @@ -255,8 +253,7 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len, *op++ = tt; } if (t >= 16) do { - COPY8(op, ii); - COPY8(op + 8, ii + 8); + COPY16(op, ii); op += 16; ii += 16; t -= 16; diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c index a1c387f6afba..aa95d3066b7d 100644 --- a/lib/lzo/lzo1x_decompress_safe.c +++ b/lib/lzo/lzo1x_decompress_safe.c @@ -86,12 +86,9 @@ copy_literal_run: const unsigned char *ie = ip + t; unsigned char *oe = op + t; do { - COPY8(op, ip); - op += 8; - ip += 8; - COPY8(op, ip); - op += 8; - ip += 8; + COPY16(op, ip); + op += 16; + ip += 16; } while (ip < ie); ip = ie; op = oe; @@ -187,12 +184,9 @@ copy_literal_run: unsigned char *oe = op + t; if (likely(HAVE_OP(t + 15))) { do { - COPY8(op, m_pos); - op += 8; - m_pos += 8; - COPY8(op, m_pos); - op += 8; - m_pos += 8; + COPY16(op, m_pos); + op += 16; + m_pos += 16; } while (op < oe); op = oe; if (HAVE_IP(6)) { diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h index 497f9c9f03a8..e1b3cf6459a9 100644 --- a/lib/lzo/lzodefs.h +++ b/lib/lzo/lzodefs.h @@ -23,6 +23,9 @@ COPY4(dst, src); COPY4((dst) + 4, (src) + 4) #endif +#define COPY16(dst, src) \ + do { COPY8(dst, 
src); COPY8((dst) + 8, (src) + 8); } while (0) + #if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN) #error "conflicting endian definitions" #elif defined(CONFIG_X86_64) -- 2.50.1