www.infradead.org Git - users/willy/linux.git/commitdiff
lib/lzo: clean-up by introducing COPY16
author Matt Sealey <matt.sealey@arm.com>
Wed, 5 Dec 2018 00:14:23 +0000 (11:14 +1100)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Mon, 10 Dec 2018 08:29:17 +0000 (19:29 +1100)
Most compilers should be able to merge adjacent loads/stores of sizes
which are less than, but add up to a multiple of, a machine word size (in
effect a memcpy() of a constant amount). However, the COPY8 macro only
performs the copy; the pointer increment is done in the calling code,
hence we see

    *a = *b
    a += 8
    b += 8
    *a = *b
    a += 8
    b += 8

This introduces a dependency between the two groups of statements which
seems to defeat said compiler optimizers and generate some very strange
sequences of addition and subtraction of address offsets (i.e. it is
overcomplicated).

Since COPY8 is only ever used in pairs to copy 16 bytes at a time,
just define COPY16 as two back-to-back COPY8s. We keep the COPY8
definition so that the copy is still performed as unaligned accesses to
machine-sized words, per the original code's intent; we just no longer
use COPY8 directly in the code proper.

COPY16 then gives us code like:

    *a = *b
    *(a+8) = *(b+8)
    a += 16
    b += 16

This seems to allow compilers to generate much better code by using
base register writeback or simply positively incrementing offsets which
seems to positively affect performance. It is, at least, fewer
instructions to do the same job.

Link: http://lkml.kernel.org/r/20181127161913.23863-3-dave.rodgman@arm.com
Signed-off-by: Matt Sealey <matt.sealey@arm.com>
Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Markus F.X.J. Oberhumer <markus@oberhumer.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <nitingupta910@gmail.com>
Cc: Richard Purdie <rpurdie@openedhand.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: Sonny Rao <sonnyrao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
lib/lzo/lzo1x_compress.c
lib/lzo/lzo1x_decompress_safe.c
lib/lzo/lzodefs.h

index 236eb21167b5db1f5cd542efbf6c66dd770455fc..82fb5571ce5ec1762646b431c826be5d533b60f5 100644 (file)
@@ -60,8 +60,7 @@ next:
                                op += t;
                        } else if (t <= 16) {
                                *op++ = (t - 3);
-                               COPY8(op, ii);
-                               COPY8(op + 8, ii + 8);
+                               COPY16(op, ii);
                                op += t;
                        } else {
                                if (t <= 18) {
@@ -76,8 +75,7 @@ next:
                                        *op++ = tt;
                                }
                                do {
-                                       COPY8(op, ii);
-                                       COPY8(op + 8, ii + 8);
+                                       COPY16(op, ii);
                                        op += 16;
                                        ii += 16;
                                        t -= 16;
@@ -255,8 +253,7 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len,
                        *op++ = tt;
                }
                if (t >= 16) do {
-                       COPY8(op, ii);
-                       COPY8(op + 8, ii + 8);
+                       COPY16(op, ii);
                        op += 16;
                        ii += 16;
                        t -= 16;
index a1c387f6afba24c3a027456b48b44c603c3c3b96..aa95d3066b7d174d66387e60d7c173236ac79a0b 100644 (file)
@@ -86,12 +86,9 @@ copy_literal_run:
                                        const unsigned char *ie = ip + t;
                                        unsigned char *oe = op + t;
                                        do {
-                                               COPY8(op, ip);
-                                               op += 8;
-                                               ip += 8;
-                                               COPY8(op, ip);
-                                               op += 8;
-                                               ip += 8;
+                                               COPY16(op, ip);
+                                               op += 16;
+                                               ip += 16;
                                        } while (ip < ie);
                                        ip = ie;
                                        op = oe;
@@ -187,12 +184,9 @@ copy_literal_run:
                        unsigned char *oe = op + t;
                        if (likely(HAVE_OP(t + 15))) {
                                do {
-                                       COPY8(op, m_pos);
-                                       op += 8;
-                                       m_pos += 8;
-                                       COPY8(op, m_pos);
-                                       op += 8;
-                                       m_pos += 8;
+                                       COPY16(op, m_pos);
+                                       op += 16;
+                                       m_pos += 16;
                                } while (op < oe);
                                op = oe;
                                if (HAVE_IP(6)) {
index 497f9c9f03a80ced87a629ca762a515d0dd911d3..e1b3cf6459a9bd91809bdb6dd73e066bd2a5a351 100644 (file)
@@ -23,6 +23,9 @@
                COPY4(dst, src); COPY4((dst) + 4, (src) + 4)
 #endif
 
+#define COPY16(dst, src) \
+       do { COPY8(dst, src); COPY8((dst) + 8, (src) + 8); } while (0)
+
 #if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN)
 #error "conflicting endian definitions"
 #elif defined(CONFIG_X86_64)