/* Copy the metadata following the memmove() behavior. */
 void kmsan_internal_memmove_metadata(void *dst, void *src, size_t n)
 {
+       depot_stack_handle_t prev_old_origin = 0, prev_new_origin = 0;
+       int i, iter, step, src_off, dst_off, oiter_src, oiter_dst;
        depot_stack_handle_t old_origin = 0, new_origin = 0;
-       int src_slots, dst_slots, i, iter, step, skip_bits;
        depot_stack_handle_t *origin_src, *origin_dst;
-       void *shadow_src, *shadow_dst;
-       u32 *align_shadow_src, shadow;
+       u8 *shadow_src, *shadow_dst;
+       u32 *align_shadow_dst;
        bool backwards;
 
        shadow_dst = kmsan_get_metadata(dst, KMSAN_META_SHADOW);
        if (!shadow_dst)
                return;
        KMSAN_WARN_ON(!kmsan_metadata_is_contiguous(dst, n));
+       align_shadow_dst =
+               (u32 *)ALIGN_DOWN((u64)shadow_dst, KMSAN_ORIGIN_SIZE);
 
        shadow_src = kmsan_get_metadata(src, KMSAN_META_SHADOW);
        if (!shadow_src) {
-               /*
-                * @src is untracked: zero out destination shadow, ignore the
-                * origins, we're done.
-                */
-               __memset(shadow_dst, 0, n);
+               /* @src is untracked: mark @dst as initialized. */
+               kmsan_internal_unpoison_memory(dst, n, /*checked*/ false);
                return;
        }
        KMSAN_WARN_ON(!kmsan_metadata_is_contiguous(src, n));
 
-       __memmove(shadow_dst, shadow_src, n);
-
        origin_dst = kmsan_get_metadata(dst, KMSAN_META_ORIGIN);
        origin_src = kmsan_get_metadata(src, KMSAN_META_ORIGIN);
        KMSAN_WARN_ON(!origin_dst || !origin_src);
-       src_slots = (ALIGN((u64)src + n, KMSAN_ORIGIN_SIZE) -
-                    ALIGN_DOWN((u64)src, KMSAN_ORIGIN_SIZE)) /
-                   KMSAN_ORIGIN_SIZE;
-       dst_slots = (ALIGN((u64)dst + n, KMSAN_ORIGIN_SIZE) -
-                    ALIGN_DOWN((u64)dst, KMSAN_ORIGIN_SIZE)) /
-                   KMSAN_ORIGIN_SIZE;
-       KMSAN_WARN_ON((src_slots < 1) || (dst_slots < 1));
-       KMSAN_WARN_ON((src_slots - dst_slots > 1) ||
-                     (dst_slots - src_slots < -1));
 
        backwards = dst > src;
-       i = backwards ? min(src_slots, dst_slots) - 1 : 0;
-       iter = backwards ? -1 : 1;
-
-       align_shadow_src =
-               (u32 *)ALIGN_DOWN((u64)shadow_src, KMSAN_ORIGIN_SIZE);
-       for (step = 0; step < min(src_slots, dst_slots); step++, i += iter) {
-               KMSAN_WARN_ON(i < 0);
-               shadow = align_shadow_src[i];
-               if (i == 0) {
-                       /*
-                        * If @src isn't aligned on KMSAN_ORIGIN_SIZE, don't
-                        * look at the first @src % KMSAN_ORIGIN_SIZE bytes
-                        * of the first shadow slot.
-                        */
-                       skip_bits = ((u64)src % KMSAN_ORIGIN_SIZE) * 8;
-                       shadow = (shadow >> skip_bits) << skip_bits;
+       step = backwards ? -1 : 1;
+       iter = backwards ? n - 1 : 0;
+       src_off = (u64)src % KMSAN_ORIGIN_SIZE;
+       dst_off = (u64)dst % KMSAN_ORIGIN_SIZE;
+
+       /* Copy shadow bytes one by one, updating the origins if necessary. */
+       for (i = 0; i < n; i++, iter += step) {
+               oiter_src = (iter + src_off) / KMSAN_ORIGIN_SIZE;
+               oiter_dst = (iter + dst_off) / KMSAN_ORIGIN_SIZE;
+               if (!shadow_src[iter]) {
+                       shadow_dst[iter] = 0;
+                       if (!align_shadow_dst[oiter_dst])
+                               origin_dst[oiter_dst] = 0;
+                       continue;
                }
-               if (i == src_slots - 1) {
-                       /*
-                        * If @src + n isn't aligned on
-                        * KMSAN_ORIGIN_SIZE, don't look at the last
-                        * (@src + n) % KMSAN_ORIGIN_SIZE bytes of the
-                        * last shadow slot.
-                        */
-                       skip_bits = (((u64)src + n) % KMSAN_ORIGIN_SIZE) * 8;
-                       shadow = (shadow << skip_bits) >> skip_bits;
-               }
-               /*
-                * Overwrite the origin only if the corresponding
-                * shadow is nonempty.
-                */
-               if (origin_src[i] && (origin_src[i] != old_origin) && shadow) {
-                       old_origin = origin_src[i];
-                       new_origin = kmsan_internal_chain_origin(old_origin);
+               shadow_dst[iter] = shadow_src[iter];
+               old_origin = origin_src[oiter_src];
+               if (old_origin == prev_old_origin)
+                       new_origin = prev_new_origin;
+               else {
                        /*
                         * kmsan_internal_chain_origin() may return
                         * NULL, but we don't want to lose the previous
                         * origin value.
                         */
+                       new_origin = kmsan_internal_chain_origin(old_origin);
                        if (!new_origin)
                                new_origin = old_origin;
                }
-               if (shadow)
-                       origin_dst[i] = new_origin;
-               else
-                       origin_dst[i] = 0;
-       }
-       /*
-        * If dst_slots is greater than src_slots (i.e.
-        * dst_slots == src_slots + 1), there is an extra origin slot at the
-        * beginning or end of the destination buffer, for which we take the
-        * origin from the previous slot.
-        * This is only done if the part of the source shadow corresponding to
-        * slot is non-zero.
-        *
-        * E.g. if we copy 8 aligned bytes that are marked as uninitialized
-        * and have origins o111 and o222, to an unaligned buffer with offset 1,
-        * these two origins are copied to three origin slots, so one of then
-        * needs to be duplicated, depending on the copy direction (@backwards)
-        *
-        *   src shadow: |uuuu|uuuu|....|
-        *   src origin: |o111|o222|....|
-        *
-        * backwards = 0:
-        *   dst shadow: |.uuu|uuuu|u...|
-        *   dst origin: |....|o111|o222| - fill the empty slot with o111
-        * backwards = 1:
-        *   dst shadow: |.uuu|uuuu|u...|
-        *   dst origin: |o111|o222|....| - fill the empty slot with o222
-        */
-       if (src_slots < dst_slots) {
-               if (backwards) {
-                       shadow = align_shadow_src[src_slots - 1];
-                       skip_bits = (((u64)dst + n) % KMSAN_ORIGIN_SIZE) * 8;
-                       shadow = (shadow << skip_bits) >> skip_bits;
-                       if (shadow)
-                               /* src_slots > 0, therefore dst_slots is at least 2 */
-                               origin_dst[dst_slots - 1] =
-                                       origin_dst[dst_slots - 2];
-               } else {
-                       shadow = align_shadow_src[0];
-                       skip_bits = ((u64)dst % KMSAN_ORIGIN_SIZE) * 8;
-                       shadow = (shadow >> skip_bits) << skip_bits;
-                       if (shadow)
-                               origin_dst[0] = origin_dst[1];
-               }
+               origin_dst[oiter_dst] = new_origin;
+               prev_new_origin = new_origin;
+               prev_old_origin = old_origin;
        }
 }