ARM: p2v: switch to MOVW for Thumb2 and ARM/LPAE

author Ard Biesheuvel <ardb@kernel.org>
Sun, 20 Sep 2020 21:02:25 +0000 (23:02 +0200)
committer Ard Biesheuvel <ardb@kernel.org>
Wed, 28 Oct 2020 15:59:43 +0000 (16:59 +0100)
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h

index 4121662dea5a4465f9399e20e34723315fa1109d..ccf55cef6ab957e7c4bb4678c9a3cb849eba432f 100644 (file)
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -183,6 +183,7 @@ extern const void *__pv_table_begin, *__pv_table_end;
  #define PHYS_OFFSET    ((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT)
  #define PHYS_PFN_OFFSET        (__pv_phys_pfn_offset)
  
+#ifndef CONFIG_THUMB2_KERNEL
  #define __pv_stub(from,to,instr)                       \
         __asm__("@ __pv_stub\n"                         \
         "1:     " instr "       %0, %1, %2\n"           \
@@ -192,25 +193,45 @@ extern const void *__pv_table_begin, *__pv_table_end;
         : "=r" (to)                                     \
         : "r" (from), "I" (__PV_BITS_31_24))
  
-#define __pv_stub_mov_hi(t)                            \
-       __asm__ volatile("@ __pv_stub_mov\n"            \
-       "1:     mov     %R0, %1\n"                      \
+#define __pv_add_carry_stub(x, y)                      \
+       __asm__("@ __pv_add_carry_stub\n"               \
+       "0:     movw    %R0, #0\n"                      \
+       "       adds    %Q0, %1, %R0, lsl #24\n"        \
+       "1:     mov     %R0, %2\n"                      \
+       "       adc     %R0, %R0, #0\n"                 \
         "       .pushsection .pv_table,\"a\"\n"         \
-       "       .long   1b - .\n"                       \
+       "       .long   0b - ., 1b - .\n"               \
         "       .popsection\n"                          \
-       : "=r" (t)                                      \
-       : "I" (__PV_BITS_7_0))
+       : "=&r" (y)                                     \
+       : "r" (x), "I" (__PV_BITS_7_0)                  \
+       : "cc")
+
+#else
+#define __pv_stub(from,to,instr)                       \
+       __asm__("@ __pv_stub\n"                         \
+       "0:     movw    %0, #0\n"                       \
+       "       lsl     %0, #24\n"                      \
+       "       " instr " %0, %1, %0\n"                 \
+       "       .pushsection .pv_table,\"a\"\n"         \
+       "       .long   0b - .\n"                       \
+       "       .popsection\n"                          \
+       : "=&r" (to)                                    \
+       : "r" (from))
  
  #define __pv_add_carry_stub(x, y)                      \
-       __asm__ volatile("@ __pv_add_carry_stub\n"      \
-       "1:     adds    %Q0, %1, %2\n"                  \
+       __asm__("@ __pv_add_carry_stub\n"               \
+       "0:     movw    %R0, #0\n"                      \
+       "       lsls    %R0, #24\n"                     \
+       "       adds    %Q0, %1, %R0\n"                 \
+       "1:     mvn     %R0, #0\n"                      \
         "       adc     %R0, %R0, #0\n"                 \
         "       .pushsection .pv_table,\"a\"\n"         \
-       "       .long   1b - .\n"                       \
+       "       .long   0b - ., 1b - .\n"               \
         "       .popsection\n"                          \
-       : "+r" (y)                                      \
-       : "r" (x), "I" (__PV_BITS_31_24)                \
+       : "=&r" (y)                                     \
+       : "r" (x)                                       \
         : "cc")
+#endif
  
  static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
  {
@@ -219,7 +240,6 @@ static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
         if (sizeof(phys_addr_t) == 4) {
                 __pv_stub(x, t, "add");
         } else {
-               __pv_stub_mov_hi(t);
                 __pv_add_carry_stub(x, t);
         }
         return t;
diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S

index be8fb0d8987767dc6021026501cdbb5bab581b98..a4e364689663d895febcf4e1079645606d7f5100 100644 (file)
--- a/arch/arm/kernel/phys2virt.S
+++ b/arch/arm/kernel/phys2virt.S
@@ -1,7 +1,7 @@
  /* SPDX-License-Identifier: GPL-2.0-only */
  /*
   *  Copyright (C) 1994-2002 Russell King
- *  Copyright (c) 2003 ARM Limited
+ *  Copyright (c) 2003, 2020 ARM Limited
   *  All Rights Reserved
   */
  
@@ -58,55 +58,140 @@ __fixup_a_pv_table:
         mov     r6, r6, lsr #24
         cmn     r0, #1
  #ifdef CONFIG_THUMB2_KERNEL
+       @
+       @ The Thumb-2 versions of the patchable sequences are
+       @
+       @ phys-to-virt:                 movw    <reg>, #offset<31:24>
+       @                               lsl     <reg>, #24
+       @                               sub     <VA>, <PA>, <reg>
+       @
+       @ virt-to-phys (non-LPAE):      movw    <reg>, #offset<31:24>
+       @                               lsl     <reg>, #24
+       @                               add     <PA>, <VA>, <reg>
+       @
+       @ virt-to-phys (LPAE):          movw    <reg>, #offset<31:24>
+       @                               lsl     <reg>, #24
+       @                               adds    <PAlo>, <VA>, <reg>
+       @                               mov     <PAhi>, #offset<39:32>
+       @                               adc     <PAhi>, <PAhi>, #0
+       @
+       @ In the non-LPAE case, all patchable instructions are MOVW
+       @ instructions, where we need to patch in the offset into the
+       @ second halfword of the opcode (the 16-bit immediate is encoded
+       @ as imm4:i:imm3:imm8)
+       @
+       @       15       11 10  9           4 3    0  15  14  12 11 8 7    0
+       @      +-----------+---+-------------+------++---+------+----+------+
+       @ MOVW | 1 1 1 1 0 | i | 1 0 0 1 0 0 | imm4 || 0 | imm3 | Rd | imm8 |
+       @      +-----------+---+-------------+------++---+------+----+------+
+       @
+       @ In the LPAE case, we also need to patch in the high word of the
+       @ offset into the immediate field of the MOV instruction, or patch it
+       @ to a MVN instruction if the offset is negative. In this case, we
+       @ need to inspect the first halfword of the opcode, to check whether
+       @ it is MOVW or MOV/MVN, and to perform the MOV to MVN patching if
+       @ needed. The encoding of the immediate is rather complex for values
+       @ of i:imm3 != 0b0000, but fortunately, we never need more than 8 lower
+       @ order bits, which can be patched into imm8 directly (and i:imm3
+       @ cleared)
+       @
+       @      15       11 10  9        5         0  15  14  12 11 8 7    0
+       @     +-----------+---+---------------------++---+------+----+------+
+       @ MOV | 1 1 1 1 0 | i | 0 0 0 1 0 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
+       @ MVN | 1 1 1 1 0 | i | 0 0 0 1 1 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
+       @     +-----------+---+---------------------++---+------+----+------+
+       @
         moveq   r0, #0x200000           @ set bit 21, mov to mvn instruction
-       lsls    r6, #24
-       beq     .Lnext
-       clz     r7, r6
-       lsr     r6, #24
-       lsl     r6, r7
-       bic     r6, #0x0080
-       lsrs    r7, #1
-       orrcs   r6, #0x0080
-       orr     r6, r6, r7, lsl #12
-       orr     r6, #0x4000
         b       .Lnext
  .Lloop:        add     r7, r4
-       adds    r4, #4
-       ldrh    ip, [r7, #2]
-ARM_BE8(rev16  ip, ip)
-       tst     ip, #0x4000
-       and     ip, #0x8f00
-       orrne   ip, r6                  @ mask in offset bits 31-24
-       orreq   ip, r0                  @ mask in offset bits 7-0
-ARM_BE8(rev16  ip, ip)
-       strh    ip, [r7, #2]
-       bne     .Lnext
+       adds    r4, #4                  @ clears Z flag
+#ifdef CONFIG_ARM_LPAE
         ldrh    ip, [r7]
  ARM_BE8(rev16  ip, ip)
-       bic     ip, #0x20
-       orr     ip, ip, r0, lsr #16
+       tst     ip, #0x200              @ MOVW has bit 9 set, MVN has it clear
+       bne     0f                      @ skip to MOVW handling (Z flag is clear)
+       bic     ip, #0x20               @ clear bit 5 (MVN -> MOV)
+       orr     ip, ip, r0, lsr #16     @ MOV -> MVN if offset < 0
  ARM_BE8(rev16  ip, ip)
         strh    ip, [r7]
+       @ Z flag is set
+0:
+#endif
+       ldrh    ip, [r7, #2]
+ARM_BE8(rev16  ip, ip)
+       and     ip, #0xf00              @ clear everything except Rd field
+       orreq   ip, r0                  @ Z flag set -> MOV/MVN -> patch in high bits
+       orrne   ip, r6                  @ Z flag clear -> MOVW -> patch in low bits
+ARM_BE8(rev16  ip, ip)
+       strh    ip, [r7, #2]
  #else
  #ifdef CONFIG_CPU_ENDIAN_BE8
  @ in BE8, we load data in BE, but instructions still in LE
-#define PV_BIT22       0x00004000
+#define PV_BIT24       0x00000001
  #define PV_IMM8_MASK   0xff000000
-#define PV_ROT_MASK    0x000f0000
  #else
-#define PV_BIT22       0x00400000
+#define PV_BIT24       0x01000000
  #define PV_IMM8_MASK   0x000000ff
-#define PV_ROT_MASK    0xf00
  #endif
  
+       @
+       @ The ARM versions of the patchable sequences are
+       @
+       @ phys-to-virt:                 sub     <VA>, <PA>, #offset<31:24>, lsl #24
+       @
+       @ virt-to-phys (non-LPAE):      add     <PA>, <VA>, #offset<31:24>, lsl #24
+       @
+       @ virt-to-phys (LPAE):          movw    <reg>, #offset<31:24>
+       @                               adds    <PAlo>, <VA>, <reg>, lsl #24
+       @                               mov     <PAhi>, #offset<39:32>
+       @                               adc     <PAhi>, <PAhi>, #0
+       @
+       @ In the non-LPAE case, all patchable instructions are ADD or SUB
+       @ instructions, where we need to patch in the offset into the
+       @ immediate field of the opcode, which is emitted with the correct
+       @ rotation value. (The effective value of the immediate is imm12<7:0>
+       @ rotated right by [2 * imm12<11:8>] bits)
+       @
+       @      31   28 27      23 22  20 19  16 15  12 11    0
+       @      +------+-----------------+------+------+-------+
+       @  ADD | cond | 0 0 1 0 1 0 0 0 |  Rn  |  Rd  | imm12 |
+       @  SUB | cond | 0 0 1 0 0 1 0 0 |  Rn  |  Rd  | imm12 |
+       @  MOV | cond | 0 0 1 1 1 0 1 0 |  Rn  |  Rd  | imm12 |
+       @  MVN | cond | 0 0 1 1 1 1 1 0 |  Rn  |  Rd  | imm12 |
+       @      +------+-----------------+------+------+-------+
+       @
+       @ In the LPAE case, we use a MOVW instruction to carry the low offset
+       @ word, and patch in the high word of the offset into the immediate
+       @ field of the subsequent MOV instruction, or patch it to a MVN
+       @ instruction if the offset is negative. We can distinguish MOVW
+       @ instructions based on bits 23:22 of the opcode, and ADD/SUB can be
+       @ distinguished from MOV/MVN (all using the encodings above) using
+       @ bit 24.
+       @
+       @      31   28 27      23 22  20 19  16 15  12 11    0
+       @      +------+-----------------+------+------+-------+
+       @ MOVW | cond | 0 0 1 1 0 0 0 0 | imm4 |  Rd  | imm12 |
+       @      +------+-----------------+------+------+-------+
+       @
         moveq   r0, #0x400000           @ set bit 22, mov to mvn instruction
         b       .Lnext
  .Lloop:        ldr     ip, [r7, r4]
+#ifdef CONFIG_ARM_LPAE
+       tst     ip, #PV_BIT24           @ ADD/SUB have bit 24 clear
+       beq     1f
+ARM_BE8(rev    ip, ip)
+       tst     ip, #0xc00000           @ MOVW has bits 23:22 clear
+       bic     ip, ip, #0x400000       @ clear bit 22
+       bfc     ip, #0, #12             @ clear imm12 field of MOV[W] instruction
+       orreq   ip, ip, r6              @ MOVW -> mask in offset bits 31-24
+       orrne   ip, ip, r0              @ MOV  -> mask in offset bits 7-0 (or bit 22)
+ARM_BE8(rev    ip, ip)
+       b       2f
+1:
+#endif
         bic     ip, ip, #PV_IMM8_MASK
-       tst     ip, #PV_ROT_MASK                @ check the rotation field
-       orrne   ip, ip, r6 ARM_BE8(, lsl #24)   @ mask in offset bits 31-24
-       biceq   ip, ip, #PV_BIT22               @ clear bit 22
-       orreq   ip, ip, r0 ARM_BE8(, ror #8)    @ mask in offset bits 7-0 (or bit 22)
+       orr     ip, ip, r6 ARM_BE8(, lsl #24)   @ mask in offset bits 31-24
+2:
         str     ip, [r7, r4]
         add     r4, r4, #4
  #endif
author	Ard Biesheuvel <ardb@kernel.org>
	Sun, 20 Sep 2020 21:02:25 +0000 (23:02 +0200)
committer	Ard Biesheuvel <ardb@kernel.org>
	Wed, 28 Oct 2020 15:59:43 +0000 (16:59 +0100)
arch/arm/include/asm/memory.h		patch | blob | history
arch/arm/kernel/phys2virt.S		patch | blob | history