select RTC_LIB
        select SYS_SUPPORTS_APM_EMULATION
        select THREAD_INFO_IN_TASK if CURRENT_POINTER_IN_TPIDRURO
+       select HAVE_ARCH_VMAP_STACK if MMU && THREAD_INFO_IN_TASK && (!LD_IS_LLD || LLD_VERSION >= 140000)
        select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M
        # Above selects are sorted alphabetically; please add new ones
        # according to that.  Thanks.
 
 #include <asm/pgtable-2level-types.h>
 #endif
 
+#ifdef CONFIG_VMAP_STACK
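+/*
+ * With vmap'ed stacks, ask the core vmalloc code to notify us of PMD level
+ * changes to the kernel page tables, so that stale PMD entries covering the
+ * vmalloc area can be synced before switching to a newly mapped stack
+ * (see arch_sync_kernel_mappings()).
+ */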
+#define ARCH_PAGE_TABLE_SYNC_MASK      PGTBL_PMD_MODIFIED
+#endif
+
 #endif /* CONFIG_MMU */
 
 typedef struct page *pgtable_t;
 
 #define THREAD_SIZE            (PAGE_SIZE << THREAD_SIZE_ORDER)
 #define THREAD_START_SP                (THREAD_SIZE - 8)
 
+#ifdef CONFIG_VMAP_STACK
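+/*
+ * vmap'ed stacks are aligned to twice their size so that a single bit of the
+ * stack pointer (bit THREAD_SIZE_ORDER + PAGE_SHIFT) can be tested by the
+ * exception entry code to detect an overflow (see do_overflow_check).
+ */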
+#define THREAD_ALIGN           (2 * THREAD_SIZE)
+#else
+#define THREAD_ALIGN           THREAD_SIZE
+#endif
+
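+/* Size of the per-CPU stack used to handle a detected stack overflow */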
+#define OVERFLOW_STACK_SIZE    SZ_4K
+
 #ifndef __ASSEMBLY__
 
 struct task_struct;
 
        @
        subs    r2, sp, r0              @ SP above bottom of IRQ stack?
        rsbscs  r2, r2, #THREAD_SIZE    @ ... and below the top?
+#ifdef CONFIG_VMAP_STACK
+       ldr_l   r2, high_memory, cc     @ End of the linear region
+       cmpcc   r2, r0                  @ Stack pointer was below it?
+#endif
        movcs   sp, r0                  @ If so, revert to incoming SP
 
 #ifndef CONFIG_UNWINDER_ARM
 #define SPFIX(code...)
 #endif
 
-       .macro  svc_entry, stack_hole=0, trace=1, uaccess=1
+       .macro  svc_entry, stack_hole=0, trace=1, uaccess=1, overflow_check=1
  UNWIND(.fnstart               )
- UNWIND(.save {r0 - pc}                )
        sub     sp, sp, #(SVC_REGS_SIZE + \stack_hole)
+ THUMB(        add     sp, r1          )       @ get SP in a GPR without
+ THUMB(        sub     r1, sp, r1      )       @ using a temp register
+
+       .if     \overflow_check
+ UNWIND(.save  {r0 - pc}       )
+       do_overflow_check (SVC_REGS_SIZE + \stack_hole)
+       .endif
+
 #ifdef CONFIG_THUMB2_KERNEL
-       add     sp, r1                  @ get SP in a GPR without
-       sub     r1, sp, r1              @ using a temp register
        tst     r1, #4                  @ test stack pointer alignment
        sub     r1, sp, r1              @ restore original R1
        sub     sp, r1                  @ restore original SP
        str     r7, [r8]
 #endif
        mov     r0, r5
-#if !defined(CONFIG_THUMB2_KERNEL)
+#if !defined(CONFIG_THUMB2_KERNEL) && !defined(CONFIG_VMAP_STACK)
        set_current r7
        ldmia   r4, {r4 - sl, fp, sp, pc}       @ Load all regs saved previously
 #else
        mov     r1, r7
        ldmia   r4, {r4 - sl, fp, ip, lr}       @ Load all regs saved previously
+#ifdef CONFIG_VMAP_STACK
+       @
+       @ Do a dummy read from the new stack while running from the old one so
+       @ that we can rely on do_translation_fault() to fix up any stale PMD
+       @ entries covering the vmalloc region.
+       @
+       ldr     r2, [ip]
+#endif
 
        @ When CONFIG_THREAD_INFO_IN_TASK=n, the update of SP itself is what
        @ effectuates the task switch, as that is what causes the observable
  UNWIND(.fnend         )
 ENDPROC(__switch_to)
 
+#ifdef CONFIG_VMAP_STACK
+       .text
+       .align  2
+__bad_stack:
+       @
+       @ We've just detected an overflow. We need to load the address of this
+       @ CPU's overflow stack into the stack pointer register. We have only one
+       @ scratch register so let's use a sequence of ADDs including one
+       @ involving the PC, and decorate them with PC-relative group
+       @ relocations. As these are ARM only, switch to ARM mode first.
+       @
+       @ We enter here with IP clobbered and its value stashed on the mode
+       @ stack.
+       @
+THUMB( bx      pc              )
+THUMB( nop                     )
+THUMB( .arm                    )
+       mrc     p15, 0, ip, c13, c0, 4          @ Get per-CPU offset
+
+       .globl  overflow_stack_ptr
+       .reloc  0f, R_ARM_ALU_PC_G0_NC, overflow_stack_ptr
+       .reloc  1f, R_ARM_ALU_PC_G1_NC, overflow_stack_ptr
+       .reloc  2f, R_ARM_LDR_PC_G2, overflow_stack_ptr
+       add     ip, ip, pc
+0:     add     ip, ip, #-4
+1:     add     ip, ip, #0
+2:     ldr     ip, [ip, #4]
+
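+       @ IP now points to the top of this CPU's overflow stack.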
+       str     sp, [ip, #-4]!                  @ Preserve original SP value
+       mov     sp, ip                          @ Switch to overflow stack
+       pop     {ip}                            @ Original SP in IP
+
+#if defined(CONFIG_UNWINDER_FRAME_POINTER) && defined(CONFIG_CC_IS_GCC)
+       mov     ip, ip                          @ mov expected by unwinder
+       push    {fp, ip, lr, pc}                @ GCC flavor frame record
+#else
+       str     ip, [sp, #-8]!                  @ store original SP
+       push    {fpreg, lr}                     @ Clang flavor frame record
+#endif
+UNWIND( ldr    ip, [r0, #4]    )               @ load exception LR
+UNWIND( str    ip, [sp, #12]   )               @ store in the frame record
+       ldr     ip, [r0, #12]                   @ reload IP
+
+       @ Store the original GPRs to the new stack.
+       svc_entry uaccess=0, overflow_check=0
+
+UNWIND( .save   {sp, pc}       )
+UNWIND( .save   {fpreg, lr}    )
+UNWIND( .setfp  fpreg, sp      )
+
+       ldr     fpreg, [sp, #S_SP]              @ Add our frame record
+                                               @ to the linked list
+#if defined(CONFIG_UNWINDER_FRAME_POINTER) && defined(CONFIG_CC_IS_GCC)
+       ldr     r1, [fp, #4]                    @ reload SP at entry
+       add     fp, fp, #12
+#else
+       ldr     r1, [fpreg, #8]
+#endif
+       str     r1, [sp, #S_SP]                 @ store in pt_regs
+
+       @ Stash the regs for handle_bad_stack
+       mov     r0, sp
+
+       @ Time to die
+       bl      handle_bad_stack
+       nop
+UNWIND( .fnend                 )
+ENDPROC(__bad_stack)
+#endif
+
        __INIT
 
 /*
 
 tbl    .req    r8              @ syscall table pointer
 why    .req    r8              @ Linux syscall (!= 0)
 tsk    .req    r9              @ current thread_info
+
+       .macro  do_overflow_check, frame_size:req
+#ifdef CONFIG_VMAP_STACK
+       @
+       @ Test whether the SP has overflowed. Task and IRQ stacks are aligned
+       @ so that SP & BIT(THREAD_SIZE_ORDER + PAGE_SHIFT) should always be
+       @ zero.
+       @
+ARM(   tst     sp, #1 << (THREAD_SIZE_ORDER + PAGE_SHIFT)      )
+THUMB( tst     r1, #1 << (THREAD_SIZE_ORDER + PAGE_SHIFT)      )
+THUMB( it      ne                                              )
+       bne     .Lstack_overflow_check\@
+
+       .pushsection    .text
+.Lstack_overflow_check\@:
+       @
+       @ The stack pointer is not pointing to a valid vmap'ed stack, but it
+       @ may be pointing into the linear map instead, which may happen if we
+       @ are already running from the overflow stack. We cannot detect overflow
+       @ in such cases so just carry on.
+       @
+       str     ip, [r0, #12]                   @ Stash IP on the mode stack
+       ldr_l   ip, high_memory                 @ Start of VMALLOC space
+ARM(   cmp     sp, ip                  )       @ SP in vmalloc space?
+THUMB( cmp     r1, ip                  )
+THUMB( itt     lo                      )
+       ldrlo   ip, [r0, #12]                   @ Restore IP
+       blo     .Lout\@                         @ Carry on
+
+THUMB( sub     r1, sp, r1              )       @ Restore original R1
+THUMB( sub     sp, r1                  )       @ Restore original SP
+       add     sp, sp, #\frame_size            @ Undo svc_entry's SP change
+       b       __bad_stack                     @ Handle VMAP stack overflow
+       .popsection
+.Lout\@:
+#endif
+       .endm
 
        int cpu;
 
        for_each_possible_cpu(cpu) {
-               stack = (u8 *)__get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
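+               /*
+                * With CONFIG_VMAP_STACK, allocate the IRQ stacks from the
+                * vmalloc area as well, using the same size, alignment and
+                * GFP flags as task stacks.
+                */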
+               if (!IS_ENABLED(CONFIG_VMAP_STACK))
+                       stack = (u8 *)__get_free_pages(GFP_KERNEL,
+                                                      THREAD_SIZE_ORDER);
+               else
+                       stack = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN,
+                                              THREADINFO_GFP, NUMA_NO_NODE,
+                                              __builtin_return_address(0));
+
                if (WARN_ON(!stack))
                        break;
                per_cpu(irq_stack_ptr, cpu) = &stack[THREAD_SIZE];
 
 int __cpu_architecture __read_mostly = CPU_ARCH_UNKNOWN;
 
 struct stack {
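+       /*
+        * Each exception mode stack holds one extra word beyond the saved
+        * r0, lr and spsr, so that the vmap'ed stack overflow check can
+        * stash IP there (do_overflow_check stores IP at offset 12).
+        */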
-       u32 irq[3];
-       u32 abt[3];
-       u32 und[3];
-       u32 fiq[3];
+       u32 irq[4];
+       u32 abt[4];
+       u32 und[4];
+       u32 fiq[4];
 } ____cacheline_aligned;
 
 #ifndef CONFIG_CPU_V7M
 
        ldr     r4, =cpu_suspend_size
 #endif
        mov     r5, sp                  @ current virtual SP
+#ifdef CONFIG_VMAP_STACK
+       @ Run the suspend code from the overflow stack so we don't have to rely
+       @ on vmalloc-to-phys conversions anywhere in the arch suspend code.
+       @ The original SP value captured in R5 will be restored on the way out.
+       mov_l   r6, overflow_stack_ptr  @ Base pointer
+       mrc     p15, 0, r7, c13, c0, 4  @ Get per-CPU offset
+       ldr     sp, [r6, r7]            @ Address of this CPU's overflow stack
+#endif
        add     r4, r4, #12             @ Space for pgd, virt sp, phys resume fn
        sub     sp, sp, r4              @ allocate CPU state on stack
        ldr     r3, =sleep_save_sp
 
 static int verify_stack(unsigned long sp)
 {
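+       /*
+        * With CONFIG_VMAP_STACK, kernel stacks live in the vmalloc area
+        * above high_memory, so the upper bound check only applies when
+        * vmap'ed stacks are not in use.
+        */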
        if (sp < PAGE_OFFSET ||
-           (sp > (unsigned long)high_memory && high_memory != NULL))
+           (!IS_ENABLED(CONFIG_VMAP_STACK) &&
+            sp > (unsigned long)high_memory && high_memory != NULL))
                return -EFAULT;
 
        return 0;
 
        if (!user_mode(regs) || in_interrupt()) {
                dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp,
-                        ALIGN(regs->ARM_sp, THREAD_SIZE));
+                        ALIGN(regs->ARM_sp - THREAD_SIZE, THREAD_ALIGN)
+                        + THREAD_SIZE);
                dump_backtrace(regs, tsk, KERN_EMERG);
                dump_instr(KERN_EMERG, regs);
        }
         */
 #endif
 }
+
+#ifdef CONFIG_VMAP_STACK
+
+DECLARE_PER_CPU(u8 *, irq_stack_ptr);
+
+asmlinkage DEFINE_PER_CPU(u8 *, overflow_stack_ptr);
+
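+/*
+ * Allocate one page per CPU for the overflow stacks, and record a pointer to
+ * the top of each stack in overflow_stack_ptr so that the exception entry
+ * code can switch to it when it detects an overflow.
+ */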
+static int __init allocate_overflow_stacks(void)
+{
+       u8 *stack;
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               stack = (u8 *)__get_free_page(GFP_KERNEL);
+               if (WARN_ON(!stack))
+                       return -ENOMEM;
+               per_cpu(overflow_stack_ptr, cpu) = &stack[OVERFLOW_STACK_SIZE];
+       }
+       return 0;
+}
+early_initcall(allocate_overflow_stacks);
+
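+/*
+ * Called from __bad_stack while running on the overflow stack, with the
+ * register state at the time of the overflow saved in @regs.
+ */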
+asmlinkage void handle_bad_stack(struct pt_regs *regs)
+{
+       unsigned long tsk_stk = (unsigned long)current->stack;
+       unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr);
+       unsigned long ovf_stk = (unsigned long)this_cpu_read(overflow_stack_ptr);
+
+       console_verbose();
+       pr_emerg("Insufficient stack space to handle exception!");
+
+       pr_emerg("Task stack:     [0x%08lx..0x%08lx]\n",
+                tsk_stk, tsk_stk + THREAD_SIZE);
+       pr_emerg("IRQ stack:      [0x%08lx..0x%08lx]\n",
+                irq_stk - THREAD_SIZE, irq_stk);
+       pr_emerg("Overflow stack: [0x%08lx..0x%08lx]\n",
+                ovf_stk - OVERFLOW_STACK_SIZE, ovf_stk);
+
+       die("kernel stack overflow", regs, 0);
+}
+
+/*
+ * Normally, we rely on the logic in do_translation_fault() to update stale PMD
+ * entries covering the vmalloc space in a task's page tables when it first
+ * accesses the region in question. Unfortunately, this is not sufficient when
+ * the task stack resides in the vmalloc region, as do_translation_fault() is a
+ * C function that needs a stack to run.
+ *
+ * So we need to ensure that these PMD entries are up to date *before* the MM
+ * switch. As we already have some logic in the MM switch path that takes care
+ * of this, let's trigger it by bumping the counter every time the core vmalloc
+ * code modifies a PMD entry in the vmalloc region.
+ */
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
+{
+       if (start > VMALLOC_END || end < VMALLOC_START)
+               return;
+
+       /*
+        * This hooks into the core vmalloc code to receive notifications of
+        * any PMD level changes that have been made to the kernel page tables.
+        * This means it should only be triggered once for every MiB worth of
+        * vmalloc space, given that we don't support huge vmalloc/vmap on ARM,
+        * and that kernel PMD level table entries are rarely (if ever)
+        * updated.
+        *
+        * This means that the counter is going to max out at ~250 for the
+        * typical case. If it overflows, something entirely unexpected has
+        * occurred so let's throw a warning if that happens.
+        */
+       WARN_ON(++init_mm.context.vmalloc_seq == UINT_MAX);
+}
+
+#endif
 
 
        /* store the highest address on the stack to avoid crossing it*/
        ctrl.sp_low = frame->sp;
-       ctrl.sp_high = ALIGN(ctrl.sp_low, THREAD_SIZE);
+       ctrl.sp_high = ALIGN(ctrl.sp_low - THREAD_SIZE, THREAD_ALIGN)
+                      + THREAD_SIZE;
 
        pr_debug("%s(pc = %08lx lr = %08lx sp = %08lx)\n", __func__,
                 frame->pc, frame->lr, frame->sp);
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
        . = ALIGN(1<<SECTION_SHIFT);
 #else
-       . = ALIGN(THREAD_SIZE);
+       . = ALIGN(THREAD_ALIGN);
 #endif
        __init_end = .;
 
        _sdata = .;
-       RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+       RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
        _edata = .;
 
        BSS_SECTION(0, 0, 0)