obj-y := \
        entry.o process.o bfin_ksyms.o ptrace.o setup.o signal.o \
-       sys_bfin.o time.o traps.o irqchip.o dma-mapping.o flat.o
+       sys_bfin.o time.o traps.o irqchip.o dma-mapping.o flat.o \
+       fixed_code.o
 
 obj-$(CONFIG_BF53x)                 += bfin_gpio.o
 obj-$(CONFIG_BF561)                 += bfin_gpio.o
 
--- /dev/null
+/*
+ * This file contains sequences of code that will be copied to a
+ * fixed location, defined in <asm/atomic_seq.h>.  The interrupt
+ * handlers ensure that these sequences appear to be atomic when
+ * executed from userspace.
+ * These are aligned to 16 bytes, so that we have some space to replace
+ * these sequences with something else (e.g. kernel traps if we ever do
+ * BF561 SMP).
+ */
+#include <linux/linkage.h>
+#include <asm/entry.h>
+#include <asm/unistd.h>
+
+.text
+ENTRY(_fixed_code_start)
+
+.align 16
+ENTRY(_sigreturn_stub)
+       P0 = __NR_rt_sigreturn;
+       EXCPT 0;
+       /* Speculative execution paranoia.  */
+0:     JUMP.S 0b;
+ENDPROC (_sigreturn_stub)
+
+.align 16
+       /*
+        * Atomic swap, 8 bit.
+        * Inputs:      P0: memory address to use
+        *              R1: value to store
+        * Output:      R0: old contents of the memory address, zero extended.
+        */
+ENTRY(_atomic_xchg32)
+       R0 = [P0];
+       [P0] = R1;
+       rts;
+ENDPROC (_atomic_xchg32)
+
+.align 16
+       /*
+        * Compare and swap, 32 bit.
+        * Inputs:      P0: memory address to use
+        *              R1: compare value
+        *              R2: new value to store
+        * The new value is stored if the contents of the memory
+        * address is equal to the compare value.
+        * Output:      R0: old contents of the memory address.
+        */
+ENTRY(_atomic_cas32)
+       R0 = [P0];
+       CC = R0 == R1;
+       IF !CC JUMP 1f;
+       [P0] = R2;
+1:
+       rts;
+ENDPROC (_atomic_cas32)
+
+.align 16
+       /*
+        * Atomic add, 32 bit.
+        * Inputs:      P0: memory address to use
+        *              R0: value to add
+        * Outputs:     R0: new contents of the memory address.
+        *              R1: previous contents of the memory address.
+        */
+ENTRY(_atomic_add32)
+       R1 = [P0];
+       R0 = R1 + R0;
+       [P0] = R0;
+       rts;
+ENDPROC (_atomic_add32)
+
+.align 16
+       /*
+        * Atomic sub, 32 bit.
+        * Inputs:      P0: memory address to use
+        *              R0: value to subtract
+        * Outputs:     R0: new contents of the memory address.
+        *              R1: previous contents of the memory address.
+        */
+ENTRY(_atomic_sub32)
+       R1 = [P0];
+       R0 = R1 - R0;
+       [P0] = R0;
+       rts;
+ENDPROC (_atomic_sub32)
+
+.align 16
+       /*
+        * Atomic ior, 32 bit.
+        * Inputs:      P0: memory address to use
+        *              R0: value to ior
+        * Outputs:     R0: new contents of the memory address.
+        *              R1: previous contents of the memory address.
+        */
+ENTRY(_atomic_ior32)
+       R1 = [P0];
+       R0 = R1 | R0;
+       [P0] = R0;
+       rts;
+ENDPROC (_atomic_ior32)
+
+.align 16
+       /*
+        * Atomic ior, 32 bit.
+        * Inputs:      P0: memory address to use
+        *              R0: value to ior
+        * Outputs:     R0: new contents of the memory address.
+        *              R1: previous contents of the memory address.
+        */
+ENTRY(_atomic_and32)
+       R1 = [P0];
+       R0 = R1 & R0;
+       [P0] = R0;
+       rts;
+ENDPROC (_atomic_ior32)
+
+.align 16
+       /*
+        * Atomic ior, 32 bit.
+        * Inputs:      P0: memory address to use
+        *              R0: value to ior
+        * Outputs:     R0: new contents of the memory address.
+        *              R1: previous contents of the memory address.
+        */
+ENTRY(_atomic_xor32)
+       R1 = [P0];
+       R0 = R1 ^ R0;
+       [P0] = R0;
+       rts;
+ENDPROC (_atomic_ior32)
+
+ENTRY(_fixed_code_end)
 
 
 #include <asm/blackfin.h>
 #include <asm/uaccess.h>
+#include <asm/fixed_code.h>
 
 #define        LED_ON  0
 #define        LED_OFF 1
        return 0;
 }
 
+void finish_atomic_sections (struct pt_regs *regs)
+{
+       if (regs->pc < ATOMIC_SEQS_START || regs->pc >= ATOMIC_SEQS_END)
+               return;
+
+       switch (regs->pc) {
+       case ATOMIC_XCHG32 + 2:
+               put_user(regs->r1, (int *)regs->p0);
+               regs->pc += 2;
+               break;
+
+       case ATOMIC_CAS32 + 2:
+       case ATOMIC_CAS32 + 4:
+               if (regs->r0 == regs->r1)
+                       put_user(regs->r2, (int *)regs->p0);
+               regs->pc = ATOMIC_CAS32 + 8;
+               break;
+       case ATOMIC_CAS32 + 6:
+               put_user(regs->r2, (int *)regs->p0);
+               regs->pc += 2;
+               break;
+
+       case ATOMIC_ADD32 + 2:
+               regs->r0 = regs->r1 + regs->r0;
+               /* fall through */
+       case ATOMIC_ADD32 + 4:
+               put_user(regs->r0, (int *)regs->p0);
+               regs->pc = ATOMIC_ADD32 + 6;
+               break;
+
+       case ATOMIC_SUB32 + 2:
+               regs->r0 = regs->r1 - regs->r0;
+               /* fall through */
+       case ATOMIC_SUB32 + 4:
+               put_user(regs->r0, (int *)regs->p0);
+               regs->pc = ATOMIC_SUB32 + 6;
+               break;
+
+       case ATOMIC_IOR32 + 2:
+               regs->r0 = regs->r1 | regs->r0;
+               /* fall through */
+       case ATOMIC_IOR32 + 4:
+               put_user(regs->r0, (int *)regs->p0);
+               regs->pc = ATOMIC_IOR32 + 6;
+               break;
+
+       case ATOMIC_AND32 + 2:
+               regs->r0 = regs->r1 & regs->r0;
+               /* fall through */
+       case ATOMIC_AND32 + 4:
+               put_user(regs->r0, (int *)regs->p0);
+               regs->pc = ATOMIC_AND32 + 6;
+               break;
+
+       case ATOMIC_XOR32 + 2:
+               regs->r0 = regs->r1 ^ regs->r0;
+               /* fall through */
+       case ATOMIC_XOR32 + 4:
+               put_user(regs->r0, (int *)regs->p0);
+               regs->pc = ATOMIC_XOR32 + 6;
+               break;
+       }
+}
+
 #if defined(CONFIG_ACCESS_CHECK)
 int _access_ok(unsigned long addr, unsigned long size)
 {
 
 #include <asm/cacheflush.h>
 #include <asm/blackfin.h>
 #include <asm/cplbinit.h>
+#include <asm/fixed_code.h>
 
 u16 _bfin_swrst;
 
 
        printk(KERN_INFO "Hardware Trace Enabled\n");
        bfin_write_TBUFCTL(0x03);
+
+       /* Copy atomic sequences to their fixed location, and sanity check that
+          these locations are the ones that we advertise to userspace.  */
+       memcpy((void *)FIXED_CODE_START, &fixed_code_start,
+              FIXED_CODE_END - FIXED_CODE_START);
+       BUG_ON((char *)&sigreturn_stub - (char *)&fixed_code_start
+              != SIGRETURN_STUB - FIXED_CODE_START);
+       BUG_ON((char *)&atomic_xchg32 - (char *)&fixed_code_start
+              != ATOMIC_XCHG32 - FIXED_CODE_START);
+       BUG_ON((char *)&atomic_cas32 - (char *)&fixed_code_start
+              != ATOMIC_CAS32 - FIXED_CODE_START);
+       BUG_ON((char *)&atomic_add32 - (char *)&fixed_code_start
+              != ATOMIC_ADD32 - FIXED_CODE_START);
+       BUG_ON((char *)&atomic_sub32 - (char *)&fixed_code_start
+              != ATOMIC_SUB32 - FIXED_CODE_START);
+       BUG_ON((char *)&atomic_ior32 - (char *)&fixed_code_start
+              != ATOMIC_IOR32 - FIXED_CODE_START);
+       BUG_ON((char *)&atomic_and32 - (char *)&fixed_code_start
+              != ATOMIC_AND32 - FIXED_CODE_START);
+       BUG_ON((char *)&atomic_xor32 - (char *)&fixed_code_start
+              != ATOMIC_XOR32 - FIXED_CODE_START);
 }
 
 static int __init topology_init(void)
 
        r0 = [p0];
        sti r0;
 
+       r0 = sp;
+       sp += -12;
+       call _finish_atomic_sections;
+       sp += 12;
        jump.s .Lresume_userspace;
 
 _schedule_and_signal:
 
 include include/asm-generic/Kbuild.asm
+
+header-y += fixed_code.h
 
 extern asmlinkage void asm_do_IRQ(unsigned int irq, struct pt_regs *regs);
 extern void bfin_gpio_interrupt_setup(int irq, int irq_pfx, int type);
 
+extern asmlinkage void finish_atomic_sections (struct pt_regs *regs);
+extern char fixed_code_start;
+extern char fixed_code_end;
+extern int atomic_xchg32(void);
+extern int atomic_cas32(void);
+extern int atomic_add32(void);
+extern int atomic_sub32(void);
+extern int atomic_ior32(void);
+extern int atomic_and32(void);
+extern int atomic_xor32(void);
+extern void sigreturn_stub(void);
+
 extern void *l1_data_A_sram_alloc(size_t);
 extern void *l1_data_B_sram_alloc(size_t);
 extern void *l1_inst_sram_alloc(size_t);
 
 static struct cplb_desc cplb_data[] = {
        {
                .start = 0,
-               .end = SIZE_4K,
-               .psize = SIZE_4K,
+               .end = SIZE_1K,
+               .psize = SIZE_1K,
                .attr = INITIAL_T | SWITCH_T | I_CPLB | D_CPLB,
                .i_conf = SDRAM_OOPS,
                .d_conf = SDRAM_OOPS,
 
--- /dev/null
+/* This file defines the fixed addresses where userspace programs can find
+   atomic code sequences.  */
+
+#define FIXED_CODE_START       0x400
+
+#define SIGRETURN_STUB         0x400
+
+#define ATOMIC_SEQS_START      0x410
+
+#define ATOMIC_XCHG32          0x410
+#define ATOMIC_CAS32           0x420
+#define ATOMIC_ADD32           0x430
+#define ATOMIC_SUB32           0x440
+#define ATOMIC_IOR32           0x450
+#define ATOMIC_AND32           0x460
+#define ATOMIC_XOR32           0x470
+
+#define ATOMIC_SEQS_END                0x480
+
+#define FIXED_CODE_END         0x480