        DECLARE_FLEX_ARRAY(char, buffer);
 };
 
+/*
+ * Per-CPU cache of path buffers: while @hold is non-zero, buffers freed on
+ * this CPU are kept on the local @head list (@count entries) instead of
+ * being returned to the global list.
+ */
+struct aa_local_cache {
+       unsigned int hold;
+       unsigned int count;
+       struct list_head head;
+};
+
 #define RESERVE_COUNT 2
 static int reserve_count = RESERVE_COUNT;
 static int buffer_count;
 
 static LIST_HEAD(aa_global_buffers);
 static DEFINE_SPINLOCK(aa_buffers_lock);
+static DEFINE_PER_CPU(struct aa_local_cache, aa_local_buffers);
 
 /*
  * LSM hook functions
 char *aa_get_buffer(bool in_atomic)
 {
        union aa_buffer *aa_buf;
+       struct aa_local_cache *cache;
        bool try_again = true;
        gfp_t flags = (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 
+       /* use per cpu cached buffers first */
+       cache = get_cpu_ptr(&aa_local_buffers);
+       if (!list_empty(&cache->head)) {
+               aa_buf = list_first_entry(&cache->head, union aa_buffer, list);
+               list_del(&aa_buf->list);
+               cache->hold--;
+               cache->count--;
+               put_cpu_ptr(&aa_local_buffers);
+               return &aa_buf->buffer[0];
+       }
+       put_cpu_ptr(&aa_local_buffers);
+
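+       /*
+        * No cached buffer on this CPU: fall back to the global list.  If
+        * the lock is contended, bump this CPU's hold count so that buffers
+        * freed later are cached locally, then take the lock the slow way.
+        */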
+       if (!spin_trylock(&aa_buffers_lock)) {
+               cache = get_cpu_ptr(&aa_local_buffers);
+               cache->hold += 1;
+               put_cpu_ptr(&aa_local_buffers);
+               spin_lock(&aa_buffers_lock);
+       } else {
+               cache = get_cpu_ptr(&aa_local_buffers);
+               put_cpu_ptr(&aa_local_buffers);
+       }
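+       /* aa_buffers_lock is held on entry to the retry loop */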
 retry:
-       spin_lock(&aa_buffers_lock);
        if (buffer_count > reserve_count ||
            (in_atomic && !list_empty(&aa_global_buffers))) {
                aa_buf = list_first_entry(&aa_global_buffers, union aa_buffer,
        if (!aa_buf) {
                if (try_again) {
                        try_again = false;
+                       spin_lock(&aa_buffers_lock);
                        goto retry;
                }
                pr_warn_once("AppArmor: Failed to allocate a memory buffer.\n");
 void aa_put_buffer(char *buf)
 {
        union aa_buffer *aa_buf;
+       struct aa_local_cache *cache;
 
        if (!buf)
                return;
        aa_buf = container_of(buf, union aa_buffer, buffer[0]);
 
-       spin_lock(&aa_buffers_lock);
-       list_add(&aa_buf->list, &aa_global_buffers);
-       buffer_count++;
-       spin_unlock(&aa_buffers_lock);
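+       /*
+        * Put the buffer on this CPU's cache if the CPU is already holding
+        * buffers; otherwise try to return it to the global list, falling
+        * back to the local cache when the lock is contended.
+        */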
+       cache = get_cpu_ptr(&aa_local_buffers);
+       if (!cache->hold) {
+               put_cpu_ptr(&aa_local_buffers);
+
+               if (spin_trylock(&aa_buffers_lock)) {
+                       /* put back on global list */
+                       list_add(&aa_buf->list, &aa_global_buffers);
+                       buffer_count++;
+                       spin_unlock(&aa_buffers_lock);
+                       cache = get_cpu_ptr(&aa_local_buffers);
+                       put_cpu_ptr(&aa_local_buffers);
+                       return;
+               }
+               /* contention on the global list, fall back to the percpu cache */
+               cache = get_cpu_ptr(&aa_local_buffers);
+               cache->hold += 1;
+       }
+
+       /* cache in percpu list */
+       list_add(&aa_buf->list, &cache->head);
+       cache->count++;
+       put_cpu_ptr(&aa_local_buffers);
 }
 
 /*
        union aa_buffer *aa_buf;
        int i, num;
 
+       /*
+        * Initialize the per-CPU caches of allocated buffers; they are
+        * used to help reduce contention on the global buffer lock.
+        */
+       for_each_possible_cpu(i) {
+               per_cpu(aa_local_buffers, i).hold = 0;
+               per_cpu(aa_local_buffers, i).count = 0;
+               INIT_LIST_HEAD(&per_cpu(aa_local_buffers, i).head);
+       }
        /*
         * A function may require two buffers at once. Usually the buffers are
         * used for a short period of time and are shared. On UP kernel buffers