#include <sys/mman.h>
 #include <inttypes.h>
 #include <asm/bug.h>
+#ifdef HAVE_LIBNUMA_SUPPORT
+#include <numaif.h>
+#endif
 #include "debug.h"
 #include "event.h"
 #include "mmap.h"
 }
 
 #ifdef HAVE_AIO_SUPPORT
+
+#ifdef HAVE_LIBNUMA_SUPPORT
+/*
+ * Allocate the idx-th AIO data buffer as a private anonymous mapping that is
+ * perf_mmap__mmap_len(map) bytes long.
+ *
+ * Returns 0 on success. On failure map->aio.data[idx] is set to NULL and -1
+ * is returned.
+ */
+static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
+{
+       /*
+        * mmap(2) asks portable callers to pass fd == -1 together with
+        * MAP_ANONYMOUS, even though Linux ignores the descriptor.
+        */
+       map->aio.data[idx] = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE,
+                                 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+       if (map->aio.data[idx] == MAP_FAILED) {
+               /* Keep the slot NULL so the free path does not munmap() MAP_FAILED. */
+               map->aio.data[idx] = NULL;
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Unmap the idx-th AIO data buffer, if the slot holds one, and reset the
+ * slot to NULL. An empty slot is left untouched.
+ */
+static void perf_mmap__aio_free(struct perf_mmap *map, int idx)
+{
+       void *buf = map->aio.data[idx];
+
+       if (!buf)
+               return;
+
+       munmap(buf, perf_mmap__mmap_len(map));
+       map->aio.data[idx] = NULL;
+}
+
+/*
+ * Bind the idx-th AIO data buffer to the NUMA node that cpu__get_node()
+ * reports for @cpu, using mbind(2) with MPOL_BIND.
+ *
+ * Binding is skipped when @affinity is PERF_AFFINITY_SYS or when
+ * cpu__max_node() is not greater than 1.
+ *
+ * Returns 0 when the buffer was bound or binding was skipped, -1 (after
+ * logging an error) when the node cannot be represented in the mask or
+ * mbind() fails.
+ */
+static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affinity)
+{
+       void *data;
+       size_t mmap_len;
+       unsigned long node_mask;
+       int node;
+
+       if (affinity == PERF_AFFINITY_SYS || cpu__max_node() <= 1)
+               return 0;
+
+       node = cpu__get_node(cpu);
+       /*
+        * The mask is a single unsigned long; shifting by a negative count or
+        * by its width or more is undefined behaviour, so refuse such nodes.
+        */
+       if (node < 0 || (size_t)node >= 8 * sizeof(node_mask)) {
+               pr_err("Failed to bind AIO buffer: node %d does not fit the node mask\n", node);
+               return -1;
+       }
+
+       data = map->aio.data[idx];
+       mmap_len = perf_mmap__mmap_len(map);
+       node_mask = 1UL << node;
+       /*
+        * The Linux mbind() implementation examines one bit fewer than
+        * maxnode, so maxnode must be node + 2 for the bit of @node to be
+        * seen. A maxnode of 1 leaves the mask effectively empty.
+        */
+       if (mbind(data, mmap_len, MPOL_BIND, &node_mask, node + 2, 0)) {
+               pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n",
+                       data, data + mmap_len, node);
+               return -1;
+       }
+
+       return 0;
+}
+#else
+/*
+ * Allocate the idx-th AIO data buffer with malloc(), sized by
+ * perf_mmap__mmap_len(map), and store it in map->aio.data[idx].
+ *
+ * Returns 0 on success, -1 when the allocation fails (the slot then holds
+ * NULL).
+ */
+static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
+{
+       void *buf = malloc(perf_mmap__mmap_len(map));
+
+       map->aio.data[idx] = buf;
+
+       return buf ? 0 : -1;
+}
+
+/*
+ * Release the idx-th AIO data buffer by handing the address of its slot in
+ * map->aio.data to zfree().
+ */
+static void perf_mmap__aio_free(struct perf_mmap *map, int idx)
+{
+       zfree(&map->aio.data[idx]);
+}
+
+/*
+ * Stub used when HAVE_LIBNUMA_SUPPORT is not defined: no binding is
+ * attempted, all arguments are ignored and 0 is always returned.
+ */
+static int perf_mmap__aio_bind(struct perf_mmap *map __maybe_unused,
+                              int idx __maybe_unused,
+                              int cpu __maybe_unused,
+                              int affinity __maybe_unused)
+{
+       return 0;
+}
+#endif
+
 static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp)
 {
-       int delta_max, i, prio;
+       int delta_max, i, prio, ret;
 
        map->aio.nr_cblocks = mp->nr_cblocks;
        if (map->aio.nr_cblocks) {
                }
                delta_max = sysconf(_SC_AIO_PRIO_DELTA_MAX);
                for (i = 0; i < map->aio.nr_cblocks; ++i) {
-                       map->aio.data[i] = malloc(perf_mmap__mmap_len(map));
-                       if (!map->aio.data[i]) {
+                       ret = perf_mmap__aio_alloc(map, i);
+                       if (ret == -1) {
                                pr_debug2("failed to allocate data buffer area, error %m");
                                return -1;
                        }
+                       ret = perf_mmap__aio_bind(map, i, map->cpu, mp->affinity);
+                       if (ret == -1)
+                               return -1;
                        /*
                         * Use cblock.aio_fildes value different from -1
                         * to denote started aio write operation on the
        int i;
 
        for (i = 0; i < map->aio.nr_cblocks; ++i)
-               zfree(&map->aio.data[i]);
+               perf_mmap__aio_free(map, i);
        if (map->aio.data)
                zfree(&map->aio.data);
        zfree(&map->aio.cblocks);