#include <assert.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <linux/kernel.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
/* alloc_value() - allocate the buffer that receives one map value.
 *
 * This is a diff hunk: the '-' line is the allocation being removed and the
 * '+' lines are its replacement.
 *
 * For per-CPU map types (map_is_per_cpu(info->type) is true) the replacement
 * sizes the buffer as round_up(info->value_size, 8) * get_possible_cpus(),
 * i.e. one slot per possible CPU with each slot padded to a multiple of
 * 8 bytes.  The removed line used the unpadded info->value_size per CPU.
 * The printing code changed elsewhere in this patch walks the buffer with the
 * same round_up(info->value_size, 8) stride, so the allocation and the
 * readers agree on the layout.
 * NOTE(review): the 8-byte padding presumably mirrors how the kernel lays out
 * per-CPU values on lookup - confirm against the BPF syscall documentation.
 *
 * For all other map types the buffer is exactly info->value_size bytes.
 *
 * Returns the malloc() result unchanged; no NULL check is done here, so
 * callers are presumably expected to handle allocation failure - verify.
 */
static void *alloc_value(struct bpf_map_info *info)
 {
        if (map_is_per_cpu(info->type))
-               return malloc(info->value_size * get_possible_cpus());
+               return malloc(round_up(info->value_size, 8) *
+                             get_possible_cpus());
        else
                return malloc(info->value_size);
 }
                jsonw_name(json_wtr, "value");
                print_hex_data_json(value, info->value_size);
        } else {
-               unsigned int i, n;
+               unsigned int i, n, step;
 
                n = get_possible_cpus();
+               step = round_up(info->value_size, 8);
 
                jsonw_name(json_wtr, "key");
                print_hex_data_json(key, info->key_size);
                        jsonw_int_field(json_wtr, "cpu", i);
 
                        jsonw_name(json_wtr, "value");
-                       print_hex_data_json(value + i * info->value_size,
+                       print_hex_data_json(value + i * step,
                                            info->value_size);
 
                        jsonw_end_object(json_wtr);
 
                printf("\n");
        } else {
-               unsigned int i, n;
+               unsigned int i, n, step;
 
                n = get_possible_cpus();
+               step = round_up(info->value_size, 8);
 
                printf("key:\n");
                fprint_hex(stdout, key, info->key_size, " ");
                for (i = 0; i < n; i++) {
                        printf("value (CPU %02d):%c",
                               i, info->value_size > 16 ? '\n' : ' ');
-                       fprint_hex(stdout, value + i * info->value_size,
+                       fprint_hex(stdout, value + i * step,
                                   info->value_size, " ");
                        printf("\n");
                }