 int                            sched_max_numa_distance;
 static int                     *sched_domains_numa_distance;
 static struct cpumask          ***sched_domains_numa_masks;
-
-static unsigned long __read_mostly *sched_numa_onlined_nodes;
 #endif
 
 /*
 
 static struct sched_domain_topology_level *sched_domain_topology =
        default_topology;
+static struct sched_domain_topology_level *sched_domain_topology_saved;
 
 #define for_each_sd_topology(tl)                       \
        for (tl = sched_domain_topology; tl->mask; tl++)
                return;
 
        sched_domain_topology = tl;
+       sched_domain_topology_saved = NULL;
 }
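
Clearing sched_domain_topology_saved here is what keeps sched_reset_numa()
from ever kfree()ing an arch-owned table: only the NUMA-extended table that
sched_init_numa() allocates gets a saved baseline to fall back to. A minimal
sketch of a caller, loosely modeled on how x86 installs x86_topology (the
names and the level list below are illustrative, not any arch's real table):

#include <linux/sched/topology.h>

/* Hypothetical arch setup code; the levels are illustrative only. */
static struct sched_domain_topology_level my_arch_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
#endif
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};

void __init my_arch_set_topology(void)
{
	/* Becomes the new baseline; never freed by sched_reset_numa(). */
	set_sched_topology(my_arch_topology);
}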
 
 #ifdef CONFIG_NUMA
 
        for (i = 0; i < nr_node_ids; i++) {
                printk(KERN_WARNING "  ");
-               for (j = 0; j < nr_node_ids; j++)
-                       printk(KERN_CONT "%02d ", node_distance(i,j));
+               for (j = 0; j < nr_node_ids; j++) {
+                       if (!node_state(i, N_CPU) || !node_state(j, N_CPU))
+                               printk(KERN_CONT "(%02d) ", node_distance(i,j));
+                       else
+                               printk(KERN_CONT " %02d  ", node_distance(i,j));
+               }
                printk(KERN_CONT "\n");
        }
        printk(KERN_WARNING "\n");
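
With the node_state(N_CPU) checks, rows and columns that belong to CPU-less
(memory-only) nodes are printed in parentheses. For a hypothetical
three-node box where node 2 has memory but no CPUs, the matrix would render
along the lines of:

	 10   21  (31)
	 21   10  (31)
	(31) (31) (10)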
 
 bool find_numa_distance(int distance)
 {
-       int i;
+       bool found = false;
+       int i, *distances;
 
        if (distance == node_distance(0, 0))
                return true;
 
+       rcu_read_lock();
+       distances = rcu_dereference(sched_domains_numa_distance);
+       if (!distances)
+               goto unlock;
        for (i = 0; i < sched_domains_numa_levels; i++) {
-               if (sched_domains_numa_distance[i] == distance)
-                       return true;
+               if (distances[i] == distance) {
+                       found = true;
+                       break;
+               }
        }
+unlock:
+       rcu_read_unlock();
 
-       return false;
+       return found;
 }
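
find_numa_distance() is now an RCU reader: the array pointer is fetched once
with rcu_dereference() and only dereferenced inside the read-side critical
section, pairing with the rcu_assign_pointer()/synchronize_rcu() sequence in
sched_reset_numa() further down. A self-contained sketch of the same
publish/retire pattern (snapshot, reader_sum and writer_replace are
hypothetical names):

#include <linux/rcupdate.h>
#include <linux/slab.h>

static int *snapshot;	/* published via rcu_assign_pointer() */

int reader_sum(int n)
{
	int i, sum = 0, *p;

	rcu_read_lock();
	p = rcu_dereference(snapshot);	/* may be NULL mid-update */
	for (i = 0; p && i < n; i++)
		sum += p[i];
	rcu_read_unlock();		/* @p must not be used past this */

	return sum;
}

void writer_replace(int *new_arr)	/* writers serialized by caller */
{
	int *old = snapshot;

	rcu_assign_pointer(snapshot, new_arr);
	synchronize_rcu();		/* wait out readers of @old */
	kfree(old);
}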
 
+#define for_each_cpu_node_but(n, nbut)         \
+       for_each_node_state(n, N_CPU)           \
+               if (n == nbut)                  \
+                       continue;               \
+               else
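
The trailing else is what makes the macro brace-safe: the caller's loop body
becomes the else branch of the if, so the whole filtered loop still parses
as a single statement. For example:

/*
 *	for_each_cpu_node_but(n, skip)
 *		count++;
 *
 * expands to:
 *
 *	for_each_node_state(n, N_CPU)
 *		if (n == skip)
 *			continue;
 *		else
 *			count++;
 */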
+
 /*
  * A system can have three types of NUMA topology:
  * NUMA_DIRECT: all nodes are directly connected, or not a NUMA system
  * NUMA_GLUELESS_MESH: some nodes are not directly connected, but mergeable
  * NUMA_BACKPLANE: nodes can reach other nodes through a backplane
  *
  * Detection keys off the largest node distance N: if, for two nodes A and B
  * that are N apart,
  *   there is an intermediary node C, which is < N hops away from both
  *   nodes A and B, the system is a glueless mesh.
  */
-static void init_numa_topology_type(void)
+static void init_numa_topology_type(int offline_node)
 {
        int a, b, c, n;
 
                return;
        }
 
-       for_each_online_node(a) {
-               for_each_online_node(b) {
+       for_each_cpu_node_but(a, offline_node) {
+               for_each_cpu_node_but(b, offline_node) {
                        /* Find two nodes furthest removed from each other. */
                        if (node_distance(a, b) < n)
                                continue;
 
                        /* Is there an intermediary node between a and b? */
-                       for_each_online_node(c) {
+                       for_each_cpu_node_but(c, offline_node) {
                                if (node_distance(a, c) < n &&
                                    node_distance(b, c) < n) {
                                        sched_numa_topology_type =
                        return;
                }
        }
+
+       pr_err("Failed to find a NUMA topology type, defaulting to DIRECT\n");
+       sched_numa_topology_type = NUMA_DIRECT;
 }
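
As a worked example (hypothetical distances), take a four-node system whose
largest node distance is 14:

	node   0   1   2   3
	  0:  10  12  12  14
	  1:  12  10  14  12
	  2:  12  14  10  12
	  3:  14  12  12  10

For the furthest pair (0, 3), node 1 is an intermediary with
node_distance(0, 1) == 12 < 14 and node_distance(3, 1) == 12 < 14, so this
classifies as NUMA_GLUELESS_MESH. Had no such intermediary existed for some
furthest pair, the loop would have settled on NUMA_BACKPLANE; systems with
at most two distance levels never get this far and stay NUMA_DIRECT.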
 
 
 #define NR_DISTANCE_VALUES (1 << DISTANCE_BITS)
 
-void sched_init_numa(void)
+void sched_init_numa(int offline_node)
 {
        struct sched_domain_topology_level *tl;
        unsigned long *distance_map;
        int nr_levels = 0;
        int i, j;
+       int *distances;
+       struct cpumask ***masks;
 
        /*
         * O(nr_nodes^2) deduplicating selection sort -- in order to find the
                return;
 
        bitmap_zero(distance_map, NR_DISTANCE_VALUES);
-       for (i = 0; i < nr_node_ids; i++) {
-               for (j = 0; j < nr_node_ids; j++) {
+       for_each_cpu_node_but(i, offline_node) {
+               for_each_cpu_node_but(j, offline_node) {
                        int distance = node_distance(i, j);
 
                        if (distance < LOCAL_DISTANCE || distance >= NR_DISTANCE_VALUES) {
                                sched_numa_warn("Invalid distance value range");
+                               bitmap_free(distance_map);
                                return;
                        }
 
         */
        nr_levels = bitmap_weight(distance_map, NR_DISTANCE_VALUES);
 
-       sched_domains_numa_distance = kcalloc(nr_levels, sizeof(int), GFP_KERNEL);
-       if (!sched_domains_numa_distance) {
+       distances = kcalloc(nr_levels, sizeof(int), GFP_KERNEL);
+       if (!distances) {
                bitmap_free(distance_map);
                return;
        }
 
        for (i = 0, j = 0; i < nr_levels; i++, j++) {
                j = find_next_bit(distance_map, NR_DISTANCE_VALUES, j);
-               sched_domains_numa_distance[i] = j;
+               distances[i] = j;
        }
+       rcu_assign_pointer(sched_domains_numa_distance, distances);
 
        bitmap_free(distance_map);
 
         */
        sched_domains_numa_levels = 0;
 
-       sched_domains_numa_masks = kzalloc(sizeof(void *) * nr_levels, GFP_KERNEL);
-       if (!sched_domains_numa_masks)
+       masks = kzalloc(sizeof(void *) * nr_levels, GFP_KERNEL);
+       if (!masks)
                return;
 
        /*
         * CPUs of nodes that are that many hops away from us.
         */
        for (i = 0; i < nr_levels; i++) {
-               sched_domains_numa_masks[i] =
-                       kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL);
-               if (!sched_domains_numa_masks[i])
+               masks[i] = kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL);
+               if (!masks[i])
                        return;
 
-               for (j = 0; j < nr_node_ids; j++) {
+               for_each_cpu_node_but(j, offline_node) {
                        struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL);
                        int k;
 
                        if (!mask)
                                return;
 
-                       sched_domains_numa_masks[i][j] = mask;
-
-                       for_each_node(k) {
-                               /*
-                                * Distance information can be unreliable for
-                                * offline nodes, defer building the node
-                                * masks to its bringup.
-                                * This relies on all unique distance values
-                                * still being visible at init time.
-                                */
-                               if (!node_online(j))
-                                       continue;
+                       masks[i][j] = mask;
 
+                       for_each_cpu_node_but(k, offline_node) {
                                if (sched_debug() && (node_distance(j, k) != node_distance(k, j)))
                                        sched_numa_warn("Node-distance not symmetric");
 
                        }
                }
        }
+       rcu_assign_pointer(sched_domains_numa_masks, masks);
 
        /* Compute default topology size */
        for (i = 0; sched_domain_topology[i].mask; i++);
                };
        }
 
+       sched_domain_topology_saved = sched_domain_topology;
        sched_domain_topology = tl;
 
        sched_domains_numa_levels = nr_levels;
-       sched_max_numa_distance = sched_domains_numa_distance[nr_levels - 1];
-
-       init_numa_topology_type();
-
-       sched_numa_onlined_nodes = bitmap_alloc(nr_node_ids, GFP_KERNEL);
-       if (!sched_numa_onlined_nodes)
-               return;
+       WRITE_ONCE(sched_max_numa_distance, sched_domains_numa_distance[nr_levels - 1]);
 
-       bitmap_zero(sched_numa_onlined_nodes, nr_node_ids);
-       for_each_online_node(i)
-               bitmap_set(sched_numa_onlined_nodes, i, 1);
+       init_numa_topology_type(offline_node);
 }
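
What lock-free readers end up seeing is a pair of RCU-published tables. For
a hypothetical two-node machine with the nodes at distance 20, the result
would look like:

/*
 * distances = { 10, 20 };		nr_levels == 2
 * masks[0][0] = CPUs of node 0		(distance <= 10 from node 0)
 * masks[0][1] = CPUs of node 1
 * masks[1][0] = CPUs of nodes 0 and 1	(distance <= 20 from node 0)
 * masks[1][1] = CPUs of nodes 0 and 1
 */

Because rcu_assign_pointer() publishes each table only after it is fully
built, a concurrent reader observes either NULL or a complete table, never a
half-initialized one.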
 
-static void __sched_domains_numa_masks_set(unsigned int node)
-{
-       int i, j;
-
-       /*
-        * NUMA masks are not built for offline nodes in sched_init_numa().
-        * Thus, when a CPU of a never-onlined-before node gets plugged in,
-        * adding that new CPU to the right NUMA masks is not sufficient: the
-        * masks of that CPU's node must also be updated.
-        */
-       if (test_bit(node, sched_numa_onlined_nodes))
-               return;
 
-       bitmap_set(sched_numa_onlined_nodes, node, 1);
-
-       for (i = 0; i < sched_domains_numa_levels; i++) {
-               for (j = 0; j < nr_node_ids; j++) {
-                       if (!node_online(j) || node == j)
-                               continue;
+static void sched_reset_numa(void)
+{
+       int nr_levels, *distances;
+       struct cpumask ***masks;
 
-                       if (node_distance(j, node) > sched_domains_numa_distance[i])
+       nr_levels = sched_domains_numa_levels;
+       sched_domains_numa_levels = 0;
+       sched_max_numa_distance = 0;
+       sched_numa_topology_type = NUMA_DIRECT;
+       distances = sched_domains_numa_distance;
+       rcu_assign_pointer(sched_domains_numa_distance, NULL);
+       masks = sched_domains_numa_masks;
+       rcu_assign_pointer(sched_domains_numa_masks, NULL);
+       if (distances || masks) {
+               int i, j;
+
+               synchronize_rcu();
+               kfree(distances);
+               for (i = 0; i < nr_levels && masks; i++) {
+                       if (!masks[i])
                                continue;
-
-                       /* Add remote nodes in our masks */
-                       cpumask_or(sched_domains_numa_masks[i][node],
-                                  sched_domains_numa_masks[i][node],
-                                  sched_domains_numa_masks[0][j]);
+                       for_each_node(j)
+                               kfree(masks[i][j]);
+                       kfree(masks[i]);
                }
+               kfree(masks);
        }
+       if (sched_domain_topology_saved) {
+               kfree(sched_domain_topology);
+               sched_domain_topology = sched_domain_topology_saved;
+               sched_domain_topology_saved = NULL;
+       }
+}
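
The teardown order is load-bearing: both global pointers are unpublished
first, a single synchronize_rcu() then waits out every reader that may still
hold the old arrays, and only afterwards is the memory freed. Condensed:

/*
 *	1. rcu_assign_pointer(gp, NULL);	unpublish
 *	2. synchronize_rcu();			pre-existing readers drain
 *	3. kfree(old);				safe: no reader holds @old
 *
 * Swapping steps 2 and 3 would let a reader that fetched @old under
 * rcu_read_lock() dereference freed memory.
 */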
+
+/*
+ * Must be called with the CPU hotplug lock held.
+ */
+void sched_update_numa(int cpu, bool online)
+{
+       int node;
 
+       node = cpu_to_node(cpu);
        /*
-        * A new node has been brought up, potentially changing the topology
-        * classification.
-        *
-        * Note that this is racy vs any use of sched_numa_topology_type :/
+        * Scheduler NUMA topology is updated when the first CPU of a
+        * node is onlined or the last CPU of a node is offlined.
         */
-       init_numa_topology_type();
+       if (cpumask_weight(cpumask_of_node(node)) != 1)
+               return;
+
+       sched_reset_numa();
+       sched_init_numa(online ? NUMA_NO_NODE : node);
 }
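
The weight check relies on this running while the CPU is still accounted to
its node in both directions: when onlining, the CPU has already been added
to the node's cpumask (weight 1 means it is the node's first CPU); when
offlining, it has not yet been removed (weight 1 means it is the last). A
hypothetical pair of hotplug callbacks showing the intended usage (the real
call sites are the scheduler's CPU activate/deactivate paths):

static int my_cpu_online(unsigned int cpu)
{
	sched_update_numa(cpu, true);	/* rebuild if node's first CPU */
	return 0;
}

static int my_cpu_offline(unsigned int cpu)
{
	sched_update_numa(cpu, false);	/* rebuild if node's last CPU */
	return 0;
}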
 
 void sched_domains_numa_masks_set(unsigned int cpu)
 {
        int node = cpu_to_node(cpu);
        int i, j;
 
-       __sched_domains_numa_masks_set(node);
-
        for (i = 0; i < sched_domains_numa_levels; i++) {
                for (j = 0; j < nr_node_ids; j++) {
-                       if (!node_online(j))
+                       if (!node_state(j, N_CPU))
                                continue;
 
                        /* Set ourselves in the remote node's masks */
        int i, j;
 
        for (i = 0; i < sched_domains_numa_levels; i++) {
-               for (j = 0; j < nr_node_ids; j++)
-                       cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);
+               for (j = 0; j < nr_node_ids; j++) {
+                       if (sched_domains_numa_masks[i][j])
+                               cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);
+               }
        }
 }
 
  */
 int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
 {
-       int i, j = cpu_to_node(cpu);
+       int i, j = cpu_to_node(cpu), found = nr_cpu_ids;
+       struct cpumask ***masks;
 
+       rcu_read_lock();
+       masks = rcu_dereference(sched_domains_numa_masks);
+       if (!masks)
+               goto unlock;
        for (i = 0; i < sched_domains_numa_levels; i++) {
-               cpu = cpumask_any_and(cpus, sched_domains_numa_masks[i][j]);
-               if (cpu < nr_cpu_ids)
-                       return cpu;
+               if (!masks[i][j])
+                       break;
+               cpu = cpumask_any_and(cpus, masks[i][j]);
+               if (cpu < nr_cpu_ids) {
+                       found = cpu;
+                       break;
+               }
        }
-       return nr_cpu_ids;
+unlock:
+       rcu_read_unlock();
+
+       return found;
 }
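
Callers get nr_cpu_ids back both when no masks are published and when no
candidate CPU exists at any level. A hypothetical wrapper showing the
intended usage (pick_near_cpu is not part of this patch):

static int pick_near_cpu(const struct cpumask *allowed, int cpu)
{
	int target = sched_numa_find_closest(allowed, cpu);

	if (target >= nr_cpu_ids)	/* nothing suitable found */
		target = cpumask_any(allowed);

	return target;
}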
 
 #endif /* CONFIG_NUMA */