@@ ... @@ struct cache {
        struct cpumask shared_cpu_map; /* online CPUs using this cache */
        int type;                      /* split cache disambiguation */
        int level;                     /* level not explicit in device tree */
+       int group_id;                  /* id of the group of threads that share this cache */
        struct list_head list;         /* global list of cache objects */
        struct cache *next_local;      /* next cache of >= level */
 };
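For illustration (not part of the patch): on systems described by the "ibm,thread-groups" device-tree property, one cache node in the device tree can stand for several distinct caches, one per sub-group of threads, so the OF node alone no longer identifies a cache. Below is a minimal userspace sketch of the resulting (node, group-id) identity; toy_cache, lookup() and the "l1-cache@0" data are hypothetical, not kernel API.

    #include <stdio.h>
    #include <string.h>

    /*
     * Toy model: two cache objects share one device-tree node but
     * belong to different thread groups, so lookups must match on
     * the pair (node, group_id) rather than the node alone.
     */
    struct toy_cache {
            const char *ofnode;     /* stands in for struct device_node * */
            int group_id;           /* first CPU of the sharing thread group */
            int level;
    };

    static struct toy_cache caches[] = {
            { "l1-cache@0", 0, 1 }, /* L1 used by thread group {0,2,4,6} */
            { "l1-cache@0", 1, 1 }, /* L1 used by thread group {1,3,5,7} */
    };

    static struct toy_cache *lookup(const char *node, int group_id)
    {
            for (size_t i = 0; i < sizeof(caches) / sizeof(caches[0]); i++)
                    if (!strcmp(caches[i].ofnode, node) &&
                        caches[i].group_id == group_id)
                            return &caches[i];
            return NULL;
    }

    int main(void)
    {
            /* Same node, different group ids: two distinct caches. */
            printf("group 0 -> %p\n", (void *)lookup("l1-cache@0", 0));
            printf("group 1 -> %p\n", (void *)lookup("l1-cache@0", 1));
            return 0;
    }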
@@ ... @@ static const char *cache_type_string(const struct cache *cache)
 }
 
 static void cache_init(struct cache *cache, int type, int level,
-                      struct device_node *ofnode)
+                      struct device_node *ofnode, int group_id)
 {
        cache->type = type;
        cache->level = level;
        cache->ofnode = of_node_get(ofnode);
+       cache->group_id = group_id;
        INIT_LIST_HEAD(&cache->list);
        list_add(&cache->list, &cache_list);
 }
 
-static struct cache *new_cache(int type, int level, struct device_node *ofnode)
+static struct cache *new_cache(int type, int level,
+                              struct device_node *ofnode, int group_id)
 {
        struct cache *cache;
 
        cache = kzalloc(sizeof(*cache), GFP_KERNEL);
        if (cache)
-               cache_init(cache, type, level, ofnode);
+               cache_init(cache, type, level, ofnode, group_id);
 
        return cache;
 }
@@ ... @@ static struct cache *cache_find_first_sibling(struct cache *cache)
                return cache;
 
        list_for_each_entry(iter, &cache_list, list)
-               if (iter->ofnode == cache->ofnode && iter->next_local == cache)
+               if (iter->ofnode == cache->ofnode &&
+                   iter->group_id == cache->group_id &&
+                   iter->next_local == cache)
                        return iter;
 
        return cache;
 }
 
-/* return the first cache on a local list matching node */
-static struct cache *cache_lookup_by_node(const struct device_node *node)
+/* return the first cache on a local list matching node and thread-group id */
+static struct cache *cache_lookup_by_node_group(const struct device_node *node,
+                                               int group_id)
 {
        struct cache *cache = NULL;
        struct cache *iter;
 
        list_for_each_entry(iter, &cache_list, list) {
-               if (iter->ofnode != node)
+               if (iter->ofnode != node ||
+                   iter->group_id != group_id)
                        continue;
                cache = cache_find_first_sibling(iter);
                break;
@@ ... @@ static int cache_is_unified_d(const struct device_node *np)
                CACHE_TYPE_UNIFIED_D : CACHE_TYPE_UNIFIED;
 }
 
-static struct cache *cache_do_one_devnode_unified(struct device_node *node, int level)
+static struct cache *cache_do_one_devnode_unified(struct device_node *node, int group_id,
+                                                 int level)
 {
        pr_debug("creating L%d ucache for %pOFP\n", level, node);
 
-       return new_cache(cache_is_unified_d(node), level, node);
+       return new_cache(cache_is_unified_d(node), level, node, group_id);
 }
 
-static struct cache *cache_do_one_devnode_split(struct device_node *node,
+static struct cache *cache_do_one_devnode_split(struct device_node *node, int group_id,
                                                int level)
 {
        struct cache *dcache, *icache;
        pr_debug("creating L%d dcache and icache for %pOFP\n", level,
                 node);
 
-       dcache = new_cache(CACHE_TYPE_DATA, level, node);
-       icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node);
+       dcache = new_cache(CACHE_TYPE_DATA, level, node, group_id);
+       icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node, group_id);
 
        if (!dcache || !icache)
                goto err;
@@ ... @@ static struct cache *cache_do_one_devnode_split(struct device_node *node,
        return NULL;
 }
 
-static struct cache *cache_do_one_devnode(struct device_node *node, int level)
+static struct cache *cache_do_one_devnode(struct device_node *node, int group_id, int level)
 {
        struct cache *cache;
 
        if (cache_node_is_unified(node))
-               cache = cache_do_one_devnode_unified(node, level);
+               cache = cache_do_one_devnode_unified(node, group_id, level);
        else
-               cache = cache_do_one_devnode_split(node, level);
+               cache = cache_do_one_devnode_split(node, group_id, level);
 
        return cache;
 }
 
 static struct cache *cache_lookup_or_instantiate(struct device_node *node,
+                                                int group_id,
                                                 int level)
 {
        struct cache *cache;
 
-       cache = cache_lookup_by_node(node);
+       cache = cache_lookup_by_node_group(node, group_id);
 
        WARN_ONCE(cache && cache->level != level,
                  "cache level mismatch on lookup (got %d, expected %d)\n",
                  cache->level, level);
 
        if (!cache)
-               cache = cache_do_one_devnode(node, level);
+               cache = cache_do_one_devnode(node, group_id, level);
 
        return cache;
 }
@@ ... @@ static void do_subsidiary_caches_debugcheck(struct cache *cache)
                  of_node_get_device_type(cache->ofnode));
 }
 
-static void do_subsidiary_caches(struct cache *cache)
+/*
+ * If sub-groups of threads in a core containing @cpu_id share the
+ * L@level-cache (information obtained via the "ibm,thread-groups"
+ * device-tree property), then we identify the group by the first
+ * thread-sibling in the group. We define this to be the group-id.
+ *
+ * In the absence of any thread-group information for L@level-cache,
+ * this function returns -1.
+ */
+static int get_group_id(unsigned int cpu_id, int level)
+{
+       if (has_big_cores && level == 1)
+               return cpumask_first(per_cpu(thread_group_l1_cache_map,
+                                            cpu_id));
+       else if (thread_group_shares_l2 && level == 2)
+               return cpumask_first(per_cpu(thread_group_l2_cache_map,
+                                            cpu_id));
+       return -1;
+}
+
+static void do_subsidiary_caches(struct cache *cache, unsigned int cpu_id)
 {
        struct device_node *subcache_node;
        int level = cache->level;
@@ ... @@ static void do_subsidiary_caches(struct cache *cache)
 
        while ((subcache_node = of_find_next_cache_node(cache->ofnode))) {
                struct cache *subcache;
+               int group_id;
 
                level++;
-               subcache = cache_lookup_or_instantiate(subcache_node, level);
+               group_id = get_group_id(cpu_id, level);
+               subcache = cache_lookup_or_instantiate(subcache_node, group_id, level);
                of_node_put(subcache_node);
                if (!subcache)
                        break;
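A worked example of the group-id convention described in the comment above (illustrative only; it assumes a big-core style layout in which threads {0,2,4,6} and {1,3,5,7} of one core each share their own L1). The sketch mocks the per-CPU thread_group_l1_cache_map with plain bitmasks, since the real map is per_cpu state built from "ibm,thread-groups":

    #include <stdio.h>

    /*
     * Mock thread_group_l1_cache_map: for each of the 8 threads of one
     * core, a bitmask of the threads it shares an L1 with. 0x55 is
     * {0,2,4,6}; 0xaa is {1,3,5,7}. Hypothetical data.
     */
    static const unsigned long l1_map[8] = {
            0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa,
    };

    /* Equivalent of cpumask_first(): lowest set bit, or -1 if empty.
     * __builtin_ctzl() is a GCC/Clang builtin. */
    static int mask_first(unsigned long mask)
    {
            return mask ? __builtin_ctzl(mask) : -1;
    }

    /* Mirrors get_group_id() for level 1 on the mock data. */
    static int toy_group_id(unsigned int cpu)
    {
            return mask_first(l1_map[cpu]);
    }

    int main(void)
    {
            for (unsigned int cpu = 0; cpu < 8; cpu++)
                    printf("cpu %u -> group id %d\n", cpu, toy_group_id(cpu));
            /* cpus 0,2,4,6 report group id 0; cpus 1,3,5,7 report 1. */
            return 0;
    }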
@@ ... @@ static struct cache *cache_chain_instantiate(unsigned int cpu_id)
 {
        struct device_node *cpu_node;
        struct cache *cpu_cache = NULL;
+       int group_id;
 
        pr_debug("creating cache object(s) for CPU %i\n", cpu_id);
 
@@ ... @@ static struct cache *cache_chain_instantiate(unsigned int cpu_id)
        if (!cpu_node)
                goto out;
 
-       cpu_cache = cache_lookup_or_instantiate(cpu_node, 1);
+       group_id = get_group_id(cpu_id, 1);
+
+       cpu_cache = cache_lookup_or_instantiate(cpu_node, group_id, 1);
        if (!cpu_cache)
                goto out;
 
-       do_subsidiary_caches(cpu_cache);
+       do_subsidiary_caches(cpu_cache, cpu_id);
 
        cache_cpu_set(cpu_cache, cpu_id);
 out:
@@ ... @@ static struct cache *cache_lookup_by_cpu(unsigned int cpu_id)
 {
        struct device_node *cpu_node;
        struct cache *cache;
+       int group_id;
 
        cpu_node = of_get_cpu_node(cpu_id, NULL);
        WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id);
        if (!cpu_node)
                return NULL;
 
-       cache = cache_lookup_by_node(cpu_node);
+       group_id = get_group_id(cpu_id, 1);
+       cache = cache_lookup_by_node_group(cpu_node, group_id);
        of_node_put(cpu_node);
 
        return cache;
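For reference, a sketch of how the "ibm,thread-groups" property that drives all of the above can be decoded. The encoding assumed here, repeated blocks of <property, nr-groups, threads-per-group, thread ids...> with property 1 covering threads that share L1/core resources and property 2 covering threads that share an L2, follows the description in the related powerpc/smp commits; treat it as an assumption, and note the values below are made up for illustration:

    #include <stdio.h>
    #include <stdint.h>

    /* Hypothetical "ibm,thread-groups" value for one 8-thread core. */
    static const uint32_t thread_groups[] = {
            /* property 1: 2 groups of 4 threads, each sharing an L1 */
            1, 2, 4,  0, 2, 4, 6,  1, 3, 5, 7,
            /* property 2: 1 group of 8 threads sharing the L2 */
            2, 1, 8,  0, 1, 2, 3, 4, 5, 6, 7,
    };

    int main(void)
    {
            size_t n = sizeof(thread_groups) / sizeof(thread_groups[0]);
            size_t i = 0;

            while (i + 3 <= n) {
                    uint32_t prop = thread_groups[i];
                    uint32_t nr_groups = thread_groups[i + 1];
                    uint32_t per_group = thread_groups[i + 2];

                    i += 3;
                    for (uint32_t g = 0; g < nr_groups; g++) {
                            /* the first member doubles as the group id */
                            printf("property %u group (id %u):",
                                   prop, thread_groups[i]);
                            for (uint32_t t = 0; t < per_group; t++)
                                    printf(" %u", thread_groups[i++]);
                            printf("\n");
                    }
            }
            return 0;
    }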