]> www.infradead.org Git - users/willy/linux.git/commitdiff
cxl: Add mce notifier to emit aliased address for extended linear cache
authorDave Jiang <dave.jiang@intel.com>
Wed, 26 Feb 2025 16:21:21 +0000 (09:21 -0700)
committerDave Jiang <dave.jiang@intel.com>
Wed, 26 Feb 2025 21:13:49 +0000 (14:13 -0700)
Below is an example memory layout for an extended linear cache
configuration. It is presented as a single memory region of 256G, made up
of 128G of DRAM and 128G of CXL memory. The kernel sees the whole region as
256G of system memory.

              128G DRAM                          128G CXL memory
|-----------------------------------|-------------------------------------|

Data resides in either DRAM or far memory (FM) with no replication. Hot
data is swapped into DRAM by the hardware behind the scenes. When an error
is detected in one location, it is possible that the error also resides in
the aliased location. Therefore, when a memory location that is flagged by
an MCE is part of the special region, the aliased memory location needs to
be offlined as well.

Add an mce notify callback to identify if the MCE address location is part
of an extended linear cache region and handle accordingly.

Export set_mce_nospec() from the x86 code so that it can be called from
the CXL MCE notify callback.

Link: https://lore.kernel.org/linux-cxl/668333b17e4b2_5639294fd@dwillia2-xfh.jf.intel.com.notmuch/
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Li Ming <ming.li@zohomail.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Link: https://patch.msgid.link/20250226162224.3633792-5-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
arch/x86/mm/pat/set_memory.c
drivers/cxl/Kconfig
drivers/cxl/core/Makefile
drivers/cxl/core/mbox.c
drivers/cxl/core/mce.c [new file with mode: 0644]
drivers/cxl/core/mce.h [new file with mode: 0644]
drivers/cxl/core/region.c
drivers/cxl/cxl.h
drivers/cxl/cxlmem.h
tools/testing/cxl/Kbuild

index ef4514d64c0524e5854fa106e3f37ff1e1ba10a2..255a3d176956bf1e99b2a0e62bdd5aa530d4b246 100644 (file)
@@ -2081,6 +2081,7 @@ int set_mce_nospec(unsigned long pfn)
                pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
        return rc;
 }
+EXPORT_SYMBOL_GPL(set_mce_nospec);
 
 /* Restore full speculative operation to the pfn. */
 int clear_mce_nospec(unsigned long pfn)
index 876469e23f7a7f0d53da9f33ac81498e2754cd0a..d1c91dacae5695b58ab14281197f9e0a7e8b48ed 100644 (file)
@@ -146,4 +146,8 @@ config CXL_REGION_INVALIDATION_TEST
          If unsure, or if this kernel is meant for production environments,
          say N.
 
+config CXL_MCE
+       def_bool y
+       depends on X86_MCE && MEMORY_FAILURE
+
 endif
index 1a0c9c6ca8182dfa5fa715d1b86b795126a453c8..61c9332b3582e3013f4a0f979229a86b53a235f4 100644 (file)
@@ -17,3 +17,4 @@ cxl_core-y += cdat.o
 cxl_core-y += acpi.o
 cxl_core-$(CONFIG_TRACING) += trace.o
 cxl_core-$(CONFIG_CXL_REGION) += region.o
+cxl_core-$(CONFIG_CXL_MCE) += mce.o
index f26b96dd7410683b0364528d4a8e74204494bc4c..c06f19a729e8429c5d5ad4c4438764ddb65fe28a 100644 (file)
@@ -11,6 +11,7 @@
 
 #include "core.h"
 #include "trace.h"
+#include "mce.h"
 
 static bool cxl_raw_allow_all;
 
@@ -1444,6 +1445,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_mailbox_init, "CXL");
 struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
 {
        struct cxl_memdev_state *mds;
+       int rc;
 
        mds = devm_kzalloc(dev, sizeof(*mds), GFP_KERNEL);
        if (!mds) {
@@ -1459,6 +1461,10 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
        mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID;
        mds->pmem_perf.qos_class = CXL_QOS_CLASS_INVALID;
 
+       rc = devm_cxl_register_mce_notifier(dev, &mds->mce_notifier);
+       if (rc)
+               return ERR_PTR(rc);
+
        return mds;
 }
 EXPORT_SYMBOL_NS_GPL(cxl_memdev_state_create, "CXL");
diff --git a/drivers/cxl/core/mce.c b/drivers/cxl/core/mce.c
new file mode 100644 (file)
index 0000000..ff8d078
--- /dev/null
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2024 Intel Corporation. All rights reserved. */
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/set_memory.h>
+#include <asm/mce.h>
+#include <cxlmem.h>
+#include "mce.h"
+
+/*
+ * cxl_handle_mce() - MCE decode chain callback for a CXL memory device
+ * @nb: notifier block embedded in a struct cxl_memdev_state as mce_notifier
+ * @val: notifier event value (not used)
+ * @data: the struct mce record describing the machine check
+ *
+ * Look up whether the physical address reported by the MCE has an extended
+ * linear cache alias in one of the regions of this device's endpoint port
+ * and, if so, request that the aliased page be taken down too.
+ *
+ * Return: NOTIFY_DONE when there is nothing to do for this device (no usable
+ * address, no memdev or endpoint yet, invalid pfn, or no alias), NOTIFY_OK
+ * once the aliased page has been passed to memory_failure().
+ */
+static int cxl_handle_mce(struct notifier_block *nb, unsigned long val,
+                         void *data)
+{
+       struct cxl_memdev_state *mds = container_of(nb, struct cxl_memdev_state,
+                                                   mce_notifier);
+       struct cxl_memdev *cxlmd = mds->cxlds.cxlmd;
+       struct cxl_port *endpoint;
+       struct mce *mce = data;
+       u64 spa, spa_alias;
+       unsigned long pfn;
+
+       if (!mce || !mce_usable_address(mce))
+               return NOTIFY_DONE;
+
+       /*
+        * The notifier is registered while the memdev state is being
+        * created, so an MCE may be delivered while no memdev is attached
+        * to it yet. Do not dereference a NULL memdev in that window.
+        */
+       if (!cxlmd)
+               return NOTIFY_DONE;
+
+       endpoint = cxlmd->endpoint;
+       if (!endpoint)
+               return NOTIFY_DONE;
+
+       spa = mce->addr & MCI_ADDR_PHYSADDR;
+
+       pfn = spa >> PAGE_SHIFT;
+       if (!pfn_valid(pfn))
+               return NOTIFY_DONE;
+
+       /* ~0ULL means the address has no alias behind this endpoint. */
+       spa_alias = cxl_port_get_spa_cache_alias(endpoint, spa);
+       if (spa_alias == ~0ULL)
+               return NOTIFY_DONE;
+
+       pfn = spa_alias >> PAGE_SHIFT;
+
+       /*
+        * Take down the aliased memory page. The original memory page flagged
+        * by the MCE will be taken care of by the standard MCE handler.
+        */
+       dev_emerg(mds->cxlds.dev, "Offlining aliased SPA address0: %#llx\n",
+                 spa_alias);
+       /* Only unmap the page from the 1:1 map if memory_failure() succeeded. */
+       if (!memory_failure(pfn, 0))
+               set_mce_nospec(pfn);
+
+       return NOTIFY_OK;
+}
+
+/* devm action callback: drop @mce_notifier from the MCE decode chain. */
+static void cxl_unregister_mce_notifier(void *mce_notifier)
+{
+       struct notifier_block *nb = mce_notifier;
+
+       mce_unregister_decode_chain(nb);
+}
+
+/*
+ * devm_cxl_register_mce_notifier() - hook a CXL callback into the MCE decode chain
+ * @dev: device whose devm action list owns the unregistration
+ * @mce_notifier: notifier block to initialize and register
+ *
+ * Sets up @mce_notifier to call cxl_handle_mce() at MCE_PRIO_UC priority,
+ * registers it, and arranges for cxl_unregister_mce_notifier() to run as a
+ * devm action of @dev.
+ *
+ * Return: the result of devm_add_action_or_reset().
+ */
+int devm_cxl_register_mce_notifier(struct device *dev,
+                                  struct notifier_block *mce_notifier)
+{
+       int rc;
+
+       mce_notifier->priority = MCE_PRIO_UC;
+       mce_notifier->notifier_call = cxl_handle_mce;
+       mce_register_decode_chain(mce_notifier);
+
+       rc = devm_add_action_or_reset(dev, cxl_unregister_mce_notifier,
+                                     mce_notifier);
+       return rc;
+}
diff --git a/drivers/cxl/core/mce.h b/drivers/cxl/core/mce.h
new file mode 100644 (file)
index 0000000..ace7342
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2024 Intel Corporation. All rights reserved. */
+#ifndef _CXL_CORE_MCE_H_
+#define _CXL_CORE_MCE_H_
+
+#include <linux/notifier.h>
+
+#ifdef CONFIG_CXL_MCE
+/*
+ * Register @mce_notifier on the MCE decode chain. The registration is undone
+ * through a devm action attached to @dev.
+ */
+int devm_cxl_register_mce_notifier(struct device *dev,
+                                  struct notifier_block *mce_notifier);
+#else
+/*
+ * MCE handling is not built in, so there is nothing to register. Report
+ * success: cxl_memdev_state_create() treats any non-zero return as fatal, and
+ * an error here would make it fail on every configuration without
+ * CONFIG_CXL_MCE.
+ */
+static inline int
+devm_cxl_register_mce_notifier(struct device *dev,
+                              struct notifier_block *mce_notifier)
+{
+       return 0;
+}
+#endif
+
+#endif
index a20ef3f10fef5040a3aa02e44345acceb41581da..c2b4162aee420dc13ba6daf714b81ae0d27a892b 100644 (file)
@@ -3447,6 +3447,34 @@ out:
 }
 EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, "CXL");
 
+/**
+ * cxl_port_get_spa_cache_alias() - find the extended linear cache alias of an SPA
+ * @endpoint: endpoint port whose regions are searched
+ * @spa: system physical address to look up
+ *
+ * Walk the regions referenced by @endpoint and, for the region whose resource
+ * contains @spa, compute the address that aliases with it. An address in the
+ * first cache_size bytes of the region aliases with the address cache_size
+ * bytes above it; any address at or beyond start + cache_size aliases with
+ * the address cache_size bytes below it.
+ *
+ * Return: the aliased SPA, or ~0ULL if @endpoint is NULL, no region contains
+ * @spa, or the containing region has no cache_size set.
+ */
+u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa)
+{
+       struct cxl_region_ref *iter;
+       unsigned long index;
+
+       if (!endpoint)
+               return ~0ULL;
+
+       /*
+        * NOTE(review): the region rwsem is taken for write although nothing
+        * below modifies region state - confirm whether a read lock suffices.
+        */
+       guard(rwsem_write)(&cxl_region_rwsem);
+
+       xa_for_each(&endpoint->regions, index, iter) {
+               struct cxl_region_params *p = &iter->region->params;
+
+               if (p->res->start <= spa && spa <= p->res->end) {
+                       if (!p->cache_size)
+                               return ~0ULL;
+
+                       /*
+                        * start + cache_size is the first byte of the upper
+                        * part, so it must be included here; otherwise that
+                        * address would be pushed another cache_size upward.
+                        */
+                       if (spa >= p->res->start + p->cache_size)
+                               return spa - p->cache_size;
+
+                       return spa + p->cache_size;
+               }
+       }
+
+       return ~0ULL;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_port_get_spa_cache_alias, "CXL");
+
 static int is_system_ram(struct resource *res, void *arg)
 {
        struct cxl_region *cxlr = arg;
index 7ee96867ac73d7d64198d66b5d0b40b42d681ec8..4785cff5209f7235c786b57ca2fb351e1b7a73e7 100644 (file)
@@ -877,6 +877,7 @@ struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev);
 int cxl_add_to_region(struct cxl_port *root,
                      struct cxl_endpoint_decoder *cxled);
 struct cxl_dax_region *to_cxl_dax_region(struct device *dev);
+u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa);
 #else
 static inline bool is_cxl_pmem_region(struct device *dev)
 {
@@ -895,6 +896,11 @@ static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
 {
        return NULL;
 }
+/*
+ * Stub used when region support is not built. It must return ~0ULL, the
+ * "no alias" value that callers compare against; returning 0 would be taken
+ * as a valid aliased address.
+ */
+static inline u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint,
+                                              u64 spa)
+{
+       return ~0ULL;
+}
 #endif
 
 void cxl_endpoint_parse_cdat(struct cxl_port *port);
index 2a25d1957ddb9772b8d4dca92534ba76a909f8b3..55752cbf408c5a7793126857235b9ac5541d02a0 100644 (file)
@@ -477,6 +477,7 @@ static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox)
  * @poison: poison driver state info
  * @security: security driver state info
  * @fw: firmware upload / activation state
+ * @mce_notifier: MCE notifier
  *
  * See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for
  * details on capacity parameters.
@@ -503,6 +504,7 @@ struct cxl_memdev_state {
        struct cxl_poison_state poison;
        struct cxl_security_state security;
        struct cxl_fw_state fw;
+       struct notifier_block mce_notifier;
 };
 
 static inline struct cxl_memdev_state *
index 1ae13987a8a230a4530e83e67de8b68488d67b25..f625eb2d2dc57489dbdb64a019c04a76cd155568 100644 (file)
@@ -64,6 +64,7 @@ cxl_core-y += $(CXL_CORE_SRC)/cdat.o
 cxl_core-y += $(CXL_CORE_SRC)/acpi.o
 cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o
 cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
+cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o
 cxl_core-y += config_check.o
 cxl_core-y += cxl_core_test.o
 cxl_core-y += cxl_core_exports.o