www.infradead.org Git - linux.git/commitdiff
EDAC/{skx_common,i10nm}: Fix the loss of saved RRL for HBM pseudo channel 0
author Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Thu, 17 Apr 2025 15:07:19 +0000 (23:07 +0800)
committer Tony Luck <tony.luck@intel.com>
Thu, 17 Apr 2025 17:22:56 +0000 (10:22 -0700)
When enabling the retry_rd_err_log (RRL) feature during the loading of the
i10nm_edac driver with the module parameter retry_rd_err_log=2 (Linux RRL
control mode), the default values of the control bits of RRL are saved so
that they can be restored during the unloading of the driver.

In the current code, the RRL of pseudo channel 1 of HBM overwrites pseudo
channel 0 during the loading of the driver, resulting in the loss of saved
RRL for pseudo channel 0. This causes the RRL of pseudo channel 0 of HBM to
be wrongly restored with the values from pseudo channel 1 when unloading
the driver.

Fix this issue by creating two separate groups of RRL control registers
per channel to save default RRL settings of two {sub-,pseudo-}channels.

Fixes: acd4cf68fefe ("EDAC/i10nm: Retrieve and print retry_rd_err_log registers for HBM")
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Tested-by: Feng Xu <feng.f.xu@intel.com>
Link: https://lore.kernel.org/r/20250417150724.1170168-3-qiuxu.zhuo@intel.com
drivers/edac/i10nm_base.c
drivers/edac/skx_common.h

index 355a977019e944e77b3096b5993d3f0a8b9c5897..355b527d839e78dee298eaa797d24cf67375a80e 100644 (file)
@@ -95,7 +95,7 @@ static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64,
 static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0};
 static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0};
 
-static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable,
+static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable, u32 *rrl_ctl,
                                      u32 *offsets_scrub, u32 *offsets_demand,
                                      u32 *offsets_demand2)
 {
@@ -108,10 +108,10 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable
 
        if (enable) {
                /* Save default configurations */
-               imc->chan[chan].retry_rd_err_log_s = s;
-               imc->chan[chan].retry_rd_err_log_d = d;
+               rrl_ctl[0] = s;
+               rrl_ctl[1] = d;
                if (offsets_demand2)
-                       imc->chan[chan].retry_rd_err_log_d2 = d2;
+                       rrl_ctl[2] = d2;
 
                s &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
                s |=  RETRY_RD_ERR_LOG_EN;
@@ -125,25 +125,25 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable
                }
        } else {
                /* Restore default configurations */
-               if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC)
+               if (rrl_ctl[0] & RETRY_RD_ERR_LOG_UC)
                        s |=  RETRY_RD_ERR_LOG_UC;
-               if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_NOOVER)
+               if (rrl_ctl[0] & RETRY_RD_ERR_LOG_NOOVER)
                        s |=  RETRY_RD_ERR_LOG_NOOVER;
-               if (!(imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_EN))
+               if (!(rrl_ctl[0] & RETRY_RD_ERR_LOG_EN))
                        s &= ~RETRY_RD_ERR_LOG_EN;
-               if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_UC)
+               if (rrl_ctl[1] & RETRY_RD_ERR_LOG_UC)
                        d |=  RETRY_RD_ERR_LOG_UC;
-               if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_NOOVER)
+               if (rrl_ctl[1] & RETRY_RD_ERR_LOG_NOOVER)
                        d |=  RETRY_RD_ERR_LOG_NOOVER;
-               if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN))
+               if (!(rrl_ctl[1] & RETRY_RD_ERR_LOG_EN))
                        d &= ~RETRY_RD_ERR_LOG_EN;
 
                if (offsets_demand2) {
-                       if (imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_UC)
+                       if (rrl_ctl[2] & RETRY_RD_ERR_LOG_UC)
                                d2 |=  RETRY_RD_ERR_LOG_UC;
-                       if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_NOOVER))
+                       if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_NOOVER))
                                d2 &=  ~RETRY_RD_ERR_LOG_NOOVER;
-                       if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_EN))
+                       if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_EN))
                                d2 &= ~RETRY_RD_ERR_LOG_EN;
                }
        }
@@ -157,6 +157,7 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable
 static void enable_retry_rd_err_log(bool enable)
 {
        int i, j, imc_num, chan_num;
+       struct skx_channel *chan;
        struct skx_imc *imc;
        struct skx_dev *d;
 
@@ -171,8 +172,9 @@ static void enable_retry_rd_err_log(bool enable)
                        if (!imc->mbase)
                                continue;
 
+                       chan = d->imc[i].chan;
                        for (j = 0; j < chan_num; j++)
-                               __enable_retry_rd_err_log(imc, j, enable,
+                               __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[0],
                                                          res_cfg->offsets_scrub,
                                                          res_cfg->offsets_demand,
                                                          res_cfg->offsets_demand2);
@@ -186,12 +188,13 @@ static void enable_retry_rd_err_log(bool enable)
                        if (!imc->mbase || !imc->hbm_mc)
                                continue;
 
+                       chan = d->imc[i].chan;
                        for (j = 0; j < chan_num; j++) {
-                               __enable_retry_rd_err_log(imc, j, enable,
+                               __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[0],
                                                          res_cfg->offsets_scrub_hbm0,
                                                          res_cfg->offsets_demand_hbm0,
                                                          NULL);
-                               __enable_retry_rd_err_log(imc, j, enable,
+                               __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[1],
                                                          res_cfg->offsets_scrub_hbm1,
                                                          res_cfg->offsets_demand_hbm1,
                                                          NULL);
index ca5408803f87872d0c0092b5606a8a075c559c53..5afd425f3b4ff1ae827710cbd2ed46a1c8d2d933 100644 (file)
@@ -79,6 +79,9 @@
  */
 #define MCACOD_EXT_MEM_ERR     0x280
 
+/* Max RRL register sets per {,sub-,pseudo-}channel. */
+#define NUM_RRL_SET            3
+
 /*
  * Each cpu socket contains some pci devices that provide global
  * information, and also some that are local to each of the two
@@ -117,9 +120,11 @@ struct skx_dev {
                struct skx_channel {
                        struct pci_dev  *cdev;
                        struct pci_dev  *edev;
-                       u32 retry_rd_err_log_s;
-                       u32 retry_rd_err_log_d;
-                       u32 retry_rd_err_log_d2;
+                       /*
+                        * Two groups of RRL control registers per channel to save default RRL
+                        * settings of two {sub-,pseudo-}channels in Linux RRL control mode.
+                        */
+                       u32 rrl_ctl[2][NUM_RRL_SET];
                        struct skx_dimm {
                                u8 close_pg;
                                u8 bank_xor_enable;