From bc88d44bd7e45b992cf8c2c2ffbc7bb3e24db4a7 Mon Sep 17 00:00:00 2001
From: Steven Price
Date: Mon, 21 Oct 2024 11:41:05 +0100
Subject: [PATCH 01/16] irqchip/gic-v3-its: Fix over allocation in itt_alloc_pool()

itt_alloc_pool() calls its_alloc_pages_node() to allocate an individual
page to add to the pool (for allocations smaller than PAGE_SIZE).
However, the final argument of its_alloc_pages_node() is the page
order, not the number of pages, so passing 1 allocates two pages and
wastes one. Request an order-0 allocation to get the single page that
is actually needed.

Signed-off-by: Steven Price
Signed-off-by: Thomas Gleixner
Link: https://lore.kernel.org/all/1f6e19c4-1fb9-43ab-a8a2-a465c9cff84b@arm.com
Closes: https://lore.kernel.org/r/ed65312a-245c-4fa5-91ad-5d620cab7c6b%40nvidia.com
---
 drivers/irqchip/irq-gic-v3-its.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index be77f927110d..395961fb2180 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -261,7 +261,7 @@ static void *itt_alloc_pool(int node, int size)
 		if (addr)
 			break;
 
-		page = its_alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 1);
+		page = its_alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
 		if (!page)
 			break;
-- 
2.51.0

From 2396eefa075a31884d3336e1e94db47a0bd2a456 Mon Sep 17 00:00:00 2001
From: Zijun Hu
Date: Fri, 18 Oct 2024 20:08:25 +0800
Subject: [PATCH 02/16] genirq/devres: Don't free interrupt which is not managed by devres

If devres_destroy() does not find a matching devres entry, then
devm_free_irq() emits a warning and unconditionally tries to free the
interrupt. That's wrong as devm_free_irq() should only undo what
devm_request_irq() set up.

Replace devres_destroy() with a call to devres_release(), which only
invokes the release function (free_irq()) if a matching devres entry
was found.

[ tglx: Massaged change log ]

Signed-off-by: Zijun Hu
Signed-off-by: Thomas Gleixner
Link: https://lore.kernel.org/all/20241018-devres_kernel_fix-v2-1-08918ae84982@quicinc.com
---
 kernel/irq/devres.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index b3e98668f4dd..eb16a58e0322 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -141,9 +141,8 @@ void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id)
 {
 	struct irq_devres match_data = { irq, dev_id };
 
-	WARN_ON(devres_destroy(dev, devm_irq_release, devm_irq_match,
+	WARN_ON(devres_release(dev, devm_irq_release, devm_irq_match,
 		&match_data));
-	free_irq(irq, dev_id);
 }
 EXPORT_SYMBOL(devm_free_irq);
-- 
2.51.0

From d1a128bc3057a090b97ab5a9f938874df3d3f124 Mon Sep 17 00:00:00 2001
From: Thorsten Blum
Date: Sat, 26 Oct 2024 17:40:29 +0200
Subject: [PATCH 03/16] genirq/irqdesc: Use str_enabled_disabled() helper in wakeup_show()

Remove hard-coded strings by using the str_enabled_disabled() helper
function.

Signed-off-by: Thorsten Blum
Signed-off-by: Thomas Gleixner
Link: https://lore.kernel.org/all/20241026154029.158977-2-thorsten.blum@linux.dev
---
 kernel/irq/irqdesc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 479cf1caa610..0253e77fcd9a 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -15,6 +15,7 @@
 #include <linux/maple_tree.h>
 #include <linux/irqdomain.h>
 #include <linux/sysfs.h>
+#include <linux/string_choices.h>
 
 #include "internals.h"
 
@@ -320,8 +321,7 @@ static ssize_t wakeup_show(struct kobject *kobj,
 	ssize_t ret = 0;
 
 	raw_spin_lock_irq(&desc->lock);
-	ret = sprintf(buf, "%s\n",
-		      irqd_is_wakeup_set(&desc->irq_data) ?
"enabled" : "disabled"); + ret = sprintf(buf, "%s\n", str_enabled_disabled(irqd_is_wakeup_set(&desc->irq_data))); raw_spin_unlock_irq(&desc->lock); return ret; -- 2.51.0 From 496461050b74d44a2adb39511403a36c9a555bc7 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 28 Oct 2024 18:59:23 +0100 Subject: [PATCH 04/16] irqchip/mips-gic: Replace open coded online CPU iterations Several places in the MIPS GIC driver iterate over the online CPUs to operate on the CPU's GIC local register block, accessed via the GIC's other/redirect register block. Abstract the process of iterating over online CPUs & configuring the other/redirect region to access their registers through a new for_each_online_cpu_gic() macro and convert all usage sites over. Signed-off-by: Paul Burton Signed-off-by: Chao-ying Fu Signed-off-by: Dragan Mladjenovic Signed-off-by: Aleksandar Rikalo Signed-off-by: Thomas Gleixner Tested-by: Serge Semin Tested-by: Gregory CLEMENT Link: https://lore.kernel.org/all/20241028175935.51250-2-arikalo@gmail.com --- drivers/irqchip/irq-mips-gic.c | 59 +++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 76253e864f23..6c7a7d2f0438 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -66,6 +66,44 @@ static struct gic_all_vpes_chip_data { bool mask; } gic_all_vpes_chip_data[GIC_NUM_LOCAL_INTRS]; +static int __gic_with_next_online_cpu(int prev) +{ + unsigned int cpu; + + /* Discover the next online CPU */ + cpu = cpumask_next(prev, cpu_online_mask); + + /* If there isn't one, we're done */ + if (cpu >= nr_cpu_ids) + return cpu; + + /* + * Move the access lock to the next CPU's GIC local register block. + * + * Set GIC_VL_OTHER. Since the caller holds gic_lock nothing can + * clobber the written value. + */ + write_gic_vl_other(mips_cm_vp_id(cpu)); + + return cpu; +} + +/** + * for_each_online_cpu_gic() - Iterate over online CPUs, access local registers + * @cpu: An integer variable to hold the current CPU number + * @gic_lock: A pointer to raw spin lock used as a guard + * + * Iterate over online CPUs & configure the other/redirect register region to + * access each CPUs GIC local register block, which can be accessed from the + * loop body using read_gic_vo_*() or write_gic_vo_*() accessor functions or + * their derivatives. 
+ */
+#define for_each_online_cpu_gic(cpu, gic_lock)		\
+	guard(raw_spinlock_irqsave)(gic_lock);		\
+	for ((cpu) = __gic_with_next_online_cpu(-1);	\
+	     (cpu) < nr_cpu_ids;			\
+	     (cpu) = __gic_with_next_online_cpu(cpu))
+
 static void gic_clear_pcpu_masks(unsigned int intr)
 {
 	unsigned int i;
@@ -350,37 +388,27 @@ static struct irq_chip gic_local_irq_controller = {
 static void gic_mask_local_irq_all_vpes(struct irq_data *d)
 {
 	struct gic_all_vpes_chip_data *cd;
-	unsigned long flags;
 	int intr, cpu;
 
 	intr = GIC_HWIRQ_TO_LOCAL(d->hwirq);
 	cd = irq_data_get_irq_chip_data(d);
 	cd->mask = false;
 
-	raw_spin_lock_irqsave(&gic_lock, flags);
-	for_each_online_cpu(cpu) {
-		write_gic_vl_other(mips_cm_vp_id(cpu));
+	for_each_online_cpu_gic(cpu, &gic_lock)
 		write_gic_vo_rmask(BIT(intr));
-	}
-	raw_spin_unlock_irqrestore(&gic_lock, flags);
 }
 
 static void gic_unmask_local_irq_all_vpes(struct irq_data *d)
 {
 	struct gic_all_vpes_chip_data *cd;
-	unsigned long flags;
 	int intr, cpu;
 
 	intr = GIC_HWIRQ_TO_LOCAL(d->hwirq);
 	cd = irq_data_get_irq_chip_data(d);
 	cd->mask = true;
 
-	raw_spin_lock_irqsave(&gic_lock, flags);
-	for_each_online_cpu(cpu) {
-		write_gic_vl_other(mips_cm_vp_id(cpu));
+	for_each_online_cpu_gic(cpu, &gic_lock)
 		write_gic_vo_smask(BIT(intr));
-	}
-	raw_spin_unlock_irqrestore(&gic_lock, flags);
 }
 
 static void gic_all_vpes_irq_cpu_online(void)
@@ -469,7 +497,6 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq,
 			      irq_hw_number_t hwirq)
 {
 	struct gic_all_vpes_chip_data *cd;
-	unsigned long flags;
 	unsigned int intr;
 	int err, cpu;
 	u32 map;
@@ -533,12 +560,8 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq,
 	if (!gic_local_irq_is_routable(intr))
 		return -EPERM;
 
-	raw_spin_lock_irqsave(&gic_lock, flags);
-	for_each_online_cpu(cpu) {
-		write_gic_vl_other(mips_cm_vp_id(cpu));
+	for_each_online_cpu_gic(cpu, &gic_lock)
 		write_gic_vo_map(mips_gic_vx_map_reg(intr), map);
-	}
-	raw_spin_unlock_irqrestore(&gic_lock, flags);
 
 	return 0;
 }
-- 
2.51.0

From d9e2ed610a6094534d13ea347c7b7a5bd7ce4ee5 Mon Sep 17 00:00:00 2001
From: Paul Burton
Date: Mon, 28 Oct 2024 18:59:24 +0100
Subject: [PATCH 05/16] irqchip/mips-gic: Support multi-cluster in for_each_online_cpu_gic()

Use the CM's GCR_CL_REDIRECT register to access registers in remote
clusters, so that users of for_each_online_cpu_gic() gain support for
multi-cluster without further changes.
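As a rough sketch of the mechanism (illustrative only; it reuses the
redirect accessors and CM helpers introduced elsewhere in this series,
with error handling trimmed), a remote-cluster register access boils
down to:

	if (cl != cpu_cluster(&current_cpu_data)) {
		/* Point the redirect block at cluster 'cl' */
		mips_cm_lock_other(cl, 0, 0, CM_GCR_Cx_OTHER_BLOCK_GLOBAL);
		write_gic_redir_rmask(BIT(intr));
		mips_cm_unlock_other();
	} else {
		/* Local cluster: plain GIC register access */
		write_gic_rmask(BIT(intr));
	}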
Signed-off-by: Paul Burton Signed-off-by: Chao-ying Fu Signed-off-by: Dragan Mladjenovic Signed-off-by: Aleksandar Rikalo Signed-off-by: Thomas Gleixner Tested-by: Serge Semin Tested-by: Gregory CLEMENT Link: https://lore.kernel.org/all/20241028175935.51250-3-arikalo@gmail.com --- drivers/irqchip/irq-mips-gic.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 6c7a7d2f0438..29bdfdce2123 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -88,6 +88,12 @@ static int __gic_with_next_online_cpu(int prev) return cpu; } +static inline void gic_unlock_cluster(void) +{ + if (mips_cps_multicluster_cpus()) + mips_cm_unlock_other(); +} + /** * for_each_online_cpu_gic() - Iterate over online CPUs, access local registers * @cpu: An integer variable to hold the current CPU number @@ -102,6 +108,7 @@ static int __gic_with_next_online_cpu(int prev) guard(raw_spinlock_irqsave)(gic_lock); \ for ((cpu) = __gic_with_next_online_cpu(-1); \ (cpu) < nr_cpu_ids; \ + gic_unlock_cluster(), \ (cpu) = __gic_with_next_online_cpu(cpu)) static void gic_clear_pcpu_masks(unsigned int intr) -- 2.51.0 From c7c0d13d1d308b6e2a3d69274b38fca0167081df Mon Sep 17 00:00:00 2001 From: Chao-ying Fu Date: Mon, 28 Oct 2024 18:59:25 +0100 Subject: [PATCH 06/16] irqchip/mips-gic: Setup defaults in each cluster In multi-cluster MIPS I6500 systems, there is a GIC per cluster. The default shared interrupt setup configured in gic_of_init() applies only to the GIC in the cluster containing the boot CPU, leaving the GICs of other clusters unconfigured. Configure the other clusters as well. Signed-off-by: Chao-ying Fu Signed-off-by: Dragan Mladjenovic Signed-off-by: Aleksandar Rikalo Signed-off-by: Thomas Gleixner Tested-by: Serge Semin Tested-by: Gregory CLEMENT Link: https://lore.kernel.org/all/20241028175935.51250-4-arikalo@gmail.com --- drivers/irqchip/irq-mips-gic.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 29bdfdce2123..d93a076620c7 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -764,7 +764,7 @@ static int gic_cpu_startup(unsigned int cpu) static int __init gic_of_init(struct device_node *node, struct device_node *parent) { - unsigned int cpu_vec, i, gicconfig; + unsigned int cpu_vec, i, gicconfig, cl, nclusters; unsigned long reserved; phys_addr_t gic_base; struct resource res; @@ -845,11 +845,29 @@ static int __init gic_of_init(struct device_node *node, board_bind_eic_interrupt = &gic_bind_eic_interrupt; - /* Setup defaults */ - for (i = 0; i < gic_shared_intrs; i++) { - change_gic_pol(i, GIC_POL_ACTIVE_HIGH); - change_gic_trig(i, GIC_TRIG_LEVEL); - write_gic_rmask(i); + /* + * Initialise each cluster's GIC shared registers to sane default + * values. + * Otherwise, the IPI set up will be erased if we move code + * to gic_cpu_startup for each cpu. 
+ */
+	nclusters = mips_cps_numclusters();
+	for (cl = 0; cl < nclusters; cl++) {
+		if (cl == cpu_cluster(&current_cpu_data)) {
+			for (i = 0; i < gic_shared_intrs; i++) {
+				change_gic_pol(i, GIC_POL_ACTIVE_HIGH);
+				change_gic_trig(i, GIC_TRIG_LEVEL);
+				write_gic_rmask(i);
+			}
+		} else {
+			mips_cm_lock_other(cl, 0, 0, CM_GCR_Cx_OTHER_BLOCK_GLOBAL);
+			for (i = 0; i < gic_shared_intrs; i++) {
+				change_gic_redir_pol(i, GIC_POL_ACTIVE_HIGH);
+				change_gic_redir_trig(i, GIC_TRIG_LEVEL);
+				write_gic_redir_rmask(i);
+			}
+			mips_cm_unlock_other();
+		}
 	}
 
 	return cpuhp_setup_state(CPUHP_AP_IRQ_MIPS_GIC_STARTING,
-- 
2.51.0

From 322a9063876890895cb8308cc6f59de312e8aad5 Mon Sep 17 00:00:00 2001
From: Paul Burton
Date: Mon, 28 Oct 2024 18:59:26 +0100
Subject: [PATCH 07/16] irqchip/mips-gic: Multi-cluster support

The MIPS I6500 CPU & CM (Coherence Manager) 3.5 introduce the concept of
multiple clusters to the system. In these systems, each cluster contains
its own GIC, so the GIC isn't truly global any longer. Access to
registers in the GICs of remote clusters is possible using a redirect
register block, much like the redirect register blocks provided by the
CM & CPC, and configured through the same GCR_REDIRECT register that the
mips_cm_lock_other() abstraction builds upon.

It is expected that external interrupts are connected identically on all
clusters. That is, if there is a device providing an interrupt connected
to GIC interrupt pin 0 then it should be connected to pin 0 of every GIC
in the system. For the most part, the GIC can be treated as though it is
still truly global, so long as interrupts in the cluster are configured
properly.

Introduce support for such multi-cluster systems in the MIPS GIC irqchip
driver. A newly introduced gic_irq_lock_cluster() function allows:

1) Configure access to a GIC in a remote cluster via the redirect
   register block, using mips_cm_lock_other().

Or:

2) Detect that the interrupt in question is affine to the local cluster
   and plain old GIC register access to the GIC in the local cluster
   should be used.

It is possible to access the local cluster's GIC registers via the
redirect block, but keeping the special case for them is both good for
performance (because we avoid the locking & indirection overhead of
using the redirect block) and necessary to maintain compatibility with
systems using CM revisions prior to 3.5 which don't support the redirect
block.

The gic_irq_lock_cluster() function relies upon an IRQ's effective
affinity in order to discover which cluster the IRQ is affine to. In
order to track this & allow it to be updated at an appropriate point
during gic_set_affinity(), select the generic support for effective
affinity using CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK.

gic_set_affinity() is the one function which gains much complexity. It
now deconfigures routing to any VP(E), ie. CPU, on the old cluster when
moving affinity to a new cluster.

gic_shared_irq_domain_map() moves its update of the IRQ's effective
affinity to before its use of gic_irq_lock_cluster(), to ensure that
operation is on the cluster the IRQ is affine to.

The remaining changes are straightforward use of the
gic_irq_lock_cluster() function to select between local cluster & remote
cluster code-paths when configuring interrupts.
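Condensed to its essence (an illustrative sketch only, with locking,
error handling and the redirect-accessor variants trimmed), the new
cross-cluster flow in gic_set_affinity() is:

	/* Stop routing to the old cluster when the cluster changes */
	if (cl != old_cl)
		write_gic_map_vp(irq, 0);

	/* Switch effective affinity, then operate on the new cluster */
	irq_data_update_effective_affinity(d, cpumask_of(cpu));
	if (cl != old_cl)
		gic_set_type(d, irqd_get_trigger_type(d));

	/* Route to the new VP(E) and update the pcpu_masks */
	write_gic_map_pin(irq, GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin);
	write_gic_map_vp(irq, BIT(mips_cm_vp_id(cpu)));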
Signed-off-by: Paul Burton Signed-off-by: Chao-ying Fu Signed-off-by: Dragan Mladjenovic Signed-off-by: Aleksandar Rikalo Signed-off-by: Thomas Gleixner Tested-by: Serge Semin Tested-by: Gregory CLEMENT Link: https://lore.kernel.org/all/20241028175935.51250-5-arikalo@gmail.com --- drivers/irqchip/Kconfig | 1 + drivers/irqchip/irq-mips-gic.c | 161 +++++++++++++++++++++++++++++---- 2 files changed, 143 insertions(+), 19 deletions(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 006a47a86ed5..f20adf759d40 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -352,6 +352,7 @@ config KEYSTONE_IRQ config MIPS_GIC bool + select GENERIC_IRQ_EFFECTIVE_AFF_MASK select GENERIC_IRQ_IPI if SMP select IRQ_DOMAIN_HIERARCHY select MIPS_CM diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index d93a076620c7..f42f69bbd6fb 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -111,6 +111,41 @@ static inline void gic_unlock_cluster(void) gic_unlock_cluster(), \ (cpu) = __gic_with_next_online_cpu(cpu)) +/** + * gic_irq_lock_cluster() - Lock redirect block access to IRQ's cluster + * @d: struct irq_data corresponding to the interrupt we're interested in + * + * Locks redirect register block access to the global register block of the GIC + * within the remote cluster that the IRQ corresponding to @d is affine to, + * returning true when this redirect block setup & locking has been performed. + * + * If @d is affine to the local cluster then no locking is performed and this + * function will return false, indicating to the caller that it should access + * the local clusters registers without the overhead of indirection through the + * redirect block. + * + * In summary, if this function returns true then the caller should access GIC + * registers using redirect register block accessors & then call + * mips_cm_unlock_other() when done. If this function returns false then the + * caller should trivially access GIC registers in the local cluster. + * + * Returns true if locking performed, else false. 
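+ *
+ * An illustrative example (matching the callers converted below):
+ *
+ *	if (gic_irq_lock_cluster(d)) {
+ *		write_gic_redir_wedge(GIC_WEDGE_RW | hwirq);
+ *		mips_cm_unlock_other();
+ *	} else {
+ *		write_gic_wedge(GIC_WEDGE_RW | hwirq);
+ *	}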
+ */
+static bool gic_irq_lock_cluster(struct irq_data *d)
+{
+	unsigned int cpu, cl;
+
+	cpu = cpumask_first(irq_data_get_effective_affinity_mask(d));
+	BUG_ON(cpu >= NR_CPUS);
+
+	cl = cpu_cluster(&cpu_data[cpu]);
+	if (cl == cpu_cluster(&current_cpu_data))
+		return false;
+
+	mips_cm_lock_other(cl, 0, 0, CM_GCR_Cx_OTHER_BLOCK_GLOBAL);
+	return true;
+}
+
 static void gic_clear_pcpu_masks(unsigned int intr)
 {
 	unsigned int i;
@@ -157,7 +192,12 @@ static void gic_send_ipi(struct irq_data *d, unsigned int cpu)
 {
 	irq_hw_number_t hwirq = GIC_HWIRQ_TO_SHARED(irqd_to_hwirq(d));
 
-	write_gic_wedge(GIC_WEDGE_RW | hwirq);
+	if (gic_irq_lock_cluster(d)) {
+		write_gic_redir_wedge(GIC_WEDGE_RW | hwirq);
+		mips_cm_unlock_other();
+	} else {
+		write_gic_wedge(GIC_WEDGE_RW | hwirq);
+	}
 }
 
 int gic_get_c0_compare_int(void)
@@ -225,7 +265,13 @@ static void gic_mask_irq(struct irq_data *d)
 {
 	unsigned int intr = GIC_HWIRQ_TO_SHARED(d->hwirq);
 
-	write_gic_rmask(intr);
+	if (gic_irq_lock_cluster(d)) {
+		write_gic_redir_rmask(intr);
+		mips_cm_unlock_other();
+	} else {
+		write_gic_rmask(intr);
+	}
+
 	gic_clear_pcpu_masks(intr);
 }
 
@@ -234,7 +280,12 @@ static void gic_unmask_irq(struct irq_data *d)
 {
 	unsigned int intr = GIC_HWIRQ_TO_SHARED(d->hwirq);
 	unsigned int cpu;
 
-	write_gic_smask(intr);
+	if (gic_irq_lock_cluster(d)) {
+		write_gic_redir_smask(intr);
+		mips_cm_unlock_other();
+	} else {
+		write_gic_smask(intr);
+	}
 
 	gic_clear_pcpu_masks(intr);
 	cpu = cpumask_first(irq_data_get_effective_affinity_mask(d));
@@ -245,7 +296,12 @@ static void gic_ack_irq(struct irq_data *d)
 {
 	unsigned int irq = GIC_HWIRQ_TO_SHARED(d->hwirq);
 
-	write_gic_wedge(irq);
+	if (gic_irq_lock_cluster(d)) {
+		write_gic_redir_wedge(irq);
+		mips_cm_unlock_other();
+	} else {
+		write_gic_wedge(irq);
+	}
 }
 
 static int gic_set_type(struct irq_data *d, unsigned int type)
@@ -285,9 +341,16 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
 		break;
 	}
 
-	change_gic_pol(irq, pol);
-	change_gic_trig(irq, trig);
-	change_gic_dual(irq, dual);
+	if (gic_irq_lock_cluster(d)) {
+		change_gic_redir_pol(irq, pol);
+		change_gic_redir_trig(irq, trig);
+		change_gic_redir_dual(irq, dual);
+		mips_cm_unlock_other();
+	} else {
+		change_gic_pol(irq, pol);
+		change_gic_trig(irq, trig);
+		change_gic_dual(irq, dual);
+	}
 
 	if (trig == GIC_TRIG_EDGE)
 		irq_set_chip_handler_name_locked(d, &gic_edge_irq_controller,
@@ -305,25 +368,72 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *cpumask,
 			    bool force)
 {
 	unsigned int irq = GIC_HWIRQ_TO_SHARED(d->hwirq);
+	unsigned int cpu, cl, old_cpu, old_cl;
 	unsigned long flags;
-	unsigned int cpu;
 
+	/*
+	 * The GIC specifies that we can only route an interrupt to one VP(E),
+	 * ie. CPU in Linux parlance, at a time. Therefore we always route to
+	 * the first online CPU in the mask.
+	 */
 	cpu = cpumask_first_and(cpumask, cpu_online_mask);
 	if (cpu >= NR_CPUS)
 		return -EINVAL;
 
-	/* Assumption : cpumask refers to a single CPU */
-	raw_spin_lock_irqsave(&gic_lock, flags);
+	old_cpu = cpumask_first(irq_data_get_effective_affinity_mask(d));
+	old_cl = cpu_cluster(&cpu_data[old_cpu]);
+	cl = cpu_cluster(&cpu_data[cpu]);
 
-	/* Re-route this IRQ */
-	write_gic_map_vp(irq, BIT(mips_cm_vp_id(cpu)));
+	raw_spin_lock_irqsave(&gic_lock, flags);
 
-	/* Update the pcpu_masks */
-	gic_clear_pcpu_masks(irq);
-	if (read_gic_mask(irq))
-		set_bit(irq, per_cpu_ptr(pcpu_masks, cpu));
+	/*
+	 * If we're moving affinity between clusters, stop routing the
+	 * interrupt to any VP(E) in the old cluster.
+ */ + if (cl != old_cl) { + if (gic_irq_lock_cluster(d)) { + write_gic_redir_map_vp(irq, 0); + mips_cm_unlock_other(); + } else { + write_gic_map_vp(irq, 0); + } + } + /* + * Update effective affinity - after this gic_irq_lock_cluster() will + * begin operating on the new cluster. + */ irq_data_update_effective_affinity(d, cpumask_of(cpu)); + + /* + * If we're moving affinity between clusters, configure the interrupt + * trigger type in the new cluster. + */ + if (cl != old_cl) + gic_set_type(d, irqd_get_trigger_type(d)); + + /* Route the interrupt to its new VP(E) */ + if (gic_irq_lock_cluster(d)) { + write_gic_redir_map_pin(irq, + GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin); + write_gic_redir_map_vp(irq, BIT(mips_cm_vp_id(cpu))); + + /* Update the pcpu_masks */ + gic_clear_pcpu_masks(irq); + if (read_gic_redir_mask(irq)) + set_bit(irq, per_cpu_ptr(pcpu_masks, cpu)); + + mips_cm_unlock_other(); + } else { + write_gic_map_pin(irq, GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin); + write_gic_map_vp(irq, BIT(mips_cm_vp_id(cpu))); + + /* Update the pcpu_masks */ + gic_clear_pcpu_masks(irq); + if (read_gic_mask(irq)) + set_bit(irq, per_cpu_ptr(pcpu_masks, cpu)); + } + raw_spin_unlock_irqrestore(&gic_lock, flags); return IRQ_SET_MASK_OK; @@ -471,11 +581,21 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq, unsigned long flags; data = irq_get_irq_data(virq); + irq_data_update_effective_affinity(data, cpumask_of(cpu)); raw_spin_lock_irqsave(&gic_lock, flags); - write_gic_map_pin(intr, GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin); - write_gic_map_vp(intr, BIT(mips_cm_vp_id(cpu))); - irq_data_update_effective_affinity(data, cpumask_of(cpu)); + + /* Route the interrupt to its VP(E) */ + if (gic_irq_lock_cluster(data)) { + write_gic_redir_map_pin(intr, + GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin); + write_gic_redir_map_vp(intr, BIT(mips_cm_vp_id(cpu))); + mips_cm_unlock_other(); + } else { + write_gic_map_pin(intr, GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin); + write_gic_map_vp(intr, BIT(mips_cm_vp_id(cpu))); + } + raw_spin_unlock_irqrestore(&gic_lock, flags); return 0; @@ -651,6 +771,9 @@ static int gic_ipi_domain_alloc(struct irq_domain *d, unsigned int virq, if (ret) goto error; + /* Set affinity to cpu. */ + irq_data_update_effective_affinity(irq_get_irq_data(virq + i), + cpumask_of(cpu)); ret = irq_set_irq_type(virq + i, IRQ_TYPE_EDGE_RISING); if (ret) goto error; -- 2.51.0 From d1cb1437b785f312d63f447e2e79ff768e7ccc29 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Mon, 28 Oct 2024 18:59:35 +0100 Subject: [PATCH 08/16] irqchip/mips-gic: Prevent indirect access to clusters without CPU cores It is possible to have zero CPU cores in a cluster; in such cases, it is not possible to access the GIC, and any indirect access leads to an exception. Prevent access to such clusters by checking the number of cores in the cluster at all places which issue indirect cluster access. 
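The guard itself is a one-liner; as a sketch of the idea (the concrete
checks are in the diff below):

	/* Never redirect to a cluster with no cores - there is no GIC */
	if (mips_cps_numcores(cl) == 0)
		return false;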
Signed-off-by: Gregory CLEMENT
Signed-off-by: Aleksandar Rikalo
Signed-off-by: Thomas Gleixner
Link: https://lore.kernel.org/all/20241028175935.51250-14-arikalo@gmail.com
---
 drivers/irqchip/irq-mips-gic.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index f42f69bbd6fb..bca8053864b2 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -141,7 +141,8 @@ static bool gic_irq_lock_cluster(struct irq_data *d)
 	cl = cpu_cluster(&cpu_data[cpu]);
 	if (cl == cpu_cluster(&current_cpu_data))
 		return false;
-
+	if (mips_cps_numcores(cl) == 0)
+		return false;
 	mips_cm_lock_other(cl, 0, 0, CM_GCR_Cx_OTHER_BLOCK_GLOBAL);
 	return true;
 }
@@ -507,6 +508,9 @@ static void gic_mask_local_irq_all_vpes(struct irq_data *d)
 	struct gic_all_vpes_chip_data *cd;
 	int intr, cpu;
 
+	if (!mips_cps_multicluster_cpus())
+		return;
+
 	intr = GIC_HWIRQ_TO_LOCAL(d->hwirq);
 	cd = irq_data_get_irq_chip_data(d);
 	cd->mask = false;
@@ -520,6 +524,9 @@ static void gic_unmask_local_irq_all_vpes(struct irq_data *d)
 	struct gic_all_vpes_chip_data *cd;
 	int intr, cpu;
 
+	if (!mips_cps_multicluster_cpus())
+		return;
+
 	intr = GIC_HWIRQ_TO_LOCAL(d->hwirq);
 	cd = irq_data_get_irq_chip_data(d);
 	cd->mask = true;
@@ -687,8 +694,10 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq,
 	if (!gic_local_irq_is_routable(intr))
 		return -EPERM;
 
-	for_each_online_cpu_gic(cpu, &gic_lock)
-		write_gic_vo_map(mips_gic_vx_map_reg(intr), map);
+	if (mips_cps_multicluster_cpus()) {
+		for_each_online_cpu_gic(cpu, &gic_lock)
+			write_gic_vo_map(mips_gic_vx_map_reg(intr), map);
+	}
 
 	return 0;
 }
@@ -982,7 +991,7 @@ static int __init gic_of_init(struct device_node *node,
 				change_gic_trig(i, GIC_TRIG_LEVEL);
 				write_gic_rmask(i);
 			}
-		} else {
+		} else if (mips_cps_numcores(cl) != 0) {
 			mips_cm_lock_other(cl, 0, 0, CM_GCR_Cx_OTHER_BLOCK_GLOBAL);
 			for (i = 0; i < gic_shared_intrs; i++) {
 				change_gic_redir_pol(i, GIC_POL_ACTIVE_HIGH);
@@ -990,6 +999,9 @@ static int __init gic_of_init(struct device_node *node,
 				write_gic_redir_rmask(i);
 			}
 			mips_cm_unlock_other();
+
+		} else {
+			pr_warn("No CPU cores on the cluster %d skip it\n", cl);
 		}
 	}
 
-- 
2.51.0

From 0053892ff7d4bab5efdb4def0fd211ec36e26f69 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor
Date: Fri, 1 Nov 2024 09:33:05 -0700
Subject: [PATCH 09/16] irqchip/mips-gic: Fix selection of GENERIC_IRQ_EFFECTIVE_AFF_MASK

Without SMP enabled (such as in allnoconfig), there is a Kconfig warning
because CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK is unconditionally
selected by CONFIG_MIPS_GIC:

  WARNING: unmet direct dependencies detected for GENERIC_IRQ_EFFECTIVE_AFF_MASK
    Depends on [n]: SMP [=n]
    Selected by [y]:
    - MIPS_GIC [=y]

Add a dependency on SMP to the selection, which matches all other
selections of CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK.
Fixes: 322a90638768 ("irqchip/mips-gic: Multi-cluster support") Signed-off-by: Nathan Chancellor Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241101-mips-fix-generic_irq_effective_aff_mask-select-v1-1-d94db6e0de0d@kernel.org --- drivers/irqchip/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index f20adf759d40..ef0fa6975dab 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -352,7 +352,7 @@ config KEYSTONE_IRQ config MIPS_GIC bool - select GENERIC_IRQ_EFFECTIVE_AFF_MASK + select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP select GENERIC_IRQ_IPI if SMP select IRQ_DOMAIN_HIERARCHY select MIPS_CM -- 2.51.0 From 194c4f569eac889d9b0822bc001771683b6e9b8a Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Mon, 4 Nov 2024 13:08:35 -0600 Subject: [PATCH 10/16] irqchip/stm32mp-exti: Use of_property_present() for non-boolean properties The use of of_property_read_bool() for non-boolean properties is deprecated in favor of of_property_present() when testing for property presence. Signed-off-by: Rob Herring (Arm) Signed-off-by: Thomas Gleixner Reviewed-by: Antonio Borneo Link: https://lore.kernel.org/all/20241104190836.278117-1-robh@kernel.org --- drivers/irqchip/irq-stm32mp-exti.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-stm32mp-exti.c b/drivers/irqchip/irq-stm32mp-exti.c index 33e0cfdea654..cb83d6cc6113 100644 --- a/drivers/irqchip/irq-stm32mp-exti.c +++ b/drivers/irqchip/irq-stm32mp-exti.c @@ -696,8 +696,7 @@ static int stm32mp_exti_probe(struct platform_device *pdev) if (ret) return ret; - if (of_property_read_bool(np, "interrupts-extended")) - host_data->dt_has_irqs_desc = true; + host_data->dt_has_irqs_desc = of_property_present(np, "interrupts-extended"); return 0; } -- 2.51.0 From 2631c2b8e5c3406af62b60413ea04e155be9ebcc Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Thu, 31 Oct 2024 14:08:57 +0800 Subject: [PATCH 11/16] dt-bindings: interrupt-controller: Add T-HEAD C900 ACLINT SSWI device Sophgo SG2044 has a new version of T-HEAD C920, which implement a fully featured T-HEAD ACLINT device. This ACLINT device contains a SSWI device to support fast S-mode IPI. Add necessary compatible string for the T-HEAD ACLINT SSWI device. Signed-off-by: Inochi Amaoto Signed-off-by: Thomas Gleixner Reviewed-by: Conor Dooley Link: https://lore.kernel.org/all/20241031060859.722258-2-inochiama@gmail.com Link: https://www.xrvm.com/product/xuantie/C920 --- .../thead,c900-aclint-sswi.yaml | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 Documentation/devicetree/bindings/interrupt-controller/thead,c900-aclint-sswi.yaml diff --git a/Documentation/devicetree/bindings/interrupt-controller/thead,c900-aclint-sswi.yaml b/Documentation/devicetree/bindings/interrupt-controller/thead,c900-aclint-sswi.yaml new file mode 100644 index 000000000000..8d330906bbbd --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/thead,c900-aclint-sswi.yaml @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interrupt-controller/thead,c900-aclint-sswi.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: T-HEAD C900 ACLINT Supervisor-level Software Interrupt Device + +maintainers: + - Inochi Amaoto + +description: + The SSWI device is a part of the THEAD ACLINT device. 
It provides + supervisor-level IPI functionality for a set of HARTs on a THEAD + platform. It provides a register to set an IPI (SETSSIP) for each + HART connected to the SSWI device. + +properties: + compatible: + items: + - enum: + - sophgo,sg2044-aclint-sswi + - const: thead,c900-aclint-sswi + + reg: + maxItems: 1 + + "#interrupt-cells": + const: 0 + + interrupt-controller: true + + interrupts-extended: + minItems: 1 + maxItems: 4095 + +additionalProperties: false + +required: + - compatible + - reg + - "#interrupt-cells" + - interrupt-controller + - interrupts-extended + +examples: + - | + interrupt-controller@94000000 { + compatible = "sophgo,sg2044-aclint-sswi", "thead,c900-aclint-sswi"; + reg = <0x94000000 0x00004000>; + #interrupt-cells = <0>; + interrupt-controller; + interrupts-extended = <&cpu1intc 1>, + <&cpu2intc 1>, + <&cpu3intc 1>, + <&cpu4intc 1>; + }; +... -- 2.51.0 From 25caea955cc950507d179f3ef456404b475e8c23 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Thu, 31 Oct 2024 14:08:58 +0800 Subject: [PATCH 12/16] irqchip: Add T-HEAD C900 ACLINT SSWI driver Add a driver for the T-HEAD C900 ACLINT SSWI device. This device allows the system with T-HEAD cpus to send ipi via fast device interface. Signed-off-by: Inochi Amaoto Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241031060859.722258-3-inochiama@gmail.com --- drivers/irqchip/Kconfig | 12 ++ drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-thead-c900-aclint-sswi.c | 176 +++++++++++++++++++ include/linux/cpuhotplug.h | 1 + 4 files changed, 190 insertions(+) create mode 100644 drivers/irqchip/irq-thead-c900-aclint-sswi.c diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index ef0fa6975dab..9cac13663871 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -619,6 +619,18 @@ config STARFIVE_JH8100_INTC If you don't know what to do here, say Y. +config THEAD_C900_ACLINT_SSWI + bool "THEAD C9XX ACLINT S-mode IPI Interrupt Controller" + depends on RISCV + depends on SMP + select IRQ_DOMAIN_HIERARCHY + select GENERIC_IRQ_IPI_MUX + help + This enables support for T-HEAD specific ACLINT SSWI device + support. + + If you don't know what to do here, say Y. 
+ config EXYNOS_IRQ_COMBINER bool "Samsung Exynos IRQ combiner support" if COMPILE_TEST depends on (ARCH_EXYNOS && ARM) || COMPILE_TEST diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 15ed6e952ecd..25e9ad29b8c4 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -103,6 +103,7 @@ obj-$(CONFIG_RISCV_APLIC_MSI) += irq-riscv-aplic-msi.o obj-$(CONFIG_RISCV_IMSIC) += irq-riscv-imsic-state.o irq-riscv-imsic-early.o irq-riscv-imsic-platform.o obj-$(CONFIG_SIFIVE_PLIC) += irq-sifive-plic.o obj-$(CONFIG_STARFIVE_JH8100_INTC) += irq-starfive-jh8100-intc.o +obj-$(CONFIG_THEAD_C900_ACLINT_SSWI) += irq-thead-c900-aclint-sswi.o obj-$(CONFIG_IMX_IRQSTEER) += irq-imx-irqsteer.o obj-$(CONFIG_IMX_INTMUX) += irq-imx-intmux.o obj-$(CONFIG_IMX_MU_MSI) += irq-imx-mu-msi.o diff --git a/drivers/irqchip/irq-thead-c900-aclint-sswi.c b/drivers/irqchip/irq-thead-c900-aclint-sswi.c new file mode 100644 index 000000000000..b0e366ade427 --- /dev/null +++ b/drivers/irqchip/irq-thead-c900-aclint-sswi.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Inochi Amaoto + */ + +#define pr_fmt(fmt) "thead-c900-aclint-sswi: " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define THEAD_ACLINT_xSWI_REGISTER_SIZE 4 + +#define THEAD_C9XX_CSR_SXSTATUS 0x5c0 +#define THEAD_C9XX_SXSTATUS_CLINTEE BIT(17) + +static int sswi_ipi_virq __ro_after_init; +static DEFINE_PER_CPU(void __iomem *, sswi_cpu_regs); + +static void thead_aclint_sswi_ipi_send(unsigned int cpu) +{ + writel_relaxed(0x1, per_cpu(sswi_cpu_regs, cpu)); +} + +static void thead_aclint_sswi_ipi_clear(void) +{ + writel_relaxed(0x0, this_cpu_read(sswi_cpu_regs)); +} + +static void thead_aclint_sswi_ipi_handle(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + + chained_irq_enter(chip, desc); + + csr_clear(CSR_IP, IE_SIE); + thead_aclint_sswi_ipi_clear(); + + ipi_mux_process(); + + chained_irq_exit(chip, desc); +} + +static int thead_aclint_sswi_starting_cpu(unsigned int cpu) +{ + enable_percpu_irq(sswi_ipi_virq, irq_get_trigger_type(sswi_ipi_virq)); + + return 0; +} + +static int thead_aclint_sswi_dying_cpu(unsigned int cpu) +{ + thead_aclint_sswi_ipi_clear(); + + disable_percpu_irq(sswi_ipi_virq); + + return 0; +} + +static int __init thead_aclint_sswi_parse_irq(struct fwnode_handle *fwnode, + void __iomem *reg) +{ + struct of_phandle_args parent; + unsigned long hartid; + u32 contexts, i; + int rc, cpu; + + contexts = of_irq_count(to_of_node(fwnode)); + if (!(contexts)) { + pr_err("%pfwP: no ACLINT SSWI context available\n", fwnode); + return -EINVAL; + } + + for (i = 0; i < contexts; i++) { + rc = of_irq_parse_one(to_of_node(fwnode), i, &parent); + if (rc) + return rc; + + rc = riscv_of_parent_hartid(parent.np, &hartid); + if (rc) + return rc; + + if (parent.args[0] != RV_IRQ_SOFT) + return -ENOTSUPP; + + cpu = riscv_hartid_to_cpuid(hartid); + + per_cpu(sswi_cpu_regs, cpu) = reg + i * THEAD_ACLINT_xSWI_REGISTER_SIZE; + } + + pr_info("%pfwP: register %u CPU%s\n", fwnode, contexts, str_plural(contexts)); + + return 0; +} + +static int __init thead_aclint_sswi_probe(struct fwnode_handle *fwnode) +{ + struct irq_domain *domain; + void __iomem *reg; + int virq, rc; + + /* If it is T-HEAD CPU, check whether SSWI is enabled */ + if (riscv_cached_mvendorid(0) == THEAD_VENDOR_ID && + !(csr_read(THEAD_C9XX_CSR_SXSTATUS) & THEAD_C9XX_SXSTATUS_CLINTEE)) + return 
-ENOTSUPP; + + if (!is_of_node(fwnode)) + return -EINVAL; + + reg = of_iomap(to_of_node(fwnode), 0); + if (!reg) + return -ENOMEM; + + /* Parse SSWI setting */ + rc = thead_aclint_sswi_parse_irq(fwnode, reg); + if (rc < 0) + return rc; + + /* If mulitple SSWI devices are present, do not register irq again */ + if (sswi_ipi_virq) + return 0; + + /* Find riscv intc domain and create IPI irq mapping */ + domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(), DOMAIN_BUS_ANY); + if (!domain) { + pr_err("%pfwP: Failed to find INTC domain\n", fwnode); + return -ENOENT; + } + + sswi_ipi_virq = irq_create_mapping(domain, RV_IRQ_SOFT); + if (!sswi_ipi_virq) { + pr_err("unable to create ACLINT SSWI IRQ mapping\n"); + return -ENOMEM; + } + + /* Register SSWI irq and handler */ + virq = ipi_mux_create(BITS_PER_BYTE, thead_aclint_sswi_ipi_send); + if (virq <= 0) { + pr_err("unable to create muxed IPIs\n"); + irq_dispose_mapping(sswi_ipi_virq); + return virq < 0 ? virq : -ENOMEM; + } + + irq_set_chained_handler(sswi_ipi_virq, thead_aclint_sswi_ipi_handle); + + cpuhp_setup_state(CPUHP_AP_IRQ_THEAD_ACLINT_SSWI_STARTING, + "irqchip/thead-aclint-sswi:starting", + thead_aclint_sswi_starting_cpu, + thead_aclint_sswi_dying_cpu); + + riscv_ipi_set_virq_range(virq, BITS_PER_BYTE); + + /* Announce that SSWI is providing IPIs */ + pr_info("providing IPIs using THEAD ACLINT SSWI\n"); + + return 0; +} + +static int __init thead_aclint_sswi_early_probe(struct device_node *node, + struct device_node *parent) +{ + return thead_aclint_sswi_probe(&node->fwnode); +} +IRQCHIP_DECLARE(thead_aclint_sswi, "thead,c900-aclint-sswi", thead_aclint_sswi_early_probe); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 2361ed4d2b15..799052249c7b 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -147,6 +147,7 @@ enum cpuhp_state { CPUHP_AP_IRQ_EIOINTC_STARTING, CPUHP_AP_IRQ_AVECINTC_STARTING, CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, + CPUHP_AP_IRQ_THEAD_ACLINT_SSWI_STARTING, CPUHP_AP_IRQ_RISCV_IMSIC_STARTING, CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, -- 2.51.0 From 6da33567c0bf24b642f69b029a2e9ea51fa75472 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Thu, 31 Oct 2024 14:08:59 +0800 Subject: [PATCH 13/16] riscv: defconfig: Enable T-HEAD C900 ACLINT SSWI drivers Add support for T-HEAD C900 ACLINT SSWI irqchip. Signed-off-by: Inochi Amaoto Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241031060859.722258-4-inochiama@gmail.com --- arch/riscv/configs/defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 2341393cfac1..5b1d6325df85 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -256,6 +256,7 @@ CONFIG_RPMSG_CTRL=y CONFIG_RPMSG_VIRTIO=y CONFIG_PM_DEVFREQ=y CONFIG_IIO=y +CONFIG_THEAD_C900_ACLINT_SSWI=y CONFIG_PHY_SUN4I_USB=m CONFIG_PHY_STARFIVE_JH7110_DPHY_RX=m CONFIG_PHY_STARFIVE_JH7110_PCIE=m -- 2.51.0 From 7a7f5065bc1dd8c463fc55f18ad43907c16571ee Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 6 Nov 2024 15:51:37 +0100 Subject: [PATCH 14/16] hrtimer: Use __raise_softirq_irqoff() to raise the softirq Raising the hrtimer soft interrupt is always done from hard interrupt context, so it can be reduced to just setting the HRTIMER soft interrupt flag. The soft interrupt will be invoked on return from interrupt. Use therefore __raise_softirq_irqoff() to raise the HRTIMER soft interrupt, which is a trivial optimization. 
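For reference, the two helpers differ roughly as follows (a sketch of
kernel/softirq.c; exact bodies vary between kernel versions):

	void __raise_softirq_irqoff(unsigned int nr)
	{
		lockdep_assert_irqs_disabled();
		trace_softirq_raise(nr);
		or_softirq_pending(1UL << nr);
	}

	inline void raise_softirq_irqoff(unsigned int nr)
	{
		__raise_softirq_irqoff(nr);
		/*
		 * The only extra work: waking ksoftirqd when not in hard
		 * interrupt context - unnecessary for this call site.
		 */
		if (!in_interrupt())
			wakeup_softirqd();
	}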
Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Thomas Gleixner
Reviewed-by: Frederic Weisbecker
Link: https://lore.kernel.org/all/20241106150419.2593080-2-bigeasy@linutronix.de
---
 kernel/time/hrtimer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index cddcd08ea827..5402e0f24217 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1811,7 +1811,7 @@ retry:
 	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
 		cpu_base->softirq_expires_next = KTIME_MAX;
 		cpu_base->softirq_activated = 1;
-		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+		__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 	}
 
 	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
@@ -1906,7 +1906,7 @@ void hrtimer_run_queues(void)
 	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
 		cpu_base->softirq_expires_next = KTIME_MAX;
 		cpu_base->softirq_activated = 1;
-		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+		__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 	}
 
 	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
-- 
2.51.0

From a02976cfce4fe8336c6be08cd4dc35ca1aa794e9 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Wed, 6 Nov 2024 15:51:38 +0100
Subject: [PATCH 15/16] timers: Use __raise_softirq_irqoff() to raise the softirq

Raising the timer soft interrupt is always done from hard interrupt
context, so it can be reduced to just setting the TIMER soft interrupt
flag. The soft interrupt will be invoked on return from interrupt.

Therefore use __raise_softirq_irqoff() to raise the TIMER soft
interrupt, which is a trivial optimization.

Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Thomas Gleixner
Reviewed-by: Frederic Weisbecker
Link: https://lore.kernel.org/all/20241106150419.2593080-3-bigeasy@linutronix.de
---
 kernel/time/timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 0fc9d066a7be..1759de934284 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -2499,7 +2499,7 @@ static void run_local_timers(void)
 		 */
 		if (time_after_eq(jiffies, READ_ONCE(base->next_expiry)) ||
 		    (i == BASE_DEF && tmigr_requires_handle_remote())) {
-			raise_softirq(TIMER_SOFTIRQ);
+			__raise_softirq_irqoff(TIMER_SOFTIRQ);
 			return;
 		}
 	}
-- 
2.51.0

From 49a17639508c3b35f90ca829e60dddeeeb750e74 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Wed, 6 Nov 2024 15:51:39 +0100
Subject: [PATCH 16/16] softirq: Use a dedicated thread for timer wakeups on PREEMPT_RT

The timer and hrtimer soft interrupts are raised in hard interrupt
context. With threaded interrupts force enabled or on PREEMPT_RT this
leads to waking the ksoftirqd for the processing of the soft interrupt.
ksoftirqd runs as SCHED_OTHER task which means it will compete with
other tasks for CPU resources. This can introduce long delays for timer
processing on heavily loaded systems and is not desired.

Split the TIMER_SOFTIRQ and HRTIMER_SOFTIRQ processing into a dedicated
timers thread and let it run at the lowest SCHED_FIFO priority.
Wake-ups for RT tasks happen from hardirq context, so only timer_list
timers and hrtimers for "regular" tasks are processed here. The higher
priority ensures that wakeups are performed before scheduling
SCHED_OTHER tasks.

Using a dedicated variable to store the pending softirq bits ensures
that the timers are not accidentally picked up by ksoftirqd or other
threaded interrupts. They should not be picked up by ksoftirqd anyway,
since it runs at lower priority.
However, if ksoftirqd is already running while a timer fires, then
ksoftirqd will be PI-boosted to the ktimers thread's priority due to
the BH-lock.

The timer thread can pick up pending softirqs from ksoftirqd, but only
if the softirq load is high. It is not desirable that the picked-up
softirqs are processed at SCHED_FIFO priority under high softirq load,
but this can already happen via a PI-boost by a force-threaded
interrupt.

[ frederic@kernel.org: rcutorture.c fixes, storm fix by introduction of
  local_timers_pending() for tick_nohz_next_event() ]

[ junxiao.chang@intel.com: Ensure ktimersd gets woken up even if a
  softirq is currently served. ]

Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Thomas Gleixner
Reviewed-by: Paul E. McKenney [rcutorture]
Reviewed-by: Frederic Weisbecker
Link: https://lore.kernel.org/all/20241106150419.2593080-4-bigeasy@linutronix.de
---
 include/linux/interrupt.h | 47 ++++++++++++++++++++++++++
 kernel/rcu/rcutorture.c   |  8 +++++
 kernel/softirq.c          | 69 ++++++++++++++++++++++++++++++++++++++-
 kernel/time/hrtimer.c     |  4 +--
 kernel/time/tick-sched.c  |  2 +-
 kernel/time/timer.c       |  2 +-
 6 files changed, 127 insertions(+), 5 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 457151f9f263..8cd9327e4e78 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -616,6 +616,53 @@ extern void __raise_softirq_irqoff(unsigned int nr);
 extern void raise_softirq_irqoff(unsigned int nr);
 extern void raise_softirq(unsigned int nr);
 
+/*
+ * With forced-threaded interrupts enabled a raised softirq is deferred to
+ * ksoftirqd unless it can be handled within the threaded interrupt. This
+ * affects timer_list timers and hrtimers which are explicitly marked with
+ * HRTIMER_MODE_SOFT.
+ * With PREEMPT_RT enabled more hrtimers are moved to softirq for processing
+ * which includes all timers which are not explicitly marked HRTIMER_MODE_HARD.
+ * Userspace controlled timers (like the clock_nanosleep() interface) are
+ * divided into two categories: Tasks with elevated scheduling policy including
+ * SCHED_{FIFO|RR|DL} and the remaining scheduling policy. The tasks with the
+ * elevated scheduling policy are woken up directly from the HARDIRQ while all
+ * other wake ups are delayed to softirq and so to ksoftirqd.
+ *
+ * The ksoftirqd runs at SCHED_OTHER policy, at which it should remain since it
+ * handles the softirqs in an overloaded situation (where it did not handle
+ * everything within its last run).
+ * If the timers were handled at SCHED_OTHER priority then they would compete
+ * with all other SCHED_OTHER tasks for CPU resources and could be delayed.
+ * Moving timer softirqs to a low priority SCHED_FIFO thread instead ensures
+ * that timers are handled before scheduling any SCHED_OTHER thread.
+ */ +DECLARE_PER_CPU(struct task_struct *, ktimerd); +DECLARE_PER_CPU(unsigned long, pending_timer_softirq); +void raise_ktimers_thread(unsigned int nr); + +static inline unsigned int local_timers_pending_force_th(void) +{ + return __this_cpu_read(pending_timer_softirq); +} + +static inline void raise_timer_softirq(unsigned int nr) +{ + lockdep_assert_in_irq(); + if (force_irqthreads()) + raise_ktimers_thread(nr); + else + __raise_softirq_irqoff(nr); +} + +static inline unsigned int local_timers_pending(void) +{ + if (force_irqthreads()) + return local_timers_pending_force_th(); + else + return local_softirq_pending(); +} + DECLARE_PER_CPU(struct task_struct *, ksoftirqd); static inline struct task_struct *this_cpu_ksoftirqd(void) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index bb75dbf5c800..270c31a1e857 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2440,6 +2440,14 @@ static int rcutorture_booster_init(unsigned int cpu) WARN_ON_ONCE(!t); sp.sched_priority = 2; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); +#ifdef CONFIG_IRQ_FORCED_THREADING + if (force_irqthreads()) { + t = per_cpu(ktimerd, cpu); + WARN_ON_ONCE(!t); + sp.sched_priority = 2; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + } +#endif } /* Don't allow time recalculation while creating a new task. */ diff --git a/kernel/softirq.c b/kernel/softirq.c index d082e7840f88..7b525c904462 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -624,6 +624,24 @@ static inline void tick_irq_exit(void) #endif } +#ifdef CONFIG_IRQ_FORCED_THREADING +DEFINE_PER_CPU(struct task_struct *, ktimerd); +DEFINE_PER_CPU(unsigned long, pending_timer_softirq); + +static void wake_timersd(void) +{ + struct task_struct *tsk = __this_cpu_read(ktimerd); + + if (tsk) + wake_up_process(tsk); +} + +#else + +static inline void wake_timersd(void) { } + +#endif + static inline void __irq_exit_rcu(void) { #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED @@ -636,6 +654,10 @@ static inline void __irq_exit_rcu(void) if (!in_interrupt() && local_softirq_pending()) invoke_softirq(); + if (IS_ENABLED(CONFIG_IRQ_FORCED_THREADING) && force_irqthreads() && + local_timers_pending_force_th() && !(in_nmi() | in_hardirq())) + wake_timersd(); + tick_irq_exit(); } @@ -971,12 +993,57 @@ static struct smp_hotplug_thread softirq_threads = { .thread_comm = "ksoftirqd/%u", }; +#ifdef CONFIG_IRQ_FORCED_THREADING +static void ktimerd_setup(unsigned int cpu) +{ + /* Above SCHED_NORMAL to handle timers before regular tasks. 
*/ + sched_set_fifo_low(current); +} + +static int ktimerd_should_run(unsigned int cpu) +{ + return local_timers_pending_force_th(); +} + +void raise_ktimers_thread(unsigned int nr) +{ + trace_softirq_raise(nr); + __this_cpu_or(pending_timer_softirq, BIT(nr)); +} + +static void run_ktimerd(unsigned int cpu) +{ + unsigned int timer_si; + + ksoftirqd_run_begin(); + + timer_si = local_timers_pending_force_th(); + __this_cpu_write(pending_timer_softirq, 0); + or_softirq_pending(timer_si); + + __do_softirq(); + + ksoftirqd_run_end(); +} + +static struct smp_hotplug_thread timer_thread = { + .store = &ktimerd, + .setup = ktimerd_setup, + .thread_should_run = ktimerd_should_run, + .thread_fn = run_ktimerd, + .thread_comm = "ktimers/%u", +}; +#endif + static __init int spawn_ksoftirqd(void) { cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL, takeover_tasklets); BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); - +#ifdef CONFIG_IRQ_FORCED_THREADING + if (force_irqthreads()) + BUG_ON(smpboot_register_percpu_thread(&timer_thread)); +#endif return 0; } early_initcall(spawn_ksoftirqd); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 5402e0f24217..d9911516e743 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1811,7 +1811,7 @@ retry: if (!ktime_before(now, cpu_base->softirq_expires_next)) { cpu_base->softirq_expires_next = KTIME_MAX; cpu_base->softirq_activated = 1; - __raise_softirq_irqoff(HRTIMER_SOFTIRQ); + raise_timer_softirq(HRTIMER_SOFTIRQ); } __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); @@ -1906,7 +1906,7 @@ void hrtimer_run_queues(void) if (!ktime_before(now, cpu_base->softirq_expires_next)) { cpu_base->softirq_expires_next = KTIME_MAX; cpu_base->softirq_activated = 1; - __raise_softirq_irqoff(HRTIMER_SOFTIRQ); + raise_timer_softirq(HRTIMER_SOFTIRQ); } __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 753a184c7090..976a212cca2e 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -859,7 +859,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) static inline bool local_timer_softirq_pending(void) { - return local_softirq_pending() & BIT(TIMER_SOFTIRQ); + return local_timers_pending() & BIT(TIMER_SOFTIRQ); } /* diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 1759de934284..06f0bc1db6d9 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -2499,7 +2499,7 @@ static void run_local_timers(void) */ if (time_after_eq(jiffies, READ_ONCE(base->next_expiry)) || (i == BASE_DEF && tmigr_requires_handle_remote())) { - __raise_softirq_irqoff(TIMER_SOFTIRQ); + raise_timer_softirq(TIMER_SOFTIRQ); return; } } -- 2.51.0