LoongArch: KVM: Add PV IPI support on guest side
author      Bibo Mao <maobibo@loongson.cn>
            Mon, 6 May 2024 14:00:47 +0000 (22:00 +0800)
committer   Huacai Chen <chenhuacai@loongson.cn>
            Mon, 6 May 2024 14:00:47 +0000 (22:00 +0800)
The PARAVIRT config option and PV IPI support are added on the guest
side. The function pv_ipi_init() installs the IPI sending and receiving
hooks. It first checks whether the system runs in VM mode and, if the
kernel runs in VM mode, calls kvm_para_available() to detect the current
hypervisor type (for now only KVM detection is supported). The paravirt
functions take effect only when the hypervisor type is KVM, since KVM is
the only hypervisor currently supported on LoongArch.
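
For reference, the signature check performed by kvm_para_available() (see
the diff below) compares the 32-bit word read from CPUCFG_KVM_SIG against
the KVM_SIGNATURE string. A minimal sketch of the definitions it relies
on, which are provided elsewhere in this series (the values shown here
are assumptions for illustration only):

    #define CPUCFG_KVM_BASE     0x40000000              /* assumed hypervisor CPUCFG window   */
    #define CPUCFG_KVM_SIG      CPUCFG_KVM_BASE         /* reads "KVM\0" when running on KVM  */
    #define CPUCFG_KVM_FEATURE  (CPUCFG_KVM_BASE + 4)   /* feature bits, e.g. KVM_FEATURE_IPI */
    #define KVM_SIGNATURE       "KVM\0"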

PV IPI uses virtual IPI sender and virtual IPI receiver functions. With
the virtual IPI sender, the IPI message is stored in memory rather than
in emulated HW registers. IPI multicast is also supported, so up to 128
vCPUs can receive IPIs at the same time, similar to the x86 KVM method.
A hypercall is used to send the IPI.
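
As an illustration of the multicast encoding (grounded in
pv_send_ipi_mask() in the diff below, and assuming logical CPU ids equal
physical ids), sending to the mask {1, 3, 66} sets bits 0, 2 and 65 of a
128-bit bitmap relative to min = 1 and ends up as a single hypercall:

    kvm_hypercall3(KVM_HCALL_FUNC_IPI,
                   0x5,   /* bitmap[63:0]:   cpus 1 and 3 (bits 0 and 2) */
                   0x2,   /* bitmap[127:64]: cpu 66 (bit 65)             */
                   1);    /* lowest targeted physical cpu id             */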

With the virtual IPI receiver, HW SWI0 is used rather than the real IPI
HW. Since each vCPU has its own HW SWI0, just like the HW timer,
acknowledging the IPI interrupt causes no trap. And since the IPI message
is stored in memory, fetching the IPI message causes no trap either.
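
Concretely, the receive path reduces to two guest-local operations, as
implemented in pv_ipi_interrupt() in the diff below; neither of them
causes a VM exit:

    /* Acknowledge: clear SWI0 in CSR.ESTAT, a guest-local CSR write. */
    clear_csr_estat(1 << INT_SWI0);
    /* Fetch and clear the pending actions: an atomic access to guest memory. */
    action = atomic_xchg(&this_cpu_ptr(&irq_stat)->message, 0);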

Signed-off-by: Bibo Mao <maobibo@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
arch/loongarch/Kconfig
arch/loongarch/include/asm/hardirq.h
arch/loongarch/include/asm/paravirt.h [new file with mode: 0644]
arch/loongarch/include/asm/paravirt_api_clock.h [new file with mode: 0644]
arch/loongarch/kernel/Makefile
arch/loongarch/kernel/irq.c
arch/loongarch/kernel/paravirt.c [new file with mode: 0644]
arch/loongarch/kernel/smp.c

index 54ad04dacdee94d869b2bd7d0ab92a92e60fe642..42331d9a8dd7e416cc13b18bc2b9b1d1605a3464 100644 (file)
@@ -632,6 +632,15 @@ config RANDOMIZE_BASE_MAX_OFFSET
 
 source "kernel/livepatch/Kconfig"
 
+config PARAVIRT
+       bool "Enable paravirtualization code"
+       depends on AS_HAS_LVZ_EXTENSION
+       help
+         This changes the kernel so it can modify itself when it is run
+         under a hypervisor, potentially improving performance significantly
+         over full virtualization.  However, when run without a hypervisor
+         the kernel is theoretically slower and slightly larger.
+
 endmenu
 
 config ARCH_SELECT_MEMORY_MODEL
index 08eb72e01b1a7298ed3b5d5b19e9de17f457e15b..d41138abcf26d60f75c4d5ebe74d9abd9e38afc7 100644 (file)
@@ -22,6 +22,7 @@ enum ipi_msg_type {
 typedef struct {
        unsigned int ipi_irqs[NR_IPI];
        unsigned int __softirq_pending;
+       atomic_t message ____cacheline_aligned_in_smp;
 } ____cacheline_aligned irq_cpustat_t;
 
 DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h
new file mode 100644 (file)
index 0000000..0965710
--- /dev/null
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_PARAVIRT_H
+#define _ASM_LOONGARCH_PARAVIRT_H
+
+#ifdef CONFIG_PARAVIRT
+
+#include <linux/static_call_types.h>
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+u64 dummy_steal_clock(int cpu);
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+       return static_call(pv_steal_clock)(cpu);
+}
+
+int __init pv_ipi_init(void);
+
+#else
+
+static inline int pv_ipi_init(void)
+{
+       return 0;
+}
+
+#endif // CONFIG_PARAVIRT
+#endif
diff --git a/arch/loongarch/include/asm/paravirt_api_clock.h b/arch/loongarch/include/asm/paravirt_api_clock.h
new file mode 100644 (file)
index 0000000..65ac7ce
--- /dev/null
@@ -0,0 +1 @@
+#include <asm/paravirt.h>
index 3a7620b66bc6c0a53347decd98b73977059484e5..c9bfeda89e4076ced15e168eaf07ba10e757155b 100644 (file)
@@ -51,6 +51,7 @@ obj-$(CONFIG_MODULES)         += module.o module-sections.o
 obj-$(CONFIG_STACKTRACE)       += stacktrace.o
 
 obj-$(CONFIG_PROC_FS)          += proc.o
+obj-$(CONFIG_PARAVIRT)         += paravirt.o
 
 obj-$(CONFIG_SMP)              += smp.o
 
index dcb1faf2c2b7e9d0195cdce32e54afc74b66893e..f4991c03514f48c7396ed094994c6cb41d87da8e 100644 (file)
@@ -113,5 +113,5 @@ void __init init_IRQ(void)
                        per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE);
        }
 
-       set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
+       set_csr_ecfg(ECFGF_SIP0 | ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC);
 }
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
new file mode 100644 (file)
index 0000000..1633ed4
--- /dev/null
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/jump_label.h>
+#include <linux/kvm_para.h>
+#include <linux/static_call.h>
+#include <asm/paravirt.h>
+
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+static u64 native_steal_clock(int cpu)
+{
+       return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
+
+#ifdef CONFIG_SMP
+static void pv_send_ipi_single(int cpu, unsigned int action)
+{
+       int min, old;
+       irq_cpustat_t *info = &per_cpu(irq_stat, cpu);
+
+       old = atomic_fetch_or(BIT(action), &info->message);
+       if (old)
+               return;
+
+       min = cpu_logical_map(cpu);
+       kvm_hypercall3(KVM_HCALL_FUNC_IPI, 1, 0, min);
+}
+
+#define KVM_IPI_CLUSTER_SIZE   (2 * BITS_PER_LONG)
+
+static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action)
+{
+       int i, cpu, min = 0, max = 0, old;
+       __uint128_t bitmap = 0;
+       irq_cpustat_t *info;
+
+       if (cpumask_empty(mask))
+               return;
+
+       action = BIT(action);
+       for_each_cpu(i, mask) {
+               info = &per_cpu(irq_stat, i);
+               old = atomic_fetch_or(action, &info->message);
+               if (old)
+                       continue;
+
+               cpu = cpu_logical_map(i);
+               if (!bitmap) {
+                       min = max = cpu;
+               } else if (cpu < min && cpu > (max - KVM_IPI_CLUSTER_SIZE)) {
+                       /* cpu < min, and bitmap still enough */
+                       bitmap <<= min - cpu;
+                       min = cpu;
+               } else if (cpu > min && cpu < (min + KVM_IPI_CLUSTER_SIZE)) {
+                       /* cpu > min, and bitmap still enough */
+                       max = cpu > max ? cpu : max;
+               } else {
+                       /*
+                        * With cpu, bitmap will exceed KVM_IPI_CLUSTER_SIZE,
+                        * send IPI here directly and skip the remaining CPUs.
+                        */
+                       kvm_hypercall3(KVM_HCALL_FUNC_IPI, (unsigned long)bitmap,
+                                     (unsigned long)(bitmap >> BITS_PER_LONG), min);
+                       min = max = cpu;
+                       bitmap = 0;
+               }
+               __set_bit(cpu - min, (unsigned long *)&bitmap);
+       }
+
+       if (bitmap)
+               kvm_hypercall3(KVM_HCALL_FUNC_IPI, (unsigned long)bitmap,
+                             (unsigned long)(bitmap >> BITS_PER_LONG), min);
+}
+
+static irqreturn_t pv_ipi_interrupt(int irq, void *dev)
+{
+       u32 action;
+       irq_cpustat_t *info;
+
+       /* Clear SWI interrupt */
+       clear_csr_estat(1 << INT_SWI0);
+       info = this_cpu_ptr(&irq_stat);
+       action = atomic_xchg(&info->message, 0);
+
+       if (action & SMP_RESCHEDULE) {
+               scheduler_ipi();
+               info->ipi_irqs[IPI_RESCHEDULE]++;
+       }
+
+       if (action & SMP_CALL_FUNCTION) {
+               generic_smp_call_function_interrupt();
+               info->ipi_irqs[IPI_CALL_FUNCTION]++;
+       }
+
+       return IRQ_HANDLED;
+}
+
+static void pv_init_ipi(void)
+{
+       int r, swi;
+
+       swi = get_percpu_irq(INT_SWI0);
+       if (swi < 0)
+               panic("SWI0 IRQ mapping failed\n");
+       irq_set_percpu_devid(swi);
+       r = request_percpu_irq(swi, pv_ipi_interrupt, "SWI0-IPI", &irq_stat);
+       if (r < 0)
+               panic("SWI0 IRQ request failed\n");
+}
+#endif
+
+static bool kvm_para_available(void)
+{
+       int config;
+       static int hypervisor_type;
+
+       if (!hypervisor_type) {
+               config = read_cpucfg(CPUCFG_KVM_SIG);
+               if (!memcmp(&config, KVM_SIGNATURE, 4))
+                       hypervisor_type = HYPERVISOR_KVM;
+       }
+
+       return hypervisor_type == HYPERVISOR_KVM;
+}
+
+int __init pv_ipi_init(void)
+{
+       int feature;
+
+       if (!cpu_has_hypervisor)
+               return 0;
+       if (!kvm_para_available())
+               return 0;
+
+       feature = read_cpucfg(CPUCFG_KVM_FEATURE);
+       if (!(feature & KVM_FEATURE_IPI))
+               return 0;
+
+#ifdef CONFIG_SMP
+       mp_ops.init_ipi         = pv_init_ipi;
+       mp_ops.send_ipi_single  = pv_send_ipi_single;
+       mp_ops.send_ipi_mask    = pv_send_ipi_mask;
+#endif
+
+       return 0;
+}
index 15b75e671bcd1deda85fe21b7bdce7a6f07ffce4..0dfe2388ef413b673185ab88951b06c532559fff 100644 (file)
@@ -29,6 +29,7 @@
 #include <asm/loongson.h>
 #include <asm/mmu_context.h>
 #include <asm/numa.h>
+#include <asm/paravirt.h>
 #include <asm/processor.h>
 #include <asm/setup.h>
 #include <asm/time.h>
@@ -299,6 +300,7 @@ void __init loongson_smp_setup(void)
        cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package;
        cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package;
 
+       pv_ipi_init();
        iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN);
        pr_info("Detected %i available CPU(s)\n", loongson_sysconf.nr_cpus);
 }
@@ -343,7 +345,7 @@ void loongson_init_secondary(void)
 {
        unsigned int cpu = smp_processor_id();
        unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
-                            ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER;
+                            ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER | ECFGF_SIP0;
 
        change_csr_ecfg(ECFG0_IM, imask);