diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-05-14 20:01:29 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-05-14 20:01:29 +0300 |
commit | 9776dd36095be19f5a0ad9f07a4fc221d2a0609a (patch) | |
tree | bd2bf5c11cf57fc187569670911a51f0b16f2796 /arch/x86/include | |
parent | 6bfd2d442af5c373042f196eef1915e1f6ac058a (diff) | |
parent | 6ecc2e7932fe8f132d3b671685f9995785f19e9a (diff) | |
download | linux-9776dd36095be19f5a0ad9f07a4fc221d2a0609a.tar.xz |
Merge tag 'x86-irq-2024-05-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 interrupt handling updates from Thomas Gleixner:
"Add support for posted interrupts on bare metal.
Posted interrupts is a virtualization feature which allows to inject
interrupts directly into a guest without host interaction. The VT-d
interrupt remapping hardware sets the bit which corresponds to the
interrupt vector in a vector bitmap which is either used to inject the
interrupt directly into the guest via a virtualized APIC or in case
that the guest is scheduled out provides a host side notification
interrupt which informs the host that an interrupt has been marked
pending in the bitmap.
This can be utilized on bare metal for scenarios where multiple
devices, e.g. NVME storage, raise interrupts with a high frequency. In
the default mode these interrupts are handles independently and
therefore require a full roundtrip of interrupt entry/exit.
Utilizing posted interrupts this roundtrip overhead can be avoided by
coalescing these interrupt entries to a single entry for the posted
interrupt notification. The notification interrupt then demultiplexes
the pending bits in a memory based bitmap and invokes the
corresponding device specific handlers.
Depending on the usage scenario and device utilization throughput
improvements between 10% and 130% have been measured.
As this is only relevant for high end servers with multiple device
queues per CPU attached and counterproductive for situations where
interrupts are arriving at distinct times, the functionality is opt-in
via a kernel command line parameter"
* tag 'x86-irq-2024-05-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/irq: Use existing helper for pending vector check
iommu/vt-d: Enable posted mode for device MSIs
iommu/vt-d: Make posted MSI an opt-in command line option
x86/irq: Extend checks for pending vectors to posted interrupts
x86/irq: Factor out common code for checking pending interrupts
x86/irq: Install posted MSI notification handler
x86/irq: Factor out handler invocation from common_interrupt()
x86/irq: Set up per host CPU posted interrupt descriptors
x86/irq: Reserve a per CPU IDT vector for posted MSIs
x86/irq: Add a Kconfig option for posted MSI
x86/irq: Remove bitfields in posted interrupt descriptor
x86/irq: Unionize PID.PIR for 64bit access w/o casting
KVM: VMX: Move posted interrupt descriptor out of VMX code
Diffstat (limited to 'arch/x86/include')
-rw-r--r-- | arch/x86/include/asm/apic.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/hardirq.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/idtentry.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/irq_remapping.h | 7 | ||||
-rw-r--r-- | arch/x86/include/asm/irq_vectors.h | 8 | ||||
-rw-r--r-- | arch/x86/include/asm/posted_intr.h | 118 |
6 files changed, 150 insertions, 1 deletions
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 839c58f95114..9327eb00e96d 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -14,6 +14,7 @@ #include <asm/msr.h> #include <asm/hardirq.h> #include <asm/io.h> +#include <asm/posted_intr.h> #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -500,6 +501,11 @@ static inline bool lapic_vector_set_in_irr(unsigned int vector) return !!(irr & (1U << (vector % 32))); } +static inline bool is_vector_pending(unsigned int vector) +{ + return lapic_vector_set_in_irr(vector) || pi_pending_this_cpu(vector); +} + /* * Warm reset vector position: */ diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index fbc7722b87d1..c67fa6ad098a 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -44,10 +44,16 @@ typedef struct { unsigned int irq_hv_reenlightenment_count; unsigned int hyperv_stimer0_count; #endif +#ifdef CONFIG_X86_POSTED_MSI + unsigned int posted_msi_notification_count; +#endif } ____cacheline_aligned irq_cpustat_t; DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +#ifdef CONFIG_X86_POSTED_MSI +DECLARE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc); +#endif #define __ARCH_IRQ_STAT #define inc_irq_stat(member) this_cpu_inc(irq_stat.member) diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 749c7411d2f1..d4f24499b256 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -751,6 +751,12 @@ DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested # define fred_sysvec_kvm_posted_intr_nested_ipi NULL #endif +# ifdef CONFIG_X86_POSTED_MSI +DECLARE_IDTENTRY_SYSVEC(POSTED_MSI_NOTIFICATION_VECTOR, sysvec_posted_msi_notification); +#else +# define fred_sysvec_posted_msi_notification NULL +# endif + #if IS_ENABLED(CONFIG_HYPERV) DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback); DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment); diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 7a2ed154a5e1..5036f13ab69f 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -50,6 +50,13 @@ static inline struct irq_domain *arch_get_ir_parent_domain(void) return x86_vector_domain; } +extern bool enable_posted_msi; + +static inline bool posted_msi_supported(void) +{ + return enable_posted_msi && irq_remapping_cap(IRQ_POSTING_CAP); +} + #else /* CONFIG_IRQ_REMAP */ static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; } diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index d18bfb238f66..13aea8fc3d45 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -97,10 +97,16 @@ #define LOCAL_TIMER_VECTOR 0xec +/* + * Posted interrupt notification vector for all device MSIs delivered to + * the host kernel. + */ +#define POSTED_MSI_NOTIFICATION_VECTOR 0xeb + #define NR_VECTORS 256 #ifdef CONFIG_X86_LOCAL_APIC -#define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR +#define FIRST_SYSTEM_VECTOR POSTED_MSI_NOTIFICATION_VECTOR #else #define FIRST_SYSTEM_VECTOR NR_VECTORS #endif diff --git a/arch/x86/include/asm/posted_intr.h b/arch/x86/include/asm/posted_intr.h new file mode 100644 index 000000000000..de788b400fba --- /dev/null +++ b/arch/x86/include/asm/posted_intr.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _X86_POSTED_INTR_H +#define _X86_POSTED_INTR_H +#include <asm/irq_vectors.h> + +#define POSTED_INTR_ON 0 +#define POSTED_INTR_SN 1 + +#define PID_TABLE_ENTRY_VALID 1 + +/* Posted-Interrupt Descriptor */ +struct pi_desc { + union { + u32 pir[8]; /* Posted interrupt requested */ + u64 pir64[4]; + }; + union { + struct { + u16 notifications; /* Suppress and outstanding bits */ + u8 nv; + u8 rsvd_2; + u32 ndst; + }; + u64 control; + }; + u32 rsvd[6]; +} __aligned(64); + +static inline bool pi_test_and_set_on(struct pi_desc *pi_desc) +{ + return test_and_set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc) +{ + return test_and_clear_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_and_clear_sn(struct pi_desc *pi_desc) +{ + return test_and_clear_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) +{ + return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); +} + +static inline bool pi_is_pir_empty(struct pi_desc *pi_desc) +{ + return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS); +} + +static inline void pi_set_sn(struct pi_desc *pi_desc) +{ + set_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control); +} + +static inline void pi_set_on(struct pi_desc *pi_desc) +{ + set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline void pi_clear_on(struct pi_desc *pi_desc) +{ + clear_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline void pi_clear_sn(struct pi_desc *pi_desc) +{ + clear_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_on(struct pi_desc *pi_desc) +{ + return test_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_sn(struct pi_desc *pi_desc) +{ + return test_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control); +} + +/* Non-atomic helpers */ +static inline void __pi_set_sn(struct pi_desc *pi_desc) +{ + pi_desc->notifications |= BIT(POSTED_INTR_SN); +} + +static inline void __pi_clear_sn(struct pi_desc *pi_desc) +{ + pi_desc->notifications &= ~BIT(POSTED_INTR_SN); +} + +#ifdef CONFIG_X86_POSTED_MSI +/* + * Not all external vectors are subject to interrupt remapping, e.g. IOMMU's + * own interrupts. Here we do not distinguish them since those vector bits in + * PIR will always be zero. + */ +static inline bool pi_pending_this_cpu(unsigned int vector) +{ + struct pi_desc *pid = this_cpu_ptr(&posted_msi_pi_desc); + + if (WARN_ON_ONCE(vector > NR_VECTORS || vector < FIRST_EXTERNAL_VECTOR)) + return false; + + return test_bit(vector, (unsigned long *)pid->pir); +} + +extern void intel_posted_msi_init(void); +#else +static inline bool pi_pending_this_cpu(unsigned int vector) { return false; } + +static inline void intel_posted_msi_init(void) {}; +#endif /* X86_POSTED_MSI */ + +#endif /* _X86_POSTED_INTR_H */ |