From 0d0ae656b71155ccc0be9388beef77a1f7e7558e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:41 +0100 Subject: KVM: arm64: timers: Use a per-vcpu, per-timer accumulator for fractional ns Instead of accumulating the fractional ns value generated every time we compute a ns delta in a global variable, use a per-vcpu, per-timer variable. This keeps the fractional ns local to the timer instead of contributing to any odd, unrelated timer. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-2-maz@kernel.org --- include/kvm/arm_arch_timer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/kvm') diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index c52a6e6839da..70d47c4adc6a 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -44,6 +44,7 @@ struct arch_timer_context { /* Emulated Timer (may be unused) */ struct hrtimer hrtimer; + u64 ns_frac; /* Offset for this counter/timer */ struct arch_timer_offset offset; -- cgit v1.2.3 From 2b4825a8694018901e641ccc2eafd0fff58d1415 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:44 +0100 Subject: KVM: arm64: timers: Use CNTPOFF_EL2 to offset the physical timer With ECV and CNTPOFF_EL2, it is very easy to offer an offset for the physical timer. So let's do just that. Nothing can set the offset yet, so this should have no effect whatsoever (famous last words...). Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-5-maz@kernel.org --- arch/arm64/kvm/arch_timer.c | 18 +++++++++++++++++- arch/arm64/kvm/hypercalls.c | 2 +- include/clocksource/arm_arch_timer.h | 1 + include/kvm/arm_arch_timer.h | 2 ++ 4 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include/kvm') diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 9515c645f03d..3118ea0a1b41 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -52,6 +52,11 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, struct arch_timer_context *timer, enum kvm_arch_timer_regs treg); +static bool has_cntpoff(void) +{ + return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)); +} + u32 timer_get_ctl(struct arch_timer_context *ctxt) { struct kvm_vcpu *vcpu = ctxt->vcpu; @@ -84,7 +89,7 @@ u64 timer_get_cval(struct arch_timer_context *ctxt) static u64 timer_get_offset(struct arch_timer_context *ctxt) { - if (ctxt->offset.vm_offset) + if (ctxt && ctxt->offset.vm_offset) return *ctxt->offset.vm_offset; return 0; @@ -432,6 +437,12 @@ static void set_cntvoff(u64 cntvoff) kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff); } +static void set_cntpoff(u64 cntpoff) +{ + if (has_cntpoff()) + write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2); +} + static void timer_save_state(struct arch_timer_context *ctx) { struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); @@ -480,6 +491,7 @@ static void timer_save_state(struct arch_timer_context *ctx) write_sysreg_el0(0, SYS_CNTP_CTL); isb(); + set_cntpoff(0); break; case NR_KVM_TIMERS: BUG(); @@ -550,6 +562,7 @@ static void timer_restore_state(struct arch_timer_context *ctx) write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL); break; case TIMER_PTIMER: + set_cntpoff(timer_get_offset(ctx)); write_sysreg_el0(timer_get_cval(ctx), SYS_CNTP_CVAL); isb(); write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL); @@ -767,6 +780,7 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) vtimer->vcpu = vcpu; vtimer->offset.vm_offset = 
&vcpu->kvm->arch.timer_data.voffset;
 	ptimer->vcpu = vcpu;
+	ptimer->offset.vm_offset = &vcpu->kvm->arch.timer_data.poffset;

 	/* Synchronize cntvoff across all vtimers of a VM. */
 	timer_set_offset(vtimer, kvm_phys_timer_read());
@@ -1297,6 +1311,8 @@ void kvm_timer_init_vhe(void)
 	val = read_sysreg(cnthctl_el2);
 	val |= (CNTHCTL_EL1PCEN << cnthctl_shift);
 	val |= (CNTHCTL_EL1PCTEN << cnthctl_shift);
+	if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
+		val |= CNTHCTL_ECV;
 	write_sysreg(val, cnthctl_el2);
 }
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 5da884e11337..39a4707e081d 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -47,7 +47,7 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val)
 		cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.voffset;
 		break;
 	case KVM_PTP_PHYS_COUNTER:
-		cycles = systime_snapshot.cycles;
+		cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.poffset;
 		break;
 	default:
 		return;
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index 057c8964aefb..cbbc9a6dc571 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -21,6 +21,7 @@
 #define CNTHCTL_EVNTEN			(1 << 2)
 #define CNTHCTL_EVNTDIR			(1 << 3)
 #define CNTHCTL_EVNTI			(0xF << 4)
+#define CNTHCTL_ECV			(1 << 12)

 enum arch_timer_reg {
 	ARCH_TIMER_REG_CTRL,
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 70d47c4adc6a..2dd0fd2406fb 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -34,6 +34,8 @@ struct arch_timer_offset {
 struct arch_timer_vm_data {
 	/* Offset applied to the virtual timer/counter */
 	u64	voffset;
+	/* Offset applied to the physical timer/counter */
+	u64	poffset;
 };

 struct arch_timer_context {
-- cgit v1.2.3

From 5591805d2c21b70838b723b71b8ff613de51cfff Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Thu, 30 Mar 2023 18:47:49 +0100
Subject: KVM: arm64: timers: Rationalise per-vcpu timer init

The way we initialise our timer contexts may be satisfactory for two
timers, but will be getting pretty annoying with four. Clean up the
whole thing by removing the code duplication and getting rid of unused
IRQ configuration elements.

Reviewed-by: Colton Lewis
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20230330174800.2677007-10-maz@kernel.org
---
 arch/arm64/kvm/arch_timer.c  | 73 +++++++++++++++++++++++---------------------
 include/kvm/arm_arch_timer.h |  1 -
 2 files changed, 39 insertions(+), 35 deletions(-)

(limited to 'include/kvm')

diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 771504c79711..e46f04ed8f86 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -30,14 +30,9 @@ static u32 host_ptimer_irq_flags;

 static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);

-static const struct kvm_irq_level default_ptimer_irq = {
-	.irq	= 30,
-	.level	= 1,
-};
-
-static const struct kvm_irq_level default_vtimer_irq = {
-	.irq	= 27,
-	.level	= 1,
+static const u8 default_ppi[] = {
+	[TIMER_PTIMER]	= 30,
+	[TIMER_VTIMER]	= 27,
 };

 static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
@@ -820,12 +815,14 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
 	 * resets the timer to be disabled and unmasked and is compliant with
 	 * the ARMv7 architecture.
	 */
-	timer_set_ctl(vcpu_vtimer(vcpu), 0);
-	timer_set_ctl(vcpu_ptimer(vcpu), 0);
+	for (int i = 0; i < NR_KVM_TIMERS; i++)
+		timer_set_ctl(vcpu_get_timer(vcpu, i), 0);
+
 	if (timer->enabled) {
-		kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu));
-		kvm_timer_update_irq(vcpu, false, vcpu_ptimer(vcpu));
+		for (int i = 0; i < NR_KVM_TIMERS; i++)
+			kvm_timer_update_irq(vcpu, false,
+					     vcpu_get_timer(vcpu, i));

 		if (irqchip_in_kernel(vcpu->kvm)) {
 			kvm_vgic_reset_mapped_irq(vcpu, map.direct_vtimer->irq.irq);
@@ -840,39 +837,47 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
 	return 0;
 }

+static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
+{
+	struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
+	struct kvm *kvm = vcpu->kvm;
+
+	ctxt->vcpu = vcpu;
+
+	if (timerid == TIMER_VTIMER)
+		ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
+	else
+		ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;
+
+	hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
+	ctxt->hrtimer.function = kvm_hrtimer_expire;
+	ctxt->irq.irq = default_ppi[timerid];
+
+	switch (timerid) {
+	case TIMER_PTIMER:
+		ctxt->host_timer_irq = host_ptimer_irq;
+		break;
+	case TIMER_VTIMER:
+		ctxt->host_timer_irq = host_vtimer_irq;
+		break;
+	}
+}
+
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);

-	vtimer->vcpu = vcpu;
-	vtimer->offset.vm_offset = &vcpu->kvm->arch.timer_data.voffset;
-	ptimer->vcpu = vcpu;
-	ptimer->offset.vm_offset = &vcpu->kvm->arch.timer_data.poffset;
+	for (int i = 0; i < NR_KVM_TIMERS; i++)
+		timer_context_init(vcpu, i);

 	/* Synchronize offsets across timers of a VM if not already provided */
 	if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
 		timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
 		timer_set_offset(vcpu_ptimer(vcpu), 0);
 	}

 	hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
 	timer->bg_timer.function = kvm_bg_timer_expire;
-
-	hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
-	hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
-	vtimer->hrtimer.function = kvm_hrtimer_expire;
-	ptimer->hrtimer.function = kvm_hrtimer_expire;
-
-	vtimer->irq.irq = default_vtimer_irq.irq;
-	ptimer->irq.irq = default_ptimer_irq.irq;
-
-	vtimer->host_timer_irq = host_vtimer_irq;
-	ptimer->host_timer_irq = host_ptimer_irq;
-
-	vtimer->host_timer_irq_flags = host_vtimer_irq_flags;
-	ptimer->host_timer_irq_flags = host_ptimer_irq_flags;
 }

 void kvm_timer_cpu_up(void)
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 2dd0fd2406fb..c746ef64220b 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -59,7 +59,6 @@ struct arch_timer_context {

 	/* Duplicated state from arch_timer.c for convenience */
 	u32 host_timer_irq;
-	u32 host_timer_irq_flags;
 };

 struct timer_map {
-- cgit v1.2.3

From 33c549460ef9119eb115484e81f54521122341db Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Thu, 30 Mar 2023 18:47:50 +0100
Subject: KVM: arm64: timers: Abstract per-timer IRQ access

As we are about to move the location of the per-timer IRQ into the
VM structure, abstract the location of the IRQ behind an accessor.
This will make the repainting slightly less painful.
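For illustration, the change boils down to hiding the field access
behind a single macro that can later be repointed at different
storage. A minimal sketch of the pattern (timer_irq() is the accessor
this patch introduces):

	/* Before: callers hard-code where the PPI number lives */
	irq = ctx->irq.irq;

	/* After: one accessor owns the location... */
	#define timer_irq(ctx)	((ctx)->irq.irq)

	/* ...and, since it expands to an lvalue, assignment works too */
	irq = timer_irq(ctx);
	timer_irq(ctx) = default_ppi[timerid];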
Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-11-maz@kernel.org --- arch/arm64/kvm/arch_timer.c | 38 +++++++++++++++++++------------------- include/kvm/arm_arch_timer.h | 2 ++ 2 files changed, 21 insertions(+), 19 deletions(-) (limited to 'include/kvm') diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index e46f04ed8f86..d08d8c2fc30d 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -392,12 +392,12 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, int ret; timer_ctx->irq.level = new_level; - trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, + trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx), timer_ctx->irq.level); if (!userspace_irqchip(vcpu->kvm)) { ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - timer_ctx->irq.irq, + timer_irq(timer_ctx), timer_ctx->irq.level, timer_ctx); WARN_ON(ret); @@ -607,7 +607,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx); if (irqchip_in_kernel(vcpu->kvm)) - phys_active = kvm_vgic_map_is_active(vcpu, ctx->irq.irq); + phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx)); phys_active |= ctx->irq.level; @@ -825,9 +825,9 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) vcpu_get_timer(vcpu, i)); if (irqchip_in_kernel(vcpu->kvm)) { - kvm_vgic_reset_mapped_irq(vcpu, map.direct_vtimer->irq.irq); + kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer)); if (map.direct_ptimer) - kvm_vgic_reset_mapped_irq(vcpu, map.direct_ptimer->irq.irq); + kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer)); } } @@ -851,7 +851,7 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); ctxt->hrtimer.function = kvm_hrtimer_expire; - ctxt->irq.irq = default_ppi[timerid]; + timer_irq(ctxt) = default_ppi[timerid]; switch (timerid) { case TIMER_PTIMER: @@ -1295,19 +1295,19 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) int vtimer_irq, ptimer_irq, ret; unsigned long i; - vtimer_irq = vcpu_vtimer(vcpu)->irq.irq; + vtimer_irq = timer_irq(vcpu_vtimer(vcpu)); ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu)); if (ret) return false; - ptimer_irq = vcpu_ptimer(vcpu)->irq.irq; + ptimer_irq = timer_irq(vcpu_ptimer(vcpu)); ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu)); if (ret) return false; kvm_for_each_vcpu(i, vcpu, vcpu->kvm) { - if (vcpu_vtimer(vcpu)->irq.irq != vtimer_irq || - vcpu_ptimer(vcpu)->irq.irq != ptimer_irq) + if (timer_irq(vcpu_vtimer(vcpu)) != vtimer_irq || + timer_irq(vcpu_ptimer(vcpu)) != ptimer_irq) return false; } @@ -1322,9 +1322,9 @@ bool kvm_arch_timer_get_input_level(int vintid) if (WARN(!vcpu, "No vcpu context!\n")) return false; - if (vintid == vcpu_vtimer(vcpu)->irq.irq) + if (vintid == timer_irq(vcpu_vtimer(vcpu))) timer = vcpu_vtimer(vcpu); - else if (vintid == vcpu_ptimer(vcpu)->irq.irq) + else if (vintid == timer_irq(vcpu_ptimer(vcpu))) timer = vcpu_ptimer(vcpu); else BUG(); @@ -1358,7 +1358,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) ret = kvm_vgic_map_phys_irq(vcpu, map.direct_vtimer->host_timer_irq, - map.direct_vtimer->irq.irq, + timer_irq(map.direct_vtimer), &arch_timer_irq_ops); if (ret) return ret; @@ -1366,7 +1366,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) if (map.direct_ptimer) { ret = kvm_vgic_map_phys_irq(vcpu, map.direct_ptimer->host_timer_irq, - 
map.direct_ptimer->irq.irq, + timer_irq(map.direct_ptimer), &arch_timer_irq_ops); } @@ -1391,8 +1391,8 @@ static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq) unsigned long i; kvm_for_each_vcpu(i, vcpu, kvm) { - vcpu_vtimer(vcpu)->irq.irq = vtimer_irq; - vcpu_ptimer(vcpu)->irq.irq = ptimer_irq; + timer_irq(vcpu_vtimer(vcpu)) = vtimer_irq; + timer_irq(vcpu_ptimer(vcpu)) = ptimer_irq; } } @@ -1417,10 +1417,10 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) switch (attr->attr) { case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: - set_timer_irqs(vcpu->kvm, irq, ptimer->irq.irq); + set_timer_irqs(vcpu->kvm, irq, timer_irq(ptimer)); break; case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: - set_timer_irqs(vcpu->kvm, vtimer->irq.irq, irq); + set_timer_irqs(vcpu->kvm, timer_irq(vtimer), irq); break; default: return -ENXIO; @@ -1446,7 +1446,7 @@ int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return -ENXIO; } - irq = timer->irq.irq; + irq = timer_irq(timer); return put_user(irq, uaddr); } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index c746ef64220b..27cada09f588 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -109,6 +109,8 @@ bool kvm_arch_timer_get_input_level(int vintid); #define arch_timer_ctx_index(ctx) ((ctx) - vcpu_timer((ctx)->vcpu)->timers) +#define timer_irq(ctx) ((ctx)->irq.irq) + u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, enum kvm_arch_timers tmr, enum kvm_arch_timer_regs treg); -- cgit v1.2.3 From 8a5eb2d210807e7dbe9ece7075533014cf4b9c27 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:51 +0100 Subject: KVM: arm64: timers: Move the timer IRQs into arch_timer_vm_data Having the timer IRQs duplicated into each vcpu isn't great, and becomes absolutely awful with NV. So let's move these into the per-VM arch_timer_vm_data structure. This simplifies a lot of code, but requires us to introduce a mutex so that we can reason about userspace trying to change an interrupt number while another vcpu is running, something that wasn't really well handled so far. 
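The resulting locking scheme can be summarised as follows; this is a
condensed sketch of the pattern the patch implements, with error
handling and the vgic ownership checks elided:

	/* Userspace attribute path: only legal before any vcpu has run */
	mutex_lock(&kvm->arch.timer_data.lock);
	if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &kvm->arch.flags))
		ret = -EBUSY;
	else
		kvm->arch.timer_data.ppi[idx] = irq;
	mutex_unlock(&kvm->arch.timer_data.lock);

	/* First vcpu run: validate the PPIs once, then freeze them */
	mutex_lock(&kvm->arch.timer_data.lock);
	if (ppis_are_valid)	/* stand-in for the real uniqueness checks */
		set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &kvm->arch.flags);
	mutex_unlock(&kvm->arch.timer_data.lock);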
Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-12-maz@kernel.org --- arch/arm64/include/asm/kvm_host.h | 2 + arch/arm64/kvm/arch_timer.c | 108 ++++++++++++++++++++++---------------- arch/arm64/kvm/arm.c | 2 + include/kvm/arm_arch_timer.h | 18 +++++-- 4 files changed, 82 insertions(+), 48 deletions(-) (limited to 'include/kvm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 116233a390e9..1280154c9ef3 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -223,6 +223,8 @@ struct kvm_arch { #define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5 /* VM counter offset */ #define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 6 + /* Timer PPIs made immutable */ +#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 7 unsigned long flags; diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index d08d8c2fc30d..1d811735e05f 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -851,7 +851,6 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); ctxt->hrtimer.function = kvm_hrtimer_expire; - timer_irq(ctxt) = default_ppi[timerid]; switch (timerid) { case TIMER_PTIMER: @@ -880,6 +879,13 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) timer->bg_timer.function = kvm_bg_timer_expire; } +void kvm_timer_init_vm(struct kvm *kvm) +{ + mutex_init(&kvm->arch.timer_data.lock); + for (int i = 0; i < NR_KVM_TIMERS; i++) + kvm->arch.timer_data.ppi[i] = default_ppi[i]; +} + void kvm_timer_cpu_up(void) { enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); @@ -1292,44 +1298,56 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) { - int vtimer_irq, ptimer_irq, ret; - unsigned long i; + u32 ppis = 0; + bool valid; - vtimer_irq = timer_irq(vcpu_vtimer(vcpu)); - ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu)); - if (ret) - return false; + mutex_lock(&vcpu->kvm->arch.timer_data.lock); - ptimer_irq = timer_irq(vcpu_ptimer(vcpu)); - ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu)); - if (ret) - return false; + for (int i = 0; i < NR_KVM_TIMERS; i++) { + struct arch_timer_context *ctx; + int irq; - kvm_for_each_vcpu(i, vcpu, vcpu->kvm) { - if (timer_irq(vcpu_vtimer(vcpu)) != vtimer_irq || - timer_irq(vcpu_ptimer(vcpu)) != ptimer_irq) - return false; + ctx = vcpu_get_timer(vcpu, i); + irq = timer_irq(ctx); + if (kvm_vgic_set_owner(vcpu, irq, ctx)) + break; + + /* + * We know by construction that we only have PPIs, so + * all values are less than 32. 
+ */ + ppis |= BIT(irq); } - return true; + valid = hweight32(ppis) == NR_KVM_TIMERS; + + if (valid) + set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags); + + mutex_unlock(&vcpu->kvm->arch.timer_data.lock); + + return valid; } bool kvm_arch_timer_get_input_level(int vintid) { struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); - struct arch_timer_context *timer; if (WARN(!vcpu, "No vcpu context!\n")) return false; - if (vintid == timer_irq(vcpu_vtimer(vcpu))) - timer = vcpu_vtimer(vcpu); - else if (vintid == timer_irq(vcpu_ptimer(vcpu))) - timer = vcpu_ptimer(vcpu); - else - BUG(); + for (int i = 0; i < NR_KVM_TIMERS; i++) { + struct arch_timer_context *ctx; + + ctx = vcpu_get_timer(vcpu, i); + if (timer_irq(ctx) == vintid) + return kvm_timer_should_fire(ctx); + } - return kvm_timer_should_fire(timer); + /* A timer IRQ has fired, but no matching timer was found? */ + WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid); + + return false; } int kvm_timer_enable(struct kvm_vcpu *vcpu) @@ -1385,23 +1403,10 @@ void kvm_timer_init_vhe(void) sysreg_clear_set(cntkctl_el1, 0, CNTHCTL_ECV); } -static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq) -{ - struct kvm_vcpu *vcpu; - unsigned long i; - - kvm_for_each_vcpu(i, vcpu, kvm) { - timer_irq(vcpu_vtimer(vcpu)) = vtimer_irq; - timer_irq(vcpu_ptimer(vcpu)) = ptimer_irq; - } -} - int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { int __user *uaddr = (int __user *)(long)attr->addr; - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - int irq; + int irq, idx, ret = 0; if (!irqchip_in_kernel(vcpu->kvm)) return -EINVAL; @@ -1412,21 +1417,36 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) if (!(irq_is_ppi(irq))) return -EINVAL; - if (vcpu->arch.timer_cpu.enabled) - return -EBUSY; + mutex_lock(&vcpu->kvm->arch.timer_data.lock); + + if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, + &vcpu->kvm->arch.flags)) { + ret = -EBUSY; + goto out; + } switch (attr->attr) { case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: - set_timer_irqs(vcpu->kvm, irq, timer_irq(ptimer)); + idx = TIMER_VTIMER; break; case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: - set_timer_irqs(vcpu->kvm, timer_irq(vtimer), irq); + idx = TIMER_PTIMER; break; default: - return -ENXIO; + ret = -ENXIO; + goto out; } - return 0; + /* + * We cannot validate the IRQ unicity before we run, so take it at + * face value. The verdict will be given on first vcpu run, for each + * vcpu. Yes this is late. Blame it on the stupid API. 
+	 */
+	vcpu->kvm->arch.timer_data.ppi[idx] = irq;
+
+out:
+	mutex_unlock(&vcpu->kvm->arch.timer_data.lock);
+	return ret;
 }

 int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 1c8a4bbae684..4c5e9dfbf83a 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -148,6 +148,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)

 	kvm_vgic_early_init(kvm);

+	kvm_timer_init_vm(kvm);
+
 	/* The maximum number of VCPUs is limited by the host's GIC model */
 	kvm->max_vcpus = kvm_arm_default_max_vcpus();

diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 27cada09f588..f093ea9f540d 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -36,14 +36,16 @@ struct arch_timer_vm_data {
 	u64	voffset;
 	/* Offset applied to the physical timer/counter */
 	u64	poffset;
+
+	struct mutex	lock;
+
+	/* The PPI for each timer, global to the VM */
+	u8	ppi[NR_KVM_TIMERS];
 };

 struct arch_timer_context {
 	struct kvm_vcpu			*vcpu;

-	/* Timer IRQ */
-	struct kvm_irq_level		irq;
-
 	/* Emulated Timer (may be unused) */
 	struct hrtimer			hrtimer;
 	u64				ns_frac;
@@ -57,6 +59,11 @@ struct arch_timer_context {
 	 */
 	bool			loaded;

+	/* Output level of the timer IRQ */
+	struct {
+		bool	level;
+	} irq;
+
 	/* Duplicated state from arch_timer.c for convenience */
 	u32	host_timer_irq;
 };
@@ -86,6 +93,8 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu);
 void kvm_timer_update_run(struct kvm_vcpu *vcpu);
 void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);

+void kvm_timer_init_vm(struct kvm *kvm);
+
 u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
 int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);

@@ -109,7 +118,8 @@ bool kvm_arch_timer_get_input_level(int vintid);

 #define arch_timer_ctx_index(ctx)	((ctx) - vcpu_timer((ctx)->vcpu)->timers)

-#define timer_irq(ctx)			((ctx)->irq.irq)
+#define timer_vm_data(ctx)		(&(ctx)->vcpu->kvm->arch.timer_data)
+#define timer_irq(ctx)			(timer_vm_data(ctx)->ppi[arch_timer_ctx_index(ctx)])

 u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
 			      enum kvm_arch_timers tmr,
-- cgit v1.2.3

From 1e0eec09d43a55125ff80e40b2d6e2f369a338b9 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Thu, 30 Mar 2023 18:47:56 +0100
Subject: KVM: arm64: nv: timers: Add a per-timer, per-vcpu offset

Being able to set a global offset isn't enough. With NV, we also need
a per-vcpu, per-timer offset (for example, CNTVCT_EL0 being offset by
CNTVOFF_EL2).

Use a similar method as the VM-wide offset to have a timer point to
the shadow register that contains the offset value.
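With both pointers in place, the offset applied to a counter read is
simply the sum of whichever components are populated, mirroring the
timer_get_offset() rework below:

	static u64 timer_get_offset(struct arch_timer_context *ctxt)
	{
		u64 offset = 0;

		if (!ctxt)
			return 0;

		/* VM-wide component (e.g. the per-VM counter offset) */
		if (ctxt->offset.vm_offset)
			offset += *ctxt->offset.vm_offset;
		/* Per-vcpu component (e.g. the shadow CNTVOFF_EL2) */
		if (ctxt->offset.vcpu_offset)
			offset += *ctxt->offset.vcpu_offset;

		return offset;
	}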
Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-17-maz@kernel.org --- arch/arm64/kvm/arch_timer.c | 13 ++++++++++--- arch/arm64/kvm/hyp/include/hyp/switch.h | 2 ++ include/kvm/arm_arch_timer.h | 5 +++++ 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'include/kvm') diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index d3a7902269c1..b87bf182af33 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -89,10 +89,17 @@ u64 timer_get_cval(struct arch_timer_context *ctxt) static u64 timer_get_offset(struct arch_timer_context *ctxt) { - if (ctxt && ctxt->offset.vm_offset) - return *ctxt->offset.vm_offset; + u64 offset = 0; - return 0; + if (!ctxt) + return 0; + + if (ctxt->offset.vm_offset) + offset += *ctxt->offset.vm_offset; + if (ctxt->offset.vcpu_offset) + offset += *ctxt->offset.vcpu_offset; + + return offset; } static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 9954368f639d..d07cbc313889 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -353,6 +353,8 @@ static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu) if (ctxt->offset.vm_offset) val -= *kern_hyp_va(ctxt->offset.vm_offset); + if (ctxt->offset.vcpu_offset) + val -= *kern_hyp_va(ctxt->offset.vcpu_offset); vcpu_set_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu), val); __kvm_skip_instr(vcpu); diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index f093ea9f540d..209da0c2ac9f 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -29,6 +29,11 @@ struct arch_timer_offset { * structure. If NULL, assume a zero offset. */ u64 *vm_offset; + /* + * If set, pointer to one of the offsets in the vcpu's sysreg + * array. If NULL, assume a zero offset. + */ + u64 *vcpu_offset; }; struct arch_timer_vm_data { -- cgit v1.2.3 From 81dc9504a7006b484cfcf074796094ee526b0c45 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:57 +0100 Subject: KVM: arm64: nv: timers: Support hyp timer emulation Emulating EL2 also means emulating the EL2 timers. To do so, we expand our timer framework to deal with at most 4 timers. At any given time, two timers are using the HW timers, and the two others are purely emulated. The role of deciding which is which at any given time is left to a mapping function which is called every time we need to make such a decision. 
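For an NV guest, the decision reduces to the following; this is a
condensed sketch of the get_timer_map() logic added by the patch (the
non-NV cases keep their existing behaviour):

	if (is_hyp_ctxt(vcpu)) {
		/* vEL2: EL2 timers get the hardware, EL1/EL0 are emulated */
		map->direct_vtimer = vcpu_hvtimer(vcpu);
		map->direct_ptimer = vcpu_hptimer(vcpu);
		map->emul_vtimer = vcpu_vtimer(vcpu);
		map->emul_ptimer = vcpu_ptimer(vcpu);
	} else {
		/* vEL1/vEL0: the usual timers get the hardware instead */
		map->direct_vtimer = vcpu_vtimer(vcpu);
		map->direct_ptimer = vcpu_ptimer(vcpu);
		map->emul_vtimer = vcpu_hvtimer(vcpu);
		map->emul_ptimer = vcpu_hptimer(vcpu);
	}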
Reviewed-by: Colton Lewis Co-developed-by: Christoffer Dall Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-18-maz@kernel.org --- arch/arm64/include/asm/kvm_host.h | 4 + arch/arm64/include/uapi/asm/kvm.h | 2 + arch/arm64/kvm/arch_timer.c | 180 ++++++++++++++++++++++++++++++-- arch/arm64/kvm/hyp/include/hyp/switch.h | 15 +++ arch/arm64/kvm/trace_arm.h | 6 +- arch/arm64/kvm/vgic/vgic.c | 15 +++ include/kvm/arm_arch_timer.h | 9 +- include/kvm/arm_vgic.h | 1 + 8 files changed, 220 insertions(+), 12 deletions(-) (limited to 'include/kvm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 1280154c9ef3..633a7c0750bb 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -369,6 +369,10 @@ enum vcpu_sysreg { TPIDR_EL2, /* EL2 Software Thread ID Register */ CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */ SP_EL2, /* EL2 Stack Pointer */ + CNTHP_CTL_EL2, + CNTHP_CVAL_EL2, + CNTHV_CTL_EL2, + CNTHV_CVAL_EL2, NR_SYS_REGS /* Nothing after this line! */ }; diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 12fb0d8a760a..0921f366c49f 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -420,6 +420,8 @@ enum { #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 +#define KVM_ARM_VCPU_TIMER_IRQ_HVTIMER 2 +#define KVM_ARM_VCPU_TIMER_IRQ_HPTIMER 3 #define KVM_ARM_VCPU_PVTIME_CTRL 2 #define KVM_ARM_VCPU_PVTIME_IPA 0 diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index b87bf182af33..c5c8cc3c25ae 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -33,6 +34,8 @@ static DEFINE_STATIC_KEY_FALSE(has_gic_active_state); static const u8 default_ppi[] = { [TIMER_PTIMER] = 30, [TIMER_VTIMER] = 27, + [TIMER_HPTIMER] = 26, + [TIMER_HVTIMER] = 28, }; static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx); @@ -46,6 +49,11 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, struct arch_timer_context *timer, enum kvm_arch_timer_regs treg); +static bool kvm_arch_timer_get_input_level(int vintid); + +static struct irq_ops arch_timer_irq_ops = { + .get_input_level = kvm_arch_timer_get_input_level, +}; static bool has_cntpoff(void) { @@ -54,6 +62,9 @@ static bool has_cntpoff(void) static int nr_timers(struct kvm_vcpu *vcpu) { + if (!vcpu_has_nv(vcpu)) + return NR_KVM_EL0_TIMERS; + return NR_KVM_TIMERS; } @@ -66,6 +77,10 @@ u32 timer_get_ctl(struct arch_timer_context *ctxt) return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0); case TIMER_PTIMER: return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0); + case TIMER_HVTIMER: + return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2); + case TIMER_HPTIMER: + return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2); default: WARN_ON(1); return 0; @@ -81,6 +96,10 @@ u64 timer_get_cval(struct arch_timer_context *ctxt) return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0); case TIMER_PTIMER: return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0); + case TIMER_HVTIMER: + return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2); + case TIMER_HPTIMER: + return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2); default: WARN_ON(1); return 0; @@ -113,6 +132,12 @@ static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) case TIMER_PTIMER: __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl; break; + case 
TIMER_HVTIMER: + __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl; + break; + case TIMER_HPTIMER: + __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl; + break; default: WARN_ON(1); } @@ -129,6 +154,12 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) case TIMER_PTIMER: __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval; break; + case TIMER_HVTIMER: + __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval; + break; + case TIMER_HPTIMER: + __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval; + break; default: WARN_ON(1); } @@ -151,13 +182,27 @@ u64 kvm_phys_timer_read(void) static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map) { - if (has_vhe()) { + if (vcpu_has_nv(vcpu)) { + if (is_hyp_ctxt(vcpu)) { + map->direct_vtimer = vcpu_hvtimer(vcpu); + map->direct_ptimer = vcpu_hptimer(vcpu); + map->emul_vtimer = vcpu_vtimer(vcpu); + map->emul_ptimer = vcpu_ptimer(vcpu); + } else { + map->direct_vtimer = vcpu_vtimer(vcpu); + map->direct_ptimer = vcpu_ptimer(vcpu); + map->emul_vtimer = vcpu_hvtimer(vcpu); + map->emul_ptimer = vcpu_hptimer(vcpu); + } + } else if (has_vhe()) { map->direct_vtimer = vcpu_vtimer(vcpu); map->direct_ptimer = vcpu_ptimer(vcpu); + map->emul_vtimer = NULL; map->emul_ptimer = NULL; } else { map->direct_vtimer = vcpu_vtimer(vcpu); map->direct_ptimer = NULL; + map->emul_vtimer = NULL; map->emul_ptimer = vcpu_ptimer(vcpu); } @@ -252,8 +297,11 @@ static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu) static u64 wfit_delay_ns(struct kvm_vcpu *vcpu) { - struct arch_timer_context *ctx = vcpu_vtimer(vcpu); u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu)); + struct arch_timer_context *ctx; + + ctx = (vcpu_has_nv(vcpu) && is_hyp_ctxt(vcpu)) ? vcpu_hvtimer(vcpu) + : vcpu_vtimer(vcpu); return kvm_counter_compute_delta(ctx, val); } @@ -350,9 +398,11 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) switch (index) { case TIMER_VTIMER: + case TIMER_HVTIMER: cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL); break; case TIMER_PTIMER: + case TIMER_HPTIMER: cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL); break; case NR_KVM_TIMERS: @@ -468,6 +518,7 @@ static void timer_save_state(struct arch_timer_context *ctx) u64 cval; case TIMER_VTIMER: + case TIMER_HVTIMER: timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL)); timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL)); @@ -493,6 +544,7 @@ static void timer_save_state(struct arch_timer_context *ctx) set_cntvoff(0); break; case TIMER_PTIMER: + case TIMER_HPTIMER: timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL)); cval = read_sysreg_el0(SYS_CNTP_CVAL); @@ -536,6 +588,7 @@ static void kvm_timer_blocking(struct kvm_vcpu *vcpu) */ if (!kvm_timer_irq_can_fire(map.direct_vtimer) && !kvm_timer_irq_can_fire(map.direct_ptimer) && + !kvm_timer_irq_can_fire(map.emul_vtimer) && !kvm_timer_irq_can_fire(map.emul_ptimer) && !vcpu_has_wfit_active(vcpu)) return; @@ -572,12 +625,14 @@ static void timer_restore_state(struct arch_timer_context *ctx) u64 cval, offset; case TIMER_VTIMER: + case TIMER_HVTIMER: set_cntvoff(timer_get_offset(ctx)); write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL); isb(); write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL); break; case TIMER_PTIMER: + case TIMER_HPTIMER: cval = timer_get_cval(ctx); offset = timer_get_offset(ctx); set_cntpoff(offset); @@ -663,6 +718,57 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) (_clr) |= (_bit); \ } while (0) +static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu, + struct timer_map *map) +{ + int hw, ret; + + if (!irqchip_in_kernel(vcpu->kvm)) + return; + + /* + 
* We only ever unmap the vtimer irq on a VHE system that runs nested + * virtualization, in which case we have both a valid emul_vtimer, + * emul_ptimer, direct_vtimer, and direct_ptimer. + * + * Since this is called from kvm_timer_vcpu_load(), a change between + * vEL2 and vEL1/0 will have just happened, and the timer_map will + * represent this, and therefore we switch the emul/direct mappings + * below. + */ + hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer)); + if (hw < 0) { + kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer)); + kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer)); + + ret = kvm_vgic_map_phys_irq(vcpu, + map->direct_vtimer->host_timer_irq, + timer_irq(map->direct_vtimer), + &arch_timer_irq_ops); + WARN_ON_ONCE(ret); + ret = kvm_vgic_map_phys_irq(vcpu, + map->direct_ptimer->host_timer_irq, + timer_irq(map->direct_ptimer), + &arch_timer_irq_ops); + WARN_ON_ONCE(ret); + + /* + * The virtual offset behaviour is "interresting", as it + * always applies when HCR_EL2.E2H==0, but only when + * accessed from EL1 when HCR_EL2.E2H==1. So make sure we + * track E2H when putting the HV timer in "direct" mode. + */ + if (map->direct_vtimer == vcpu_hvtimer(vcpu)) { + struct arch_timer_offset *offs = &map->direct_vtimer->offset; + + if (vcpu_el2_e2h_is_set(vcpu)) + offs->vcpu_offset = NULL; + else + offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2); + } + } +} + static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map) { bool tpt, tpc; @@ -695,6 +801,22 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map) if (!has_cntpoff() && timer_get_offset(map->direct_ptimer)) tpt = tpc = true; + /* + * Apply the enable bits that the guest hypervisor has requested for + * its own guest. We can only add traps that wouldn't have been set + * above. + */ + if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { + u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2); + + /* Use the VHE format for mental sanity */ + if (!vcpu_el2_e2h_is_set(vcpu)) + val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10; + + tpt |= !(val & (CNTHCTL_EL1PCEN << 10)); + tpc |= !(val & (CNTHCTL_EL1PCTEN << 10)); + } + /* * Now that we have collected our requirements, compute the * trap and enable bits. @@ -720,6 +842,9 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) get_timer_map(vcpu, &map); if (static_branch_likely(&has_gic_active_state)) { + if (vcpu_has_nv(vcpu)) + kvm_timer_vcpu_load_nested_switch(vcpu, &map); + kvm_timer_vcpu_load_gic(map.direct_vtimer); if (map.direct_ptimer) kvm_timer_vcpu_load_gic(map.direct_ptimer); @@ -732,6 +857,8 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) timer_restore_state(map.direct_vtimer); if (map.direct_ptimer) timer_restore_state(map.direct_ptimer); + if (map.emul_vtimer) + timer_emulate(map.emul_vtimer); if (map.emul_ptimer) timer_emulate(map.emul_ptimer); @@ -778,6 +905,8 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) * In any case, we re-schedule the hrtimer for the physical timer when * coming back to the VCPU thread in kvm_timer_vcpu_load(). */ + if (map.emul_vtimer) + soft_timer_cancel(&map.emul_vtimer->hrtimer); if (map.emul_ptimer) soft_timer_cancel(&map.emul_ptimer->hrtimer); @@ -830,6 +959,17 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) for (int i = 0; i < nr_timers(vcpu); i++) timer_set_ctl(vcpu_get_timer(vcpu, i), 0); + /* + * A vcpu running at EL2 is in charge of the offset applied to + * the virtual timer, so use the physical VM offset, and point + * the vcpu offset to CNTVOFF_EL2. 
+ */ + if (vcpu_has_nv(vcpu)) { + struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset; + + offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2); + offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset; + } if (timer->enabled) { for (int i = 0; i < nr_timers(vcpu); i++) @@ -843,6 +983,8 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) } } + if (map.emul_vtimer) + soft_timer_cancel(&map.emul_vtimer->hrtimer); if (map.emul_ptimer) soft_timer_cancel(&map.emul_ptimer->hrtimer); @@ -866,9 +1008,11 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) switch (timerid) { case TIMER_PTIMER: + case TIMER_HPTIMER: ctxt->host_timer_irq = host_ptimer_irq; break; case TIMER_VTIMER: + case TIMER_HVTIMER: ctxt->host_timer_irq = host_vtimer_irq; break; } @@ -1020,6 +1164,10 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, val = kvm_phys_timer_read() - timer_get_offset(timer); break; + case TIMER_REG_VOFF: + val = *timer->offset.vcpu_offset; + break; + default: BUG(); } @@ -1038,7 +1186,7 @@ u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, get_timer_map(vcpu, &map); timer = vcpu_get_timer(vcpu, tmr); - if (timer == map.emul_ptimer) + if (timer == map.emul_vtimer || timer == map.emul_ptimer) return kvm_arm_timer_read(vcpu, timer, treg); preempt_disable(); @@ -1070,6 +1218,10 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, timer_set_cval(timer, val); break; + case TIMER_REG_VOFF: + *timer->offset.vcpu_offset = val; + break; + default: BUG(); } @@ -1085,7 +1237,7 @@ void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu, get_timer_map(vcpu, &map); timer = vcpu_get_timer(vcpu, tmr); - if (timer == map.emul_ptimer) { + if (timer == map.emul_vtimer || timer == map.emul_ptimer) { soft_timer_cancel(&timer->hrtimer); kvm_arm_timer_write(vcpu, timer, treg, val); timer_emulate(timer); @@ -1165,10 +1317,6 @@ static const struct irq_domain_ops timer_domain_ops = { .free = timer_irq_domain_free, }; -static struct irq_ops arch_timer_irq_ops = { - .get_input_level = kvm_arch_timer_get_input_level, -}; - static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags) { *flags = irq_get_trigger_type(virq); @@ -1341,7 +1489,7 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) return valid; } -bool kvm_arch_timer_get_input_level(int vintid) +static bool kvm_arch_timer_get_input_level(int vintid) { struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); @@ -1444,6 +1592,12 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: idx = TIMER_PTIMER; break; + case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER: + idx = TIMER_HVTIMER; + break; + case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER: + idx = TIMER_HPTIMER; + break; default: ret = -ENXIO; goto out; @@ -1474,6 +1628,12 @@ int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: timer = vcpu_ptimer(vcpu); break; + case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER: + timer = vcpu_hvtimer(vcpu); + break; + case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER: + timer = vcpu_hptimer(vcpu); + break; default: return -ENXIO; } @@ -1487,6 +1647,8 @@ int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) switch (attr->attr) { case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER: + case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER: return 0; } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index d07cbc313889..c41166f1a1dd 100644 --- 
a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -343,6 +343,21 @@ static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu) switch (sysreg) { case SYS_CNTPCT_EL0: case SYS_CNTPCTSS_EL0: + if (vcpu_has_nv(vcpu)) { + if (is_hyp_ctxt(vcpu)) { + ctxt = vcpu_hptimer(vcpu); + break; + } + + /* Check for guest hypervisor trapping */ + val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2); + if (!vcpu_el2_e2h_is_set(vcpu)) + val = (val & CNTHCTL_EL1PCTEN) << 10; + + if (!(val & (CNTHCTL_EL1PCTEN << 10))) + return false; + } + ctxt = vcpu_ptimer(vcpu); break; default: diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h index f3e46a976125..6ce5c025218d 100644 --- a/arch/arm64/kvm/trace_arm.h +++ b/arch/arm64/kvm/trace_arm.h @@ -206,6 +206,7 @@ TRACE_EVENT(kvm_get_timer_map, __field( unsigned long, vcpu_id ) __field( int, direct_vtimer ) __field( int, direct_ptimer ) + __field( int, emul_vtimer ) __field( int, emul_ptimer ) ), @@ -214,14 +215,17 @@ TRACE_EVENT(kvm_get_timer_map, __entry->direct_vtimer = arch_timer_ctx_index(map->direct_vtimer); __entry->direct_ptimer = (map->direct_ptimer) ? arch_timer_ctx_index(map->direct_ptimer) : -1; + __entry->emul_vtimer = + (map->emul_vtimer) ? arch_timer_ctx_index(map->emul_vtimer) : -1; __entry->emul_ptimer = (map->emul_ptimer) ? arch_timer_ctx_index(map->emul_ptimer) : -1; ), - TP_printk("VCPU: %ld, dv: %d, dp: %d, ep: %d", + TP_printk("VCPU: %ld, dv: %d, dp: %d, ev: %d, ep: %d", __entry->vcpu_id, __entry->direct_vtimer, __entry->direct_ptimer, + __entry->emul_vtimer, __entry->emul_ptimer) ); diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index d97e6080b421..ae491ef97188 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -573,6 +573,21 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid) return 0; } +int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid) +{ + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + unsigned long flags; + int ret = -1; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->hw) + ret = irq->hwintid; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + return ret; +} + /** * kvm_vgic_set_owner - Set the owner of an interrupt for a VM * diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 209da0c2ac9f..52008f5cff06 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -13,6 +13,9 @@ enum kvm_arch_timers { TIMER_PTIMER, TIMER_VTIMER, + NR_KVM_EL0_TIMERS, + TIMER_HVTIMER = NR_KVM_EL0_TIMERS, + TIMER_HPTIMER, NR_KVM_TIMERS }; @@ -21,6 +24,7 @@ enum kvm_arch_timer_regs { TIMER_REG_CVAL, TIMER_REG_TVAL, TIMER_REG_CTL, + TIMER_REG_VOFF, }; struct arch_timer_offset { @@ -76,6 +80,7 @@ struct arch_timer_context { struct timer_map { struct arch_timer_context *direct_vtimer; struct arch_timer_context *direct_ptimer; + struct arch_timer_context *emul_vtimer; struct arch_timer_context *emul_ptimer; }; @@ -114,12 +119,12 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu); void kvm_timer_init_vhe(void); -bool kvm_arch_timer_get_input_level(int vintid); - #define vcpu_timer(v) (&(v)->arch.timer_cpu) #define vcpu_get_timer(v,t) (&vcpu_timer(v)->timers[(t)]) #define vcpu_vtimer(v) (&(v)->arch.timer_cpu.timers[TIMER_VTIMER]) #define vcpu_ptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_PTIMER]) +#define vcpu_hvtimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HVTIMER]) +#define vcpu_hptimer(v) 
(&(v)->arch.timer_cpu.timers[TIMER_HPTIMER])

 #define arch_timer_ctx_index(ctx)	((ctx) - vcpu_timer((ctx)->vcpu)->timers)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index d3ad51fde9db..402b545959af 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -380,6 +380,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
 int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
 			  u32 vintid, struct irq_ops *ops);
 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid);
+int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid);
 bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid);

 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
-- cgit v1.2.3

From aac94968126beb9846c12a940f1302ece7849b4f Mon Sep 17 00:00:00 2001
From: Oliver Upton
Date: Tue, 4 Apr 2023 15:40:41 +0000
Subject: KVM: arm64: Rename SMC/HVC call handler to reflect reality

KVM handles SMCCC calls from virtual EL2 that use the SMC instruction
since commit bd36b1a9eb5a ("KVM: arm64: nv: Handle SMCs taken from
virtual EL2"). Thus, the function name of the handler no longer
reflects reality.

Normalize the name on SMCCC, since that's the only hypercall interface
KVM supports in the first place. No functional change intended.

Reviewed-by: Suzuki K Poulose
Signed-off-by: Oliver Upton
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20230404154050.2270077-5-oliver.upton@linux.dev
---
 arch/arm64/kvm/handle_exit.c | 4 ++--
 arch/arm64/kvm/hypercalls.c  | 2 +-
 include/kvm/arm_hypercalls.h | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/kvm')

diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index a798c0b4d717..5e4f9737cbd5 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -52,7 +52,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu)
 		return 1;
 	}

-	ret = kvm_hvc_call_handler(vcpu);
+	ret = kvm_smccc_call_handler(vcpu);
 	if (ret < 0) {
 		vcpu_set_reg(vcpu, 0, ~0UL);
 		return 1;
@@ -89,7 +89,7 @@ static int handle_smc(struct kvm_vcpu *vcpu)
 	 * at Non-secure EL1 is trapped to EL2 if HCR_EL2.TSC==1, rather than
 	 * being treated as UNDEFINED.
 	 */
-	ret = kvm_hvc_call_handler(vcpu);
+	ret = kvm_smccc_call_handler(vcpu);
 	if (ret < 0)
 		vcpu_set_reg(vcpu, 0, ~0UL);

diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index a09a526a7d7c..5ead6c6afff0 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -121,7 +121,7 @@ static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id)
 	}
 }

-int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
+int kvm_smccc_call_handler(struct kvm_vcpu *vcpu)
 {
 	struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat;
 	u32 func_id = smccc_get_function(vcpu);
diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h
index 1188f116cf4e..8f4e33bc43e8 100644
--- a/include/kvm/arm_hypercalls.h
+++ b/include/kvm/arm_hypercalls.h
@@ -6,7 +6,7 @@

 #include

-int kvm_hvc_call_handler(struct kvm_vcpu *vcpu);
+int kvm_smccc_call_handler(struct kvm_vcpu *vcpu);

 static inline u32 smccc_get_function(struct kvm_vcpu *vcpu)
 {
-- cgit v1.2.3

From fb88707dd39bd1d5ec4a058776de9ee99bcc7b72 Mon Sep 17 00:00:00 2001
From: Oliver Upton
Date: Tue, 4 Apr 2023 15:40:44 +0000
Subject: KVM: arm64: Use a maple tree to represent the SMCCC filter

Maple tree is an efficient B-tree implementation that is intended for
storing non-overlapping intervals.
Such a data structure is a good fit for the SMCCC filter as it is desirable to sparsely allocate the 32 bit function ID space. To that end, add a maple tree to kvm_arch and correctly init/teardown along with the VM. Wire in a test against the hypercall filter for HVCs which does nothing until the controls are exposed to userspace. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-8-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_host.h | 5 +++- arch/arm64/kvm/arm.c | 2 ++ arch/arm64/kvm/hypercalls.c | 57 +++++++++++++++++++++++++++++++++++++++ include/kvm/arm_hypercalls.h | 1 + 4 files changed, 64 insertions(+), 1 deletion(-) (limited to 'include/kvm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d091d1c9890b..2682b3fd0881 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -221,7 +222,8 @@ struct kvm_arch { #define KVM_ARCH_FLAG_EL1_32BIT 4 /* PSCI SYSTEM_SUSPEND enabled for the guest */ #define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5 - + /* SMCCC filter initialized for the VM */ +#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 6 unsigned long flags; /* @@ -242,6 +244,7 @@ struct kvm_arch { /* Hypercall features firmware registers' descriptor */ struct kvm_smccc_features smccc_feat; + struct maple_tree smccc_filter; /* * For an untrusted host VM, 'pkvm.handle' is used to lookup diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index b6e26c0e65e5..1202ac03bee0 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -192,6 +192,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_destroy_vcpus(kvm); kvm_unshare_hyp(kvm, kvm + 1); + + kvm_arm_teardown_hypercalls(kvm); } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 0be974e2f1fc..ba7cd84c6668 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -121,8 +121,58 @@ static bool kvm_smccc_test_fw_bmap(struct kvm_vcpu *vcpu, u32 func_id) } } +#define SMCCC_ARCH_RANGE_BEGIN ARM_SMCCC_VERSION_FUNC_ID +#define SMCCC_ARCH_RANGE_END \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, ARM_SMCCC_FUNC_MASK) + +static void init_smccc_filter(struct kvm *kvm) +{ + int r; + + mt_init(&kvm->arch.smccc_filter); + + /* + * Prevent userspace from handling any SMCCC calls in the architecture + * range, avoiding the risk of misrepresenting Spectre mitigation status + * to the guest. + */ + r = mtree_insert_range(&kvm->arch.smccc_filter, + SMCCC_ARCH_RANGE_BEGIN, SMCCC_ARCH_RANGE_END, + xa_mk_value(KVM_SMCCC_FILTER_HANDLE), + GFP_KERNEL_ACCOUNT); + WARN_ON_ONCE(r); +} + +static u8 kvm_smccc_filter_get_action(struct kvm *kvm, u32 func_id) +{ + unsigned long idx = func_id; + void *val; + + if (!test_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags)) + return KVM_SMCCC_FILTER_HANDLE; + + /* + * But where's the error handling, you say? + * + * mt_find() returns NULL if no entry was found, which just so happens + * to match KVM_SMCCC_FILTER_HANDLE. + */ + val = mt_find(&kvm->arch.smccc_filter, &idx, idx); + return xa_to_value(val); +} + static u8 kvm_smccc_get_action(struct kvm_vcpu *vcpu, u32 func_id) { + /* + * Intervening actions in the SMCCC filter take precedence over the + * pseudo-firmware register bitmaps. 
+ */ + u8 action = kvm_smccc_filter_get_action(vcpu->kvm, func_id); + if (action != KVM_SMCCC_FILTER_HANDLE) + return action; + if (kvm_smccc_test_fw_bmap(vcpu, func_id) || kvm_smccc_default_allowed(func_id)) return KVM_SMCCC_FILTER_HANDLE; @@ -263,6 +313,13 @@ void kvm_arm_init_hypercalls(struct kvm *kvm) smccc_feat->std_bmap = KVM_ARM_SMCCC_STD_FEATURES; smccc_feat->std_hyp_bmap = KVM_ARM_SMCCC_STD_HYP_FEATURES; smccc_feat->vendor_hyp_bmap = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES; + + init_smccc_filter(kvm); +} + +void kvm_arm_teardown_hypercalls(struct kvm *kvm) +{ + mtree_destroy(&kvm->arch.smccc_filter); } int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h index 8f4e33bc43e8..fe6c31575b05 100644 --- a/include/kvm/arm_hypercalls.h +++ b/include/kvm/arm_hypercalls.h @@ -43,6 +43,7 @@ static inline void smccc_set_retval(struct kvm_vcpu *vcpu, struct kvm_one_reg; void kvm_arm_init_hypercalls(struct kvm *kvm); +void kvm_arm_teardown_hypercalls(struct kvm *kvm); int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); -- cgit v1.2.3 From 821d935c87bc95253f82deec3cbb457ccf3de003 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:46 +0000 Subject: KVM: arm64: Introduce support for userspace SMCCC filtering As the SMCCC (and related specifications) march towards an 'everything and the kitchen sink' interface for interacting with a system it becomes less likely that KVM will support every related feature. We could do better by letting userspace have a crack at it instead. Allow userspace to define an 'SMCCC filter' that applies to both HVCs and SMCs initiated by the guest. Supporting both conduits with this interface is important for a couple of reasons. Guest SMC usage is table stakes for a nested guest, as HVCs are always taken to the virtual EL2. Additionally, guests may want to interact with a service on the secure side which can now be proxied by userspace. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-10-oliver.upton@linux.dev --- Documentation/virt/kvm/api.rst | 4 ++ Documentation/virt/kvm/devices/vm.rst | 79 +++++++++++++++++++++++++++++++++++ arch/arm64/include/uapi/asm/kvm.h | 11 +++++ arch/arm64/kvm/arm.c | 4 ++ arch/arm64/kvm/hypercalls.c | 60 ++++++++++++++++++++++++++ include/kvm/arm_hypercalls.h | 3 ++ 6 files changed, 161 insertions(+) (limited to 'include/kvm') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 9497792c4ee5..c8ab2f730945 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6231,6 +6231,10 @@ requires a guest to interact with host userpace. For arm64: ---------- +SMCCC exits can be enabled depending on the configuration of the SMCCC +filter. See the Documentation/virt/kvm/devices/vm.rst +``KVM_ARM_SMCCC_FILTER`` for more details. + ``nr`` contains the function ID of the guest's SMCCC call. Userspace is expected to use the ``KVM_GET_ONE_REG`` ioctl to retrieve the call parameters from the vCPU's GPRs. diff --git a/Documentation/virt/kvm/devices/vm.rst b/Documentation/virt/kvm/devices/vm.rst index 147efec626e5..9d726e60ec47 100644 --- a/Documentation/virt/kvm/devices/vm.rst +++ b/Documentation/virt/kvm/devices/vm.rst @@ -321,3 +321,82 @@ Allows userspace to query the status of migration mode. 
if it is enabled :Returns: -EFAULT if the given address is not accessible from kernel space; 0 in case of success. + +6. GROUP: KVM_ARM_VM_SMCCC_CTRL +=============================== + +:Architectures: arm64 + +6.1. ATTRIBUTE: KVM_ARM_VM_SMCCC_FILTER (w/o) +--------------------------------------------- + +:Parameters: Pointer to a ``struct kvm_smccc_filter`` + +:Returns: + + ====== =========================================== + EEXIST Range intersects with a previously inserted + or reserved range + EBUSY A vCPU in the VM has already run + EINVAL Invalid filter configuration + ENOMEM Failed to allocate memory for the in-kernel + representation of the SMCCC filter + ====== =========================================== + +Requests the installation of an SMCCC call filter described as follows:: + + enum kvm_smccc_filter_action { + KVM_SMCCC_FILTER_HANDLE = 0, + KVM_SMCCC_FILTER_DENY, + KVM_SMCCC_FILTER_FWD_TO_USER, + }; + + struct kvm_smccc_filter { + __u32 base; + __u32 nr_functions; + __u8 action; + __u8 pad[15]; + }; + +The filter is defined as a set of non-overlapping ranges. Each +range defines an action to be applied to SMCCC calls within the range. +Userspace can insert multiple ranges into the filter by using +successive calls to this attribute. + +The default configuration of KVM is such that all implemented SMCCC +calls are allowed. Thus, the SMCCC filter can be defined sparsely +by userspace, only describing ranges that modify the default behavior. + +The range expressed by ``struct kvm_smccc_filter`` is +[``base``, ``base + nr_functions``). The range is not allowed to wrap, +i.e. userspace cannot rely on ``base + nr_functions`` overflowing. + +The SMCCC filter applies to both SMC and HVC calls initiated by the +guest. The SMCCC filter gates the in-kernel emulation of SMCCC calls +and as such takes effect before other interfaces that interact with +SMCCC calls (e.g. hypercall bitmap registers). + +Actions: + + - ``KVM_SMCCC_FILTER_HANDLE``: Allows the guest SMCCC call to be + handled in-kernel. It is strongly recommended that userspace *not* + explicitly describe the allowed SMCCC call ranges. + + - ``KVM_SMCCC_FILTER_DENY``: Rejects the guest SMCCC call in-kernel + and returns to the guest. + + - ``KVM_SMCCC_FILTER_FWD_TO_USER``: The guest SMCCC call is forwarded + to userspace with an exit reason of ``KVM_EXIT_HYPERCALL``. + +The ``pad`` field is reserved for future use and must be zero. KVM may +return ``-EINVAL`` if the field is nonzero. 
+ +KVM reserves the 'Arm Architecture Calls' range of function IDs and +will reject attempts to define a filter for any portion of these ranges: + + =========== =============== + Start End (inclusive) + =========== =============== + 0x8000_0000 0x8000_FFFF + 0xC000_0000 0xC000_FFFF + =========== =============== diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index f86446c5a7e3..3dcfa4bfdf83 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -372,6 +372,10 @@ enum { #endif }; +/* Device Control API on vm fd */ +#define KVM_ARM_VM_SMCCC_CTRL 0 +#define KVM_ARM_VM_SMCCC_FILTER 0 + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 @@ -479,6 +483,13 @@ enum kvm_smccc_filter_action { #endif }; +struct kvm_smccc_filter { + __u32 base; + __u32 nr_functions; + __u8 action; + __u8 pad[15]; +}; + /* arm64-specific KVM_EXIT_HYPERCALL flags */ #define KVM_HYPERCALL_EXIT_SMC (1U << 0) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 1202ac03bee0..efee032c9560 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1444,6 +1444,8 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, static int kvm_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) { switch (attr->group) { + case KVM_ARM_VM_SMCCC_CTRL: + return kvm_vm_smccc_has_attr(kvm, attr); default: return -ENXIO; } @@ -1452,6 +1454,8 @@ static int kvm_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) static int kvm_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) { switch (attr->group) { + case KVM_ARM_VM_SMCCC_CTRL: + return kvm_vm_smccc_set_attr(kvm, attr); default: return -ENXIO; } diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 2db53709bec1..9a35d6d18193 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -145,6 +145,44 @@ static void init_smccc_filter(struct kvm *kvm) WARN_ON_ONCE(r); } +static int kvm_smccc_set_filter(struct kvm *kvm, struct kvm_smccc_filter __user *uaddr) +{ + const void *zero_page = page_to_virt(ZERO_PAGE(0)); + struct kvm_smccc_filter filter; + u32 start, end; + int r; + + if (copy_from_user(&filter, uaddr, sizeof(filter))) + return -EFAULT; + + if (memcmp(filter.pad, zero_page, sizeof(filter.pad))) + return -EINVAL; + + start = filter.base; + end = start + filter.nr_functions - 1; + + if (end < start || filter.action >= NR_SMCCC_FILTER_ACTIONS) + return -EINVAL; + + mutex_lock(&kvm->lock); + + if (kvm_vm_has_ran_once(kvm)) { + r = -EBUSY; + goto out_unlock; + } + + r = mtree_insert_range(&kvm->arch.smccc_filter, start, end, + xa_mk_value(filter.action), GFP_KERNEL_ACCOUNT); + if (r) + goto out_unlock; + + set_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags); + +out_unlock: + mutex_unlock(&kvm->lock); + return r; +} + static u8 kvm_smccc_filter_get_action(struct kvm *kvm, u32 func_id) { unsigned long idx = func_id; @@ -569,3 +607,25 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) return -EINVAL; } + +int kvm_vm_smccc_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_ARM_VM_SMCCC_FILTER: + return 0; + default: + return -ENXIO; + } +} + +int kvm_vm_smccc_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + void __user *uaddr = (void __user *)attr->addr; + + switch (attr->attr) { + case KVM_ARM_VM_SMCCC_FILTER: + return kvm_smccc_set_filter(kvm, uaddr); + default: + return -ENXIO; + } +} 
diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h index fe6c31575b05..2df152207ccd 100644 --- a/include/kvm/arm_hypercalls.h +++ b/include/kvm/arm_hypercalls.h @@ -49,4 +49,7 @@ int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +int kvm_vm_smccc_has_attr(struct kvm *kvm, struct kvm_device_attr *attr); +int kvm_vm_smccc_set_attr(struct kvm *kvm, struct kvm_device_attr *attr); + #endif -- cgit v1.2.3
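As a closing usage illustration, installing a filter range from
userspace is a single device attribute call on the VM fd, issued
before the first vcpu runs. A minimal sketch against the UAPI
introduced above (the function ID range passed by the caller is
arbitrary and purely illustrative):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	static int fwd_smccc_range_to_user(int vm_fd, __u32 base, __u32 n)
	{
		struct kvm_smccc_filter filter = {
			.base		= base,
			.nr_functions	= n,
			.action		= KVM_SMCCC_FILTER_FWD_TO_USER,
			/* .pad is implicitly zeroed, as the API requires */
		};
		struct kvm_device_attr attr = {
			.group	= KVM_ARM_VM_SMCCC_CTRL,
			.attr	= KVM_ARM_VM_SMCCC_FILTER,
			.addr	= (__u64)(unsigned long)&filter,
		};

		/* errno: EBUSY once a vcpu has run, EEXIST on overlap */
		return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
	}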