summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/virt/kvm/api.rst71
-rw-r--r--Documentation/virt/kvm/devices/vm.rst79
-rw-r--r--arch/arm64/include/asm/kvm_host.h25
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h4
-rw-r--r--arch/arm64/include/asm/sysreg.h3
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h36
-rw-r--r--arch/arm64/kernel/cpufeature.c11
-rw-r--r--arch/arm64/kvm/arch_timer.c550
-rw-r--r--arch/arm64/kvm/arm.c147
-rw-r--r--arch/arm64/kvm/guest.c31
-rw-r--r--arch/arm64/kvm/handle_exit.c36
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h53
-rw-r--r--arch/arm64/kvm/hyp/nvhe/timer-sr.c18
-rw-r--r--arch/arm64/kvm/hypercalls.c189
-rw-r--r--arch/arm64/kvm/pmu-emul.c25
-rw-r--r--arch/arm64/kvm/psci.c37
-rw-r--r--arch/arm64/kvm/reset.c15
-rw-r--r--arch/arm64/kvm/sys_regs.c10
-rw-r--r--arch/arm64/kvm/trace_arm.h6
-rw-r--r--arch/arm64/kvm/vgic/vgic-debug.c8
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c36
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c33
-rw-r--r--arch/arm64/kvm/vgic/vgic-kvm-device.c85
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v3.c4
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio.c12
-rw-r--r--arch/arm64/kvm/vgic/vgic-v4.c11
-rw-r--r--arch/arm64/kvm/vgic/vgic.c27
-rw-r--r--arch/arm64/kvm/vgic/vgic.h3
-rw-r--r--arch/arm64/tools/cpucaps1
-rw-r--r--arch/arm64/tools/sysreg4
-rw-r--r--arch/x86/include/asm/kvm_host.h7
-rw-r--r--arch/x86/include/uapi/asm/kvm.h3
-rw-r--r--arch/x86/kvm/x86.c6
-rw-r--r--include/clocksource/arm_arch_timer.h1
-rw-r--r--include/kvm/arm_arch_timer.h34
-rw-r--r--include/kvm/arm_hypercalls.h6
-rw-r--r--include/kvm/arm_vgic.h1
-rw-r--r--include/uapi/linux/kvm.h12
-rw-r--r--tools/testing/selftests/kvm/Makefile1
-rw-r--r--tools/testing/selftests/kvm/aarch64/arch_timer.c56
-rw-r--r--tools/testing/selftests/kvm/aarch64/get-reg-list.c15
-rw-r--r--tools/testing/selftests/kvm/aarch64/smccc_filter.c268
-rw-r--r--tools/testing/selftests/kvm/config1
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/processor.h13
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c91
45 files changed, 1690 insertions, 395 deletions
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 62de0768d6aa..42403dfe9496 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6029,6 +6029,44 @@ delivery must be provided via the "reg_aen" struct.
The "pad" and "reserved" fields may be used for future extensions and should be
set to 0s by userspace.
+4.138 KVM_ARM_SET_COUNTER_OFFSET
+--------------------------------
+
+:Capability: KVM_CAP_COUNTER_OFFSET
+:Architectures: arm64
+:Type: vm ioctl
+:Parameters: struct kvm_arm_counter_offset (in)
+:Returns: 0 on success, < 0 on error
+
+This capability indicates that userspace is able to apply a single VM-wide
+offset to both the virtual and physical counters as viewed by the guest
+using the KVM_ARM_SET_CNT_OFFSET ioctl and the following data structure:
+
+::
+
+ struct kvm_arm_counter_offset {
+ __u64 counter_offset;
+ __u64 reserved;
+ };
+
+The offset describes a number of counter cycles that are subtracted from
+both virtual and physical counter views (similar to the effects of the
+CNTVOFF_EL2 and CNTPOFF_EL2 system registers, but only global). The offset
+always applies to all vcpus (already created or created after this ioctl)
+for this VM.
+
+It is userspace's responsibility to compute the offset based, for example,
+on previous values of the guest counters.
+
+Any value other than 0 for the "reserved" field may result in an error
+(-EINVAL) being returned. This ioctl can also return -EBUSY if any vcpu
+ioctl is issued concurrently.
+
+Note that using this ioctl results in KVM ignoring subsequent userspace
+writes to the CNTVCT_EL0 and CNTPCT_EL0 registers using the SET_ONE_REG
+interface. No error will be returned, but the resulting offset will not be
+applied.
+
5. The kvm_run structure
========================
@@ -6218,15 +6256,40 @@ to the byte array.
__u64 nr;
__u64 args[6];
__u64 ret;
- __u32 longmode;
- __u32 pad;
+ __u64 flags;
} hypercall;
-Unused. This was once used for 'hypercall to userspace'. To implement
-such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
+
+It is strongly recommended that userspace use ``KVM_EXIT_IO`` (x86) or
+``KVM_EXIT_MMIO`` (all except s390) to implement functionality that
+requires a guest to interact with host userpace.
.. note:: KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO.
+For arm64:
+----------
+
+SMCCC exits can be enabled depending on the configuration of the SMCCC
+filter. See the Documentation/virt/kvm/devices/vm.rst
+``KVM_ARM_SMCCC_FILTER`` for more details.
+
+``nr`` contains the function ID of the guest's SMCCC call. Userspace is
+expected to use the ``KVM_GET_ONE_REG`` ioctl to retrieve the call
+parameters from the vCPU's GPRs.
+
+Definition of ``flags``:
+ - ``KVM_HYPERCALL_EXIT_SMC``: Indicates that the guest used the SMC
+ conduit to initiate the SMCCC call. If this bit is 0 then the guest
+ used the HVC conduit for the SMCCC call.
+
+ - ``KVM_HYPERCALL_EXIT_16BIT``: Indicates that the guest used a 16bit
+ instruction to initiate the SMCCC call. If this bit is 0 then the
+ guest used a 32bit instruction. An AArch64 guest always has this
+ bit set to 0.
+
+At the point of exit, PC points to the instruction immediately following
+the trapping instruction.
+
::
/* KVM_EXIT_TPR_ACCESS */
diff --git a/Documentation/virt/kvm/devices/vm.rst b/Documentation/virt/kvm/devices/vm.rst
index 147efec626e5..9d726e60ec47 100644
--- a/Documentation/virt/kvm/devices/vm.rst
+++ b/Documentation/virt/kvm/devices/vm.rst
@@ -321,3 +321,82 @@ Allows userspace to query the status of migration mode.
if it is enabled
:Returns: -EFAULT if the given address is not accessible from kernel space;
0 in case of success.
+
+6. GROUP: KVM_ARM_VM_SMCCC_CTRL
+===============================
+
+:Architectures: arm64
+
+6.1. ATTRIBUTE: KVM_ARM_VM_SMCCC_FILTER (w/o)
+---------------------------------------------
+
+:Parameters: Pointer to a ``struct kvm_smccc_filter``
+
+:Returns:
+
+ ====== ===========================================
+ EEXIST Range intersects with a previously inserted
+ or reserved range
+ EBUSY A vCPU in the VM has already run
+ EINVAL Invalid filter configuration
+ ENOMEM Failed to allocate memory for the in-kernel
+ representation of the SMCCC filter
+ ====== ===========================================
+
+Requests the installation of an SMCCC call filter described as follows::
+
+ enum kvm_smccc_filter_action {
+ KVM_SMCCC_FILTER_HANDLE = 0,
+ KVM_SMCCC_FILTER_DENY,
+ KVM_SMCCC_FILTER_FWD_TO_USER,
+ };
+
+ struct kvm_smccc_filter {
+ __u32 base;
+ __u32 nr_functions;
+ __u8 action;
+ __u8 pad[15];
+ };
+
+The filter is defined as a set of non-overlapping ranges. Each
+range defines an action to be applied to SMCCC calls within the range.
+Userspace can insert multiple ranges into the filter by using
+successive calls to this attribute.
+
+The default configuration of KVM is such that all implemented SMCCC
+calls are allowed. Thus, the SMCCC filter can be defined sparsely
+by userspace, only describing ranges that modify the default behavior.
+
+The range expressed by ``struct kvm_smccc_filter`` is
+[``base``, ``base + nr_functions``). The range is not allowed to wrap,
+i.e. userspace cannot rely on ``base + nr_functions`` overflowing.
+
+The SMCCC filter applies to both SMC and HVC calls initiated by the
+guest. The SMCCC filter gates the in-kernel emulation of SMCCC calls
+and as such takes effect before other interfaces that interact with
+SMCCC calls (e.g. hypercall bitmap registers).
+
+Actions:
+
+ - ``KVM_SMCCC_FILTER_HANDLE``: Allows the guest SMCCC call to be
+ handled in-kernel. It is strongly recommended that userspace *not*
+ explicitly describe the allowed SMCCC call ranges.
+
+ - ``KVM_SMCCC_FILTER_DENY``: Rejects the guest SMCCC call in-kernel
+ and returns to the guest.
+
+ - ``KVM_SMCCC_FILTER_FWD_TO_USER``: The guest SMCCC call is forwarded
+ to userspace with an exit reason of ``KVM_EXIT_HYPERCALL``.
+
+The ``pad`` field is reserved for future use and must be zero. KVM may
+return ``-EINVAL`` if the field is nonzero.
+
+KVM reserves the 'Arm Architecture Calls' range of function IDs and
+will reject attempts to define a filter for any portion of these ranges:
+
+ =========== ===============
+ Start End (inclusive)
+ =========== ===============
+ 0x8000_0000 0x8000_FFFF
+ 0xC000_0000 0xC000_FFFF
+ =========== ===============
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index bcd774d74f34..ce7530968e39 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -16,6 +16,7 @@
#include <linux/types.h>
#include <linux/jump_label.h>
#include <linux/kvm_types.h>
+#include <linux/maple_tree.h>
#include <linux/percpu.h>
#include <linux/psci.h>
#include <asm/arch_gicv3.h>
@@ -199,6 +200,9 @@ struct kvm_arch {
/* Mandated version of PSCI */
u32 psci_version;
+ /* Protects VM-scoped configuration data */
+ struct mutex config_lock;
+
/*
* If we encounter a data abort without valid instruction syndrome
* information, report this to user space. User space can (and
@@ -221,7 +225,12 @@ struct kvm_arch {
#define KVM_ARCH_FLAG_EL1_32BIT 4
/* PSCI SYSTEM_SUSPEND enabled for the guest */
#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5
-
+ /* VM counter offset */
+#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 6
+ /* Timer PPIs made immutable */
+#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 7
+ /* SMCCC filter initialized for the VM */
+#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 8
unsigned long flags;
/*
@@ -242,6 +251,7 @@ struct kvm_arch {
/* Hypercall features firmware registers' descriptor */
struct kvm_smccc_features smccc_feat;
+ struct maple_tree smccc_filter;
/*
* For an untrusted host VM, 'pkvm.handle' is used to lookup
@@ -365,6 +375,10 @@ enum vcpu_sysreg {
TPIDR_EL2, /* EL2 Software Thread ID Register */
CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
SP_EL2, /* EL2 Stack Pointer */
+ CNTHP_CTL_EL2,
+ CNTHP_CVAL_EL2,
+ CNTHV_CTL_EL2,
+ CNTHV_CVAL_EL2,
NR_SYS_REGS /* Nothing after this line! */
};
@@ -522,6 +536,7 @@ struct kvm_vcpu_arch {
/* vcpu power state */
struct kvm_mp_state mp_state;
+ spinlock_t mp_state_lock;
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
@@ -922,6 +937,9 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
int __init kvm_sys_reg_table_init(void);
+bool lock_all_vcpus(struct kvm *kvm);
+void unlock_all_vcpus(struct kvm *kvm);
+
/* MMIO helpers */
void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
@@ -1007,6 +1025,8 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
struct kvm_arm_copy_mte_tags *copy_tags);
+int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
+ struct kvm_arm_counter_offset *offset);
/* Guest/host FPSIMD coordination helpers */
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
@@ -1061,6 +1081,9 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
(system_supports_32bit_el0() && \
!static_branch_unlikely(&arm64_mismatched_32bit_el0))
+#define kvm_vm_has_ran_once(kvm) \
+ (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &(kvm)->arch.flags))
+
int kvm_trng_call(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM
extern phys_addr_t hyp_mem_base;
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 083cc47dca08..27e63c111f78 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -63,6 +63,7 @@
* specific registers encoded in the instructions).
*/
.macro kern_hyp_va reg
+#ifndef __KVM_VHE_HYPERVISOR__
alternative_cb ARM64_ALWAYS_SYSTEM, kvm_update_va_mask
and \reg, \reg, #1 /* mask with va_mask */
ror \reg, \reg, #1 /* rotate to the first tag bit */
@@ -70,6 +71,7 @@ alternative_cb ARM64_ALWAYS_SYSTEM, kvm_update_va_mask
add \reg, \reg, #0, lsl 12 /* insert the top 12 bits of the tag */
ror \reg, \reg, #63 /* rotate back */
alternative_cb_end
+#endif
.endm
/*
@@ -127,6 +129,7 @@ void kvm_apply_hyp_relocations(void);
static __always_inline unsigned long __kern_hyp_va(unsigned long v)
{
+#ifndef __KVM_VHE_HYPERVISOR__
asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n"
"ror %0, %0, #1\n"
"add %0, %0, #0\n"
@@ -135,6 +138,7 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
ARM64_ALWAYS_SYSTEM,
kvm_update_va_mask)
: "+r" (v));
+#endif
return v;
}
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 9e3ecba3c4e6..a43f21559c3e 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -388,6 +388,7 @@
#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0)
+#define SYS_CNTPCT_EL0 sys_reg(3, 3, 14, 0, 1)
#define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5)
#define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6)
@@ -400,7 +401,9 @@
#define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0)
#define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1)
+#define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0)
#define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0)
+#define SYS_AARCH32_CNTPCTSS sys_reg(0, 8, 0, 14, 0)
#define __PMEV_op2(n) ((n) & 0x7)
#define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3))
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index f8129c624b07..f7ddd73a8c0f 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -198,6 +198,15 @@ struct kvm_arm_copy_mte_tags {
__u64 reserved[2];
};
+/*
+ * Counter/Timer offset structure. Describe the virtual/physical offset.
+ * To be used with KVM_ARM_SET_COUNTER_OFFSET.
+ */
+struct kvm_arm_counter_offset {
+ __u64 counter_offset;
+ __u64 reserved;
+};
+
#define KVM_ARM_TAGS_TO_GUEST 0
#define KVM_ARM_TAGS_FROM_GUEST 1
@@ -372,6 +381,10 @@ enum {
#endif
};
+/* Device Control API on vm fd */
+#define KVM_ARM_VM_SMCCC_CTRL 0
+#define KVM_ARM_VM_SMCCC_FILTER 0
+
/* Device Control API: ARM VGIC */
#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
@@ -411,6 +424,8 @@ enum {
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
+#define KVM_ARM_VCPU_TIMER_IRQ_HVTIMER 2
+#define KVM_ARM_VCPU_TIMER_IRQ_HPTIMER 3
#define KVM_ARM_VCPU_PVTIME_CTRL 2
#define KVM_ARM_VCPU_PVTIME_IPA 0
@@ -469,6 +484,27 @@ enum {
/* run->fail_entry.hardware_entry_failure_reason codes. */
#define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0)
+enum kvm_smccc_filter_action {
+ KVM_SMCCC_FILTER_HANDLE = 0,
+ KVM_SMCCC_FILTER_DENY,
+ KVM_SMCCC_FILTER_FWD_TO_USER,
+
+#ifdef __KERNEL__
+ NR_SMCCC_FILTER_ACTIONS
+#endif
+};
+
+struct kvm_smccc_filter {
+ __u32 base;
+ __u32 nr_functions;
+ __u8 action;
+ __u8 pad[15];
+};
+
+/* arm64-specific KVM_EXIT_HYPERCALL flags */
+#define KVM_HYPERCALL_EXIT_SMC (1U << 0)
+#define KVM_HYPERCALL_EXIT_16BIT (1U << 1)
+
#endif
#endif /* __ARM_KVM_H__ */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 2e3e55139777..c331c49a7d19 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2223,6 +2223,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.sign = FTR_UNSIGNED,
.min_field_value = 1,
},
+ {
+ .desc = "Enhanced Counter Virtualization (CNTPOFF)",
+ .capability = ARM64_HAS_ECV_CNTPOFF,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64MMFR0_EL1,
+ .field_pos = ID_AA64MMFR0_EL1_ECV_SHIFT,
+ .field_width = 4,
+ .sign = FTR_UNSIGNED,
+ .min_field_value = ID_AA64MMFR0_EL1_ECV_CNTPOFF,
+ },
#ifdef CONFIG_ARM64_PAN
{
.desc = "Privileged Access Never",
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index e1af4301b913..05b022be885b 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -16,6 +16,7 @@
#include <asm/arch_timer.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_nested.h>
#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>
@@ -30,14 +31,11 @@ static u32 host_ptimer_irq_flags;
static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
-static const struct kvm_irq_level default_ptimer_irq = {
- .irq = 30,
- .level = 1,
-};
-
-static const struct kvm_irq_level default_vtimer_irq = {
- .irq = 27,
- .level = 1,
+static const u8 default_ppi[] = {
+ [TIMER_PTIMER] = 30,
+ [TIMER_VTIMER] = 27,
+ [TIMER_HPTIMER] = 26,
+ [TIMER_HVTIMER] = 28,
};
static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
@@ -51,6 +49,24 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
struct arch_timer_context *timer,
enum kvm_arch_timer_regs treg);
+static bool kvm_arch_timer_get_input_level(int vintid);
+
+static struct irq_ops arch_timer_irq_ops = {
+ .get_input_level = kvm_arch_timer_get_input_level,
+};
+
+static bool has_cntpoff(void)
+{
+ return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
+}
+
+static int nr_timers(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu_has_nv(vcpu))
+ return NR_KVM_EL0_TIMERS;
+
+ return NR_KVM_TIMERS;
+}
u32 timer_get_ctl(struct arch_timer_context *ctxt)
{
@@ -61,6 +77,10 @@ u32 timer_get_ctl(struct arch_timer_context *ctxt)
return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
case TIMER_PTIMER:
return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
+ case TIMER_HVTIMER:
+ return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2);
+ case TIMER_HPTIMER:
+ return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2);
default:
WARN_ON(1);
return 0;
@@ -76,6 +96,10 @@ u64 timer_get_cval(struct arch_timer_context *ctxt)
return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
case TIMER_PTIMER:
return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
+ case TIMER_HVTIMER:
+ return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2);
+ case TIMER_HPTIMER:
+ return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
default:
WARN_ON(1);
return 0;
@@ -84,10 +108,17 @@ u64 timer_get_cval(struct arch_timer_context *ctxt)
static u64 timer_get_offset(struct arch_timer_context *ctxt)
{
+ u64 offset = 0;
+
+ if (!ctxt)
+ return 0;
+
if (ctxt->offset.vm_offset)
- return *ctxt->offset.vm_offset;
+ offset += *ctxt->offset.vm_offset;
+ if (ctxt->offset.vcpu_offset)
+ offset += *ctxt->offset.vcpu_offset;
- return 0;
+ return offset;
}
static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
@@ -101,6 +132,12 @@ static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
case TIMER_PTIMER:
__vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl;
break;
+ case TIMER_HVTIMER:
+ __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl;
+ break;
+ case TIMER_HPTIMER:
+ __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl;
+ break;
default:
WARN_ON(1);
}
@@ -117,6 +154,12 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
case TIMER_PTIMER:
__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval;
break;
+ case TIMER_HVTIMER:
+ __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval;
+ break;
+ case TIMER_HPTIMER:
+ __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval;
+ break;
default:
WARN_ON(1);
}
@@ -139,13 +182,27 @@ u64 kvm_phys_timer_read(void)
static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
{
- if (has_vhe()) {
+ if (vcpu_has_nv(vcpu)) {
+ if (is_hyp_ctxt(vcpu)) {
+ map->direct_vtimer = vcpu_hvtimer(vcpu);
+ map->direct_ptimer = vcpu_hptimer(vcpu);
+ map->emul_vtimer = vcpu_vtimer(vcpu);
+ map->emul_ptimer = vcpu_ptimer(vcpu);
+ } else {
+ map->direct_vtimer = vcpu_vtimer(vcpu);
+ map->direct_ptimer = vcpu_ptimer(vcpu);
+ map->emul_vtimer = vcpu_hvtimer(vcpu);
+ map->emul_ptimer = vcpu_hptimer(vcpu);
+ }
+ } else if (has_vhe()) {
map->direct_vtimer = vcpu_vtimer(vcpu);
map->direct_ptimer = vcpu_ptimer(vcpu);
+ map->emul_vtimer = NULL;
map->emul_ptimer = NULL;
} else {
map->direct_vtimer = vcpu_vtimer(vcpu);
map->direct_ptimer = NULL;
+ map->emul_vtimer = NULL;
map->emul_ptimer = vcpu_ptimer(vcpu);
}
@@ -212,7 +269,7 @@ static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
ns = cyclecounter_cyc2ns(timecounter->cc,
val - now,
timecounter->mask,
- &timecounter->frac);
+ &timer_ctx->ns_frac);
return ns;
}
@@ -240,8 +297,11 @@ static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
{
- struct arch_timer_context *ctx = vcpu_vtimer(vcpu);
u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
+ struct arch_timer_context *ctx;
+
+ ctx = (vcpu_has_nv(vcpu) && is_hyp_ctxt(vcpu)) ? vcpu_hvtimer(vcpu)
+ : vcpu_vtimer(vcpu);
return kvm_counter_compute_delta(ctx, val);
}
@@ -255,7 +315,7 @@ static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
u64 min_delta = ULLONG_MAX;
int i;
- for (i = 0; i < NR_KVM_TIMERS; i++) {
+ for (i = 0; i < nr_timers(vcpu); i++) {
struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];
WARN(ctx->loaded, "timer %d loaded\n", i);
@@ -338,9 +398,11 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
switch (index) {
case TIMER_VTIMER:
+ case TIMER_HVTIMER:
cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
break;
case TIMER_PTIMER:
+ case TIMER_HPTIMER:
cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
break;
case NR_KVM_TIMERS:
@@ -392,12 +454,12 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
int ret;
timer_ctx->irq.level = new_level;
- trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
+ trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
timer_ctx->irq.level);
if (!userspace_irqchip(vcpu->kvm)) {
ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
- timer_ctx->irq.irq,
+ timer_irq(timer_ctx),
timer_ctx->irq.level,
timer_ctx);
WARN_ON(ret);
@@ -432,6 +494,12 @@ static void set_cntvoff(u64 cntvoff)
kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
}
+static void set_cntpoff(u64 cntpoff)
+{
+ if (has_cntpoff())
+ write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
+}
+
static void timer_save_state(struct arch_timer_context *ctx)
{
struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
@@ -447,7 +515,10 @@ static void timer_save_state(struct arch_timer_context *ctx)
goto out;
switch (index) {
+ u64 cval;
+
case TIMER_VTIMER:
+ case TIMER_HVTIMER:
timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL));
@@ -473,13 +544,20 @@ static void timer_save_state(struct arch_timer_context *ctx)
set_cntvoff(0);
break;
case TIMER_PTIMER:
+ case TIMER_HPTIMER:
timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
- timer_set_cval(ctx, read_sysreg_el0(SYS_CNTP_CVAL));
+ cval = read_sysreg_el0(SYS_CNTP_CVAL);
+
+ if (!has_cntpoff())
+ cval -= timer_get_offset(ctx);
+
+ timer_set_cval(ctx, cval);
/* Disable the timer */
write_sysreg_el0(0, SYS_CNTP_CTL);
isb();
+ set_cntpoff(0);
break;
case NR_KVM_TIMERS:
BUG();
@@ -510,6 +588,7 @@ static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
*/
if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
!kvm_timer_irq_can_fire(map.direct_ptimer) &&
+ !kvm_timer_irq_can_fire(map.emul_vtimer) &&
!kvm_timer_irq_can_fire(map.emul_ptimer) &&
!vcpu_has_wfit_active(vcpu))
return;
@@ -543,14 +622,23 @@ static void timer_restore_state(struct arch_timer_context *ctx)
goto out;
switch (index) {
+ u64 cval, offset;
+
case TIMER_VTIMER:
+ case TIMER_HVTIMER:
set_cntvoff(timer_get_offset(ctx));
write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL);
isb();
write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
break;
case TIMER_PTIMER:
- write_sysreg_el0(timer_get_cval(ctx), SYS_CNTP_CVAL);
+ case TIMER_HPTIMER:
+ cval = timer_get_cval(ctx);
+ offset = timer_get_offset(ctx);
+ set_cntpoff(offset);
+ if (!has_cntpoff())
+ cval += offset;
+ write_sysreg_el0(cval, SYS_CNTP_CVAL);
isb();
write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
break;
@@ -586,7 +674,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);
if (irqchip_in_kernel(vcpu->kvm))
- phys_active = kvm_vgic_map_is_active(vcpu, ctx->irq.irq);
+ phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));
phys_active |= ctx->irq.level;
@@ -621,6 +709,128 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}
+/* If _pred is true, set bit in _set, otherwise set it in _clr */
+#define assign_clear_set_bit(_pred, _bit, _clr, _set) \
+ do { \
+ if (_pred) \
+ (_set) |= (_bit); \
+ else \
+ (_clr) |= (_bit); \
+ } while (0)
+
+static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
+ struct timer_map *map)
+{
+ int hw, ret;
+
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return;
+
+ /*
+ * We only ever unmap the vtimer irq on a VHE system that runs nested
+ * virtualization, in which case we have both a valid emul_vtimer,
+ * emul_ptimer, direct_vtimer, and direct_ptimer.
+ *
+ * Since this is called from kvm_timer_vcpu_load(), a change between
+ * vEL2 and vEL1/0 will have just happened, and the timer_map will
+ * represent this, and therefore we switch the emul/direct mappings
+ * below.
+ */
+ hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer));
+ if (hw < 0) {
+ kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer));
+ kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer));
+
+ ret = kvm_vgic_map_phys_irq(vcpu,
+ map->direct_vtimer->host_timer_irq,
+ timer_irq(map->direct_vtimer),
+ &arch_timer_irq_ops);
+ WARN_ON_ONCE(ret);
+ ret = kvm_vgic_map_phys_irq(vcpu,
+ map->direct_ptimer->host_timer_irq,
+ timer_irq(map->direct_ptimer),
+ &arch_timer_irq_ops);
+ WARN_ON_ONCE(ret);
+
+ /*
+ * The virtual offset behaviour is "interresting", as it
+ * always applies when HCR_EL2.E2H==0, but only when
+ * accessed from EL1 when HCR_EL2.E2H==1. So make sure we
+ * track E2H when putting the HV timer in "direct" mode.
+ */
+ if (map->direct_vtimer == vcpu_hvtimer(vcpu)) {
+ struct arch_timer_offset *offs = &map->direct_vtimer->offset;
+
+ if (vcpu_el2_e2h_is_set(vcpu))
+ offs->vcpu_offset = NULL;
+ else
+ offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
+ }
+ }
+}
+
+static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
+{
+ bool tpt, tpc;
+ u64 clr, set;
+
+ /*
+ * No trapping gets configured here with nVHE. See
+ * __timer_enable_traps(), which is where the stuff happens.
+ */
+ if (!has_vhe())
+ return;
+
+ /*
+ * Our default policy is not to trap anything. As we progress
+ * within this function, reality kicks in and we start adding
+ * traps based on emulation requirements.
+ */
+ tpt = tpc = false;
+
+ /*
+ * We have two possibility to deal with a physical offset:
+ *
+ * - Either we have CNTPOFF (yay!) or the offset is 0:
+ * we let the guest freely access the HW
+ *
+ * - or neither of these condition apply:
+ * we trap accesses to the HW, but still use it
+ * after correcting the physical offset
+ */
+ if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
+ tpt = tpc = true;
+
+ /*
+ * Apply the enable bits that the guest hypervisor has requested for
+ * its own guest. We can only add traps that wouldn't have been set
+ * above.
+ */
+ if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
+
+ /* Use the VHE format for mental sanity */
+ if (!vcpu_el2_e2h_is_set(vcpu))
+ val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;
+
+ tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
+ tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));
+ }
+
+ /*
+ * Now that we have collected our requirements, compute the
+ * trap and enable bits.
+ */
+ set = 0;
+ clr = 0;
+
+ assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
+ assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
+
+ /* This only happens on VHE, so use the CNTKCTL_EL1 accessor */
+ sysreg_clear_set(cntkctl_el1, clr, set);
+}
+
void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
@@ -632,6 +842,9 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
get_timer_map(vcpu, &map);
if (static_branch_likely(&has_gic_active_state)) {
+ if (vcpu_has_nv(vcpu))
+ kvm_timer_vcpu_load_nested_switch(vcpu, &map);
+
kvm_timer_vcpu_load_gic(map.direct_vtimer);
if (map.direct_ptimer)
kvm_timer_vcpu_load_gic(map.direct_ptimer);
@@ -644,9 +857,12 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
timer_restore_state(map.direct_vtimer);
if (map.direct_ptimer)
timer_restore_state(map.direct_ptimer);
-
+ if (map.emul_vtimer)
+ timer_emulate(map.emul_vtimer);
if (map.emul_ptimer)
timer_emulate(map.emul_ptimer);
+
+ timer_set_traps(vcpu, &map);
}
bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
@@ -689,6 +905,8 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
* In any case, we re-schedule the hrtimer for the physical timer when
* coming back to the VCPU thread in kvm_timer_vcpu_load().
*/
+ if (map.emul_vtimer)
+ soft_timer_cancel(&map.emul_vtimer->hrtimer);
if (map.emul_ptimer)
soft_timer_cancel(&map.emul_ptimer->hrtimer);
@@ -738,56 +956,89 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
* resets the timer to be disabled and unmasked and is compliant with
* the ARMv7 architecture.
*/
- timer_set_ctl(vcpu_vtimer(vcpu), 0);
- timer_set_ctl(vcpu_ptimer(vcpu), 0);
+ for (int i = 0; i < nr_timers(vcpu); i++)
+ timer_set_ctl(vcpu_get_timer(vcpu, i), 0);
+
+ /*
+ * A vcpu running at EL2 is in charge of the offset applied to
+ * the virtual timer, so use the physical VM offset, and point
+ * the vcpu offset to CNTVOFF_EL2.
+ */
+ if (vcpu_has_nv(vcpu)) {
+ struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset;
+
+ offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
+ offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset;
+ }
if (timer->enabled) {
- kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu));
- kvm_timer_update_irq(vcpu, false, vcpu_ptimer(vcpu));
+ for (int i = 0; i < nr_timers(vcpu); i++)
+ kvm_timer_update_irq(vcpu, false,
+ vcpu_get_timer(vcpu, i));
if (irqchip_in_kernel(vcpu->kvm)) {
- kvm_vgic_reset_mapped_irq(vcpu, map.direct_vtimer->irq.irq);
+ kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer));
if (map.direct_ptimer)
- kvm_vgic_reset_mapped_irq(vcpu, map.direct_ptimer->irq.irq);
+ kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer));
}
}
+ if (map.emul_vtimer)
+ soft_timer_cancel(&map.emul_vtimer->hrtimer);
if (map.emul_ptimer)
soft_timer_cancel(&map.emul_ptimer->hrtimer);
return 0;
}
+static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
+{
+ struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
+ struct kvm *kvm = vcpu->kvm;
+
+ ctxt->vcpu = vcpu;
+
+ if (timerid == TIMER_VTIMER)
+ ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
+ else
+ ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;
+
+ hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
+ ctxt->hrtimer.function = kvm_hrtimer_expire;
+
+ switch (timerid) {
+ case TIMER_PTIMER:
+ case TIMER_HPTIMER:
+ ctxt->host_timer_irq = host_ptimer_irq;
+ break;
+ case TIMER_VTIMER:
+ case TIMER_HVTIMER:
+ ctxt->host_timer_irq = host_vtimer_irq;
+ break;
+ }
+}
+
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
- struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
- struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
- vtimer->vcpu = vcpu;
- vtimer->offset.vm_offset = &vcpu->kvm->arch.timer_data.voffset;
- ptimer->vcpu = vcpu;
+ for (int i = 0; i < NR_KVM_TIMERS; i++)
+ timer_context_init(vcpu, i);
- /* Synchronize cntvoff across all vtimers of a VM. */
- timer_set_offset(vtimer, kvm_phys_timer_read());
- timer_set_offset(ptimer, 0);
+ /* Synchronize offsets across timers of a VM if not already provided */
+ if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
+ timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
+ timer_set_offset(vcpu_ptimer(vcpu), 0);
+ }
hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
timer->bg_timer.function = kvm_bg_timer_expire;
+}
- hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
- hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
- vtimer->hrtimer.function = kvm_hrtimer_expire;
- ptimer->hrtimer.function = kvm_hrtimer_expire;
-
- vtimer->irq.irq = default_vtimer_irq.irq;
- ptimer->irq.irq = default_ptimer_irq.irq;
-
- vtimer->host_timer_irq = host_vtimer_irq;
- ptimer->host_timer_irq = host_ptimer_irq;
-
- vtimer->host_timer_irq_flags = host_vtimer_irq_flags;
- ptimer->host_timer_irq_flags = host_ptimer_irq_flags;
+void kvm_timer_init_vm(struct kvm *kvm)
+{
+ for (int i = 0; i < NR_KVM_TIMERS; i++)
+ kvm->arch.timer_data.ppi[i] = default_ppi[i];
}
void kvm_timer_cpu_up(void)
@@ -814,8 +1065,11 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
break;
case KVM_REG_ARM_TIMER_CNT:
- timer = vcpu_vtimer(vcpu);
- timer_set_offset(timer, kvm_phys_timer_read() - value);
+ if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
+ &vcpu->kvm->arch.flags)) {
+ timer = vcpu_vtimer(vcpu);
+ timer_set_offset(timer, kvm_phys_timer_read() - value);
+ }
break;
case KVM_REG_ARM_TIMER_CVAL:
timer = vcpu_vtimer(vcpu);
@@ -825,6 +1079,13 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
timer = vcpu_ptimer(vcpu);
kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
break;
+ case KVM_REG_ARM_PTIMER_CNT:
+ if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
+ &vcpu->kvm->arch.flags)) {
+ timer = vcpu_ptimer(vcpu);
+ timer_set_offset(timer, kvm_phys_timer_read() - value);
+ }
+ break;
case KVM_REG_ARM_PTIMER_CVAL:
timer = vcpu_ptimer(vcpu);
kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
@@ -902,6 +1163,10 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
val = kvm_phys_timer_read() - timer_get_offset(timer);
break;
+ case TIMER_REG_VOFF:
+ val = *timer->offset.vcpu_offset;
+ break;
+
default:
BUG();
}
@@ -920,7 +1185,7 @@ u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
get_timer_map(vcpu, &map);
timer = vcpu_get_timer(vcpu, tmr);
- if (timer == map.emul_ptimer)
+ if (timer == map.emul_vtimer || timer == map.emul_ptimer)
return kvm_arm_timer_read(vcpu, timer, treg);
preempt_disable();
@@ -952,6 +1217,10 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
timer_set_cval(timer, val);
break;
+ case TIMER_REG_VOFF:
+ *timer->offset.vcpu_offset = val;
+ break;
+
default:
BUG();
}
@@ -967,7 +1236,7 @@ void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
get_timer_map(vcpu, &map);
timer = vcpu_get_timer(vcpu, tmr);
- if (timer == map.emul_ptimer) {
+ if (timer == map.emul_vtimer || timer == map.emul_ptimer) {
soft_timer_cancel(&timer->hrtimer);
kvm_arm_timer_write(vcpu, timer, treg, val);
timer_emulate(timer);
@@ -1047,10 +1316,6 @@ static const struct irq_domain_ops timer_domain_ops = {
.free = timer_irq_domain_free,
};
-static struct irq_ops arch_timer_irq_ops = {
- .get_input_level = kvm_arch_timer_get_input_level,
-};
-
static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
{
*flags = irq_get_trigger_type(virq);
@@ -1192,44 +1457,56 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
{
- int vtimer_irq, ptimer_irq, ret;
- unsigned long i;
+ u32 ppis = 0;
+ bool valid;
- vtimer_irq = vcpu_vtimer(vcpu)->irq.irq;
- ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu));
- if (ret)
- return false;
+ mutex_lock(&vcpu->kvm->arch.config_lock);
- ptimer_irq = vcpu_ptimer(vcpu)->irq.irq;
- ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu));
- if (ret)
- return false;
+ for (int i = 0; i < nr_timers(vcpu); i++) {
+ struct arch_timer_context *ctx;
+ int irq;
+
+ ctx = vcpu_get_timer(vcpu, i);
+ irq = timer_irq(ctx);
+ if (kvm_vgic_set_owner(vcpu, irq, ctx))
+ break;
- kvm_for_each_vcpu(i, vcpu, vcpu->kvm) {
- if (vcpu_vtimer(vcpu)->irq.irq != vtimer_irq ||
- vcpu_ptimer(vcpu)->irq.irq != ptimer_irq)
- return false;
+ /*
+ * We know by construction that we only have PPIs, so
+ * all values are less than 32.
+ */
+ ppis |= BIT(irq);
}
- return true;
+ valid = hweight32(ppis) == nr_timers(vcpu);
+
+ if (valid)
+ set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags);
+
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
+
+ return valid;
}
-bool kvm_arch_timer_get_input_level(int vintid)
+static bool kvm_arch_timer_get_input_level(int vintid)
{
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
- struct arch_timer_context *timer;
if (WARN(!vcpu, "No vcpu context!\n"))
return false;
- if (vintid == vcpu_vtimer(vcpu)->irq.irq)
- timer = vcpu_vtimer(vcpu);
- else if (vintid == vcpu_ptimer(vcpu)->irq.irq)
- timer = vcpu_ptimer(vcpu);
- else
- BUG();
+ for (int i = 0; i < nr_timers(vcpu); i++) {
+ struct arch_timer_context *ctx;
+
+ ctx = vcpu_get_timer(vcpu, i);
+ if (timer_irq(ctx) == vintid)
+ return kvm_timer_should_fire(ctx);
+ }
+
+ /* A timer IRQ has fired, but no matching timer was found? */
+ WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid);
- return kvm_timer_should_fire(timer);
+ return false;
}
int kvm_timer_enable(struct kvm_vcpu *vcpu)
@@ -1258,7 +1535,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
ret = kvm_vgic_map_phys_irq(vcpu,
map.direct_vtimer->host_timer_irq,
- map.direct_vtimer->irq.irq,
+ timer_irq(map.direct_vtimer),
&arch_timer_irq_ops);
if (ret)
return ret;
@@ -1266,7 +1543,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
if (map.direct_ptimer) {
ret = kvm_vgic_map_phys_irq(vcpu,
map.direct_ptimer->host_timer_irq,
- map.direct_ptimer->irq.irq,
+ timer_irq(map.direct_ptimer),
&arch_timer_irq_ops);
}
@@ -1278,45 +1555,17 @@ no_vgic:
return 0;
}
-/*
- * On VHE system, we only need to configure the EL2 timer trap register once,
- * not for every world switch.
- * The host kernel runs at EL2 with HCR_EL2.TGE == 1,
- * and this makes those bits have no effect for the host kernel execution.
- */
+/* If we have CNTPOFF, permanently set ECV to enable it */
void kvm_timer_init_vhe(void)
{
- /* When HCR_EL2.E2H ==1, EL1PCEN and EL1PCTEN are shifted by 10 */
- u32 cnthctl_shift = 10;
- u64 val;
-
- /*
- * VHE systems allow the guest direct access to the EL1 physical
- * timer/counter.
- */
- val = read_sysreg(cnthctl_el2);
- val |= (CNTHCTL_EL1PCEN << cnthctl_shift);
- val |= (CNTHCTL_EL1PCTEN << cnthctl_shift);
- write_sysreg(val, cnthctl_el2);
-}
-
-static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq)
-{
- struct kvm_vcpu *vcpu;
- unsigned long i;
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- vcpu_vtimer(vcpu)->irq.irq = vtimer_irq;
- vcpu_ptimer(vcpu)->irq.irq = ptimer_irq;
- }
+ if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
+ sysreg_clear_set(cntkctl_el1, 0, CNTHCTL_ECV);
}
int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
int __user *uaddr = (int __user *)(long)attr->addr;
- struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
- struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
- int irq;
+ int irq, idx, ret = 0;
if (!irqchip_in_kernel(vcpu->kvm))
return -EINVAL;
@@ -1327,21 +1576,42 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
if (!(irq_is_ppi(irq)))
return -EINVAL;
- if (vcpu->arch.timer_cpu.enabled)
- return -EBUSY;
+ mutex_lock(&vcpu->kvm->arch.config_lock);
+
+ if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
+ &vcpu->kvm->arch.flags)) {
+ ret = -EBUSY;
+ goto out;
+ }
switch (attr->attr) {
case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
- set_timer_irqs(vcpu->kvm, irq, ptimer->irq.irq);
+ idx = TIMER_VTIMER;
break;
case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
- set_timer_irqs(vcpu->kvm, vtimer->irq.irq, irq);
+ idx = TIMER_PTIMER;
+ break;
+ case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
+ idx = TIMER_HVTIMER;
+ break;
+ case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
+ idx = TIMER_HPTIMER;
break;
default:
- return -ENXIO;
+ ret = -ENXIO;
+ goto out;
}
- return 0;
+ /*
+ * We cannot validate the IRQ unicity before we run, so take it at
+ * face value. The verdict will be given on first vcpu run, for each
+ * vcpu. Yes this is late. Blame it on the stupid API.
+ */
+ vcpu->kvm->arch.timer_data.ppi[idx] = irq;
+
+out:
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
+ return ret;
}
int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
@@ -1357,11 +1627,17 @@ int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
timer = vcpu_ptimer(vcpu);
break;
+ case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
+ timer = vcpu_hvtimer(vcpu);
+ break;
+ case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
+ timer = vcpu_hptimer(vcpu);
+ break;
default:
return -ENXIO;
}
- irq = timer->irq.irq;
+ irq = timer_irq(timer);
return put_user(irq, uaddr);
}
@@ -1370,8 +1646,42 @@ int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
switch (attr->attr) {
case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
+ case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
+ case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
return 0;
}
return -ENXIO;
}
+
+int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
+ struct kvm_arm_counter_offset *offset)
+{
+ int ret = 0;
+
+ if (offset->reserved)
+ return -EINVAL;
+
+ mutex_lock(&kvm->lock);
+
+ if (lock_all_vcpus(kvm)) {
+ set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags);
+
+ /*
+ * If userspace decides to set the offset using this
+ * API rather than merely restoring the counter
+ * values, the offset applies to both the virtual and
+ * physical views.
+ */
+ kvm->arch.timer_data.voffset = offset->counter_offset;
+ kvm->arch.timer_data.poffset = offset->counter_offset;
+
+ unlock_all_vcpus(kvm);
+ } else {
+ ret = -EBUSY;
+ }
+
+ mutex_unlock(&kvm->lock);
+
+ return ret;
+}
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 3bd732eaf087..bb21d0c25de7 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -128,6 +128,16 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
int ret;
+ mutex_init(&kvm->arch.config_lock);
+
+#ifdef CONFIG_LOCKDEP
+ /* Clue in lockdep that the config_lock must be taken inside kvm->lock */
+ mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
+ mutex_unlock(&kvm->arch.config_lock);
+ mutex_unlock(&kvm->lock);
+#endif
+
ret = kvm_share_hyp(kvm, kvm + 1);
if (ret)
return ret;
@@ -148,6 +158,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_vgic_early_init(kvm);
+ kvm_timer_init_vm(kvm);
+
/* The maximum number of VCPUs is limited by the host's GIC model */
kvm->max_vcpus = kvm_arm_default_max_vcpus();
@@ -192,6 +204,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_destroy_vcpus(kvm);
kvm_unshare_hyp(kvm, kvm + 1);
+
+ kvm_arm_teardown_hypercalls(kvm);
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -220,6 +234,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_VCPU_ATTRIBUTES:
case KVM_CAP_PTP_KVM:
case KVM_CAP_ARM_SYSTEM_SUSPEND:
+ case KVM_CAP_COUNTER_OFFSET:
r = 1;
break;
case KVM_CAP_SET_GUEST_DEBUG2:
@@ -326,6 +341,16 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
int err;
+ spin_lock_init(&vcpu->arch.mp_state_lock);
+
+#ifdef CONFIG_LOCKDEP
+ /* Inform lockdep that the config_lock is acquired after vcpu->mutex */
+ mutex_lock(&vcpu->mutex);
+ mutex_lock(&vcpu->kvm->arch.config_lock);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
+ mutex_unlock(&vcpu->mutex);
+#endif
+
/* Force users to call KVM_ARM_VCPU_INIT */
vcpu->arch.target = -1;
bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
@@ -443,34 +468,41 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
vcpu->cpu = -1;
}
-void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
+static void __kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
{
- vcpu->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
+ WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
kvm_make_request(KVM_REQ_SLEEP, vcpu);
kvm_vcpu_kick(vcpu);
}
+void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
+{
+ spin_lock(&vcpu->arch.mp_state_lock);
+ __kvm_arm_vcpu_power_off(vcpu);
+ spin_unlock(&vcpu->arch.mp_state_lock);
+}
+
bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_STOPPED;
+ return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED;
}
static void kvm_arm_vcpu_suspend(struct kvm_vcpu *vcpu)
{
- vcpu->arch.mp_state.mp_state = KVM_MP_STATE_SUSPENDED;
+ WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_SUSPENDED);
kvm_make_request(KVM_REQ_SUSPEND, vcpu);
kvm_vcpu_kick(vcpu);
}
static bool kvm_arm_vcpu_suspended(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_SUSPENDED;
+ return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_SUSPENDED;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- *mp_state = vcpu->arch.mp_state;
+ *mp_state = READ_ONCE(vcpu->arch.mp_state);
return 0;
}
@@ -480,12 +512,14 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
{
int ret = 0;
+ spin_lock(&vcpu->arch.mp_state_lock);
+
switch (mp_state->mp_state) {
case KVM_MP_STATE_RUNNABLE:
- vcpu->arch.mp_state = *mp_state;
+ WRITE_ONCE(vcpu->arch.mp_state, *mp_state);
break;
case KVM_MP_STATE_STOPPED:
- kvm_arm_vcpu_power_off(vcpu);
+ __kvm_arm_vcpu_power_off(vcpu);
break;
case KVM_MP_STATE_SUSPENDED:
kvm_arm_vcpu_suspend(vcpu);
@@ -494,6 +528,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
ret = -EINVAL;
}
+ spin_unlock(&vcpu->arch.mp_state_lock);
+
return ret;
}
@@ -593,9 +629,9 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
if (kvm_vm_is_protected(kvm))
kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu);
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
set_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags);
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.config_lock);
return ret;
}
@@ -1210,10 +1246,14 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
/*
* Handle the "start in power-off" case.
*/
+ spin_lock(&vcpu->arch.mp_state_lock);
+
if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
- kvm_arm_vcpu_power_off(vcpu);
+ __kvm_arm_vcpu_power_off(vcpu);
else
- vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE;
+ WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
+
+ spin_unlock(&vcpu->arch.mp_state_lock);
return 0;
}
@@ -1439,11 +1479,32 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
}
}
+static int kvm_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_ARM_VM_SMCCC_CTRL:
+ return kvm_vm_smccc_has_attr(kvm, attr);
+ default:
+ return -ENXIO;
+ }
+}
+
+static int kvm_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_ARM_VM_SMCCC_CTRL:
+ return kvm_vm_smccc_set_attr(kvm, attr);
+ default:
+ return -ENXIO;
+ }
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
struct kvm *kvm = filp->private_data;
void __user *argp = (void __user *)arg;
+ struct kvm_device_attr attr;
switch (ioctl) {
case KVM_CREATE_IRQCHIP: {
@@ -1479,11 +1540,73 @@ long kvm_arch_vm_ioctl(struct file *filp,
return -EFAULT;
return kvm_vm_ioctl_mte_copy_tags(kvm, &copy_tags);
}
+ case KVM_ARM_SET_COUNTER_OFFSET: {
+ struct kvm_arm_counter_offset offset;
+
+ if (copy_from_user(&offset, argp, sizeof(offset)))
+ return -EFAULT;
+ return kvm_vm_ioctl_set_counter_offset(kvm, &offset);
+ }
+ case KVM_HAS_DEVICE_ATTR: {
+ if (copy_from_user(&attr, argp, sizeof(attr)))
+ return -EFAULT;
+
+ return kvm_vm_has_attr(kvm, &attr);
+ }
+ case KVM_SET_DEVICE_ATTR: {
+ if (copy_from_user(&attr, argp, sizeof(attr)))
+ return -EFAULT;
+
+ return kvm_vm_set_attr(kvm, &attr);
+ }
default:
return -EINVAL;
}
}
+/* unlocks vcpus from @vcpu_lock_idx and smaller */
+static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx)
+{
+ struct kvm_vcpu *tmp_vcpu;
+
+ for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
+ tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
+ mutex_unlock(&tmp_vcpu->mutex);
+ }
+}
+
+void unlock_all_vcpus(struct kvm *kvm)
+{
+ lockdep_assert_held(&kvm->lock);
+
+ unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1);
+}
+
+/* Returns true if all vcpus were locked, false otherwise */
+bool lock_all_vcpus(struct kvm *kvm)
+{
+ struct kvm_vcpu *tmp_vcpu;
+ unsigned long c;
+
+ lockdep_assert_held(&kvm->lock);
+
+ /*
+ * Any time a vcpu is in an ioctl (including running), the
+ * core KVM code tries to grab the vcpu->mutex.
+ *
+ * By grabbing the vcpu->mutex of all VCPUs we ensure that no
+ * other VCPUs can fiddle with the state while we access it.
+ */
+ kvm_for_each_vcpu(c, tmp_vcpu, kvm) {
+ if (!mutex_trylock(&tmp_vcpu->mutex)) {
+ unlock_vcpus(kvm, c - 1);
+ return false;
+ }
+ }
+
+ return true;
+}
+
static unsigned long nvhe_percpu_size(void)
{
return (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_end) -
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 07444fa22888..faae7ec2c1ba 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -590,11 +590,16 @@ static unsigned long num_core_regs(const struct kvm_vcpu *vcpu)
return copy_core_reg_indices(vcpu, NULL);
}
-/**
- * ARM64 versions of the TIMER registers, always available on arm64
- */
+static const u64 timer_reg_list[] = {
+ KVM_REG_ARM_TIMER_CTL,
+ KVM_REG_ARM_TIMER_CNT,
+ KVM_REG_ARM_TIMER_CVAL,
+ KVM_REG_ARM_PTIMER_CTL,
+ KVM_REG_ARM_PTIMER_CNT,
+ KVM_REG_ARM_PTIMER_CVAL,
+};
-#define NUM_TIMER_REGS 3
+#define NUM_TIMER_REGS ARRAY_SIZE(timer_reg_list)
static bool is_timer_reg(u64 index)
{
@@ -602,6 +607,9 @@ static bool is_timer_reg(u64 index)
case KVM_REG_ARM_TIMER_CTL:
case KVM_REG_ARM_TIMER_CNT:
case KVM_REG_ARM_TIMER_CVAL:
+ case KVM_REG_ARM_PTIMER_CTL:
+ case KVM_REG_ARM_PTIMER_CNT:
+ case KVM_REG_ARM_PTIMER_CVAL:
return true;
}
return false;
@@ -609,14 +617,11 @@ static bool is_timer_reg(u64 index)
static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
{
- if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
- return -EFAULT;
- uindices++;
- if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
- return -EFAULT;
- uindices++;
- if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
- return -EFAULT;
+ for (int i = 0; i < NUM_TIMER_REGS; i++) {
+ if (put_user(timer_reg_list[i], uindices))
+ return -EFAULT;
+ uindices++;
+ }
return 0;
}
@@ -957,7 +962,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
switch (attr->group) {
case KVM_ARM_VCPU_PMU_V3_CTRL:
+ mutex_lock(&vcpu->kvm->arch.config_lock);
ret = kvm_arm_pmu_v3_set_attr(vcpu, attr);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
break;
case KVM_ARM_VCPU_TIMER_CTRL:
ret = kvm_arm_timer_set_attr(vcpu, attr);
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index a798c0b4d717..6dcd6604b6bc 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -36,8 +36,6 @@ static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
static int handle_hvc(struct kvm_vcpu *vcpu)
{
- int ret;
-
trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0),
kvm_vcpu_hvc_get_imm(vcpu));
vcpu->stat.hvc_exit_stat++;
@@ -52,33 +50,29 @@ static int handle_hvc(struct kvm_vcpu *vcpu)
return 1;
}
- ret = kvm_hvc_call_handler(vcpu);
- if (ret < 0) {
- vcpu_set_reg(vcpu, 0, ~0UL);
- return 1;
- }
-
- return ret;
+ return kvm_smccc_call_handler(vcpu);
}
static int handle_smc(struct kvm_vcpu *vcpu)
{
- int ret;
-
/*
* "If an SMC instruction executed at Non-secure EL1 is
* trapped to EL2 because HCR_EL2.TSC is 1, the exception is a
* Trap exception, not a Secure Monitor Call exception [...]"
*
* We need to advance the PC after the trap, as it would
- * otherwise return to the same address...
- *
- * Only handle SMCs from the virtual EL2 with an immediate of zero and
- * skip it otherwise.
+ * otherwise return to the same address. Furthermore, pre-incrementing
+ * the PC before potentially exiting to userspace maintains the same
+ * abstraction for both SMCs and HVCs.
+ */
+ kvm_incr_pc(vcpu);
+
+ /*
+ * SMCs with a nonzero immediate are reserved according to DEN0028E 2.9
+ * "SMC and HVC immediate value".
*/
- if (!vcpu_is_el2(vcpu) || kvm_vcpu_hvc_get_imm(vcpu)) {
+ if (kvm_vcpu_hvc_get_imm(vcpu)) {
vcpu_set_reg(vcpu, 0, ~0UL);
- kvm_incr_pc(vcpu);
return 1;
}
@@ -89,13 +83,7 @@ static int handle_smc(struct kvm_vcpu *vcpu)
* at Non-secure EL1 is trapped to EL2 if HCR_EL2.TSC==1, rather than
* being treated as UNDEFINED.
*/
- ret = kvm_hvc_call_handler(vcpu);
- if (ret < 0)
- vcpu_set_reg(vcpu, 0, ~0UL);
-
- kvm_incr_pc(vcpu);
-
- return ret;
+ return kvm_smccc_call_handler(vcpu);
}
/*
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 07d37ff88a3f..c41166f1a1dd 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -26,6 +26,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
+#include <asm/kvm_nested.h>
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
@@ -326,6 +327,55 @@ static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code)
return true;
}
+static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu)
+{
+ struct arch_timer_context *ctxt;
+ u32 sysreg;
+ u64 val;
+
+ /*
+ * We only get here for 64bit guests, 32bit guests will hit
+ * the long and winding road all the way to the standard
+ * handling. Yes, it sucks to be irrelevant.
+ */
+ sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
+
+ switch (sysreg) {
+ case SYS_CNTPCT_EL0:
+ case SYS_CNTPCTSS_EL0:
+ if (vcpu_has_nv(vcpu)) {
+ if (is_hyp_ctxt(vcpu)) {
+ ctxt = vcpu_hptimer(vcpu);
+ break;
+ }
+
+ /* Check for guest hypervisor trapping */
+ val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
+ if (!vcpu_el2_e2h_is_set(vcpu))
+ val = (val & CNTHCTL_EL1PCTEN) << 10;
+
+ if (!(val & (CNTHCTL_EL1PCTEN << 10)))
+ return false;
+ }
+
+ ctxt = vcpu_ptimer(vcpu);
+ break;
+ default:
+ return false;
+ }
+
+ val = arch_timer_read_cntpct_el0();
+
+ if (ctxt->offset.vm_offset)
+ val -= *kern_hyp_va(ctxt->offset.vm_offset);
+ if (ctxt->offset.vcpu_offset)
+ val -= *kern_hyp_va(ctxt->offset.vcpu_offset);
+
+ vcpu_set_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu), val);
+ __kvm_skip_instr(vcpu);
+ return true;
+}
+
static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
@@ -339,6 +389,9 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
return kvm_hyp_handle_ptrauth(vcpu, exit_code);
+ if (kvm_hyp_handle_cntpct(vcpu))
+ return true;
+
return false;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/timer-sr.c b/arch/arm64/kvm/hyp/nvhe/timer-sr.c
index 9072e71693ba..b185ac0dbd47 100644
--- a/arch/arm64/kvm/hyp/nvhe/timer-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/timer-sr.c
@@ -9,6 +9,7 @@
#include <linux/kvm_host.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
void __kvm_timer_set_cntvoff(u64 cntvoff)
{
@@ -35,14 +36,19 @@ void __timer_disable_traps(struct kvm_vcpu *vcpu)
*/
void __timer_enable_traps(struct kvm_vcpu *vcpu)
{
- u64 val;
+ u64 clr = 0, set = 0;
/*
* Disallow physical timer access for the guest
- * Physical counter access is allowed
+ * Physical counter access is allowed if no offset is enforced
+ * or running protected (we don't offset anything in this case).
*/
- val = read_sysreg(cnthctl_el2);
- val &= ~CNTHCTL_EL1PCEN;
- val |= CNTHCTL_EL1PCTEN;
- write_sysreg(val, cnthctl_el2);
+ clr = CNTHCTL_EL1PCEN;
+ if (is_protected_kvm_enabled() ||
+ !kern_hyp_va(vcpu->kvm)->arch.timer_data.poffset)
+ set |= CNTHCTL_EL1PCTEN;
+ else
+ clr |= CNTHCTL_EL1PCTEN;
+
+ sysreg_clear_set(cnthctl_el2, clr, set);
}
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 5da884e11337..2e16fc7b31bf 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -47,7 +47,7 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val)
cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.voffset;
break;
case KVM_PTP_PHYS_COUNTER:
- cycles = systime_snapshot.cycles;
+ cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.poffset;
break;
default:
return;
@@ -65,7 +65,7 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val)
val[3] = lower_32_bits(cycles);
}
-static bool kvm_hvc_call_default_allowed(u32 func_id)
+static bool kvm_smccc_default_allowed(u32 func_id)
{
switch (func_id) {
/*
@@ -93,7 +93,7 @@ static bool kvm_hvc_call_default_allowed(u32 func_id)
}
}
-static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id)
+static bool kvm_smccc_test_fw_bmap(struct kvm_vcpu *vcpu, u32 func_id)
{
struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat;
@@ -117,20 +117,161 @@ static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id)
return test_bit(KVM_REG_ARM_VENDOR_HYP_BIT_PTP,
&smccc_feat->vendor_hyp_bmap);
default:
- return kvm_hvc_call_default_allowed(func_id);
+ return false;
+ }
+}
+
+#define SMC32_ARCH_RANGE_BEGIN ARM_SMCCC_VERSION_FUNC_ID
+#define SMC32_ARCH_RANGE_END ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_32, \
+ 0, ARM_SMCCC_FUNC_MASK)
+
+#define SMC64_ARCH_RANGE_BEGIN ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_64, \
+ 0, 0)
+#define SMC64_ARCH_RANGE_END ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_64, \
+ 0, ARM_SMCCC_FUNC_MASK)
+
+static void init_smccc_filter(struct kvm *kvm)
+{
+ int r;
+
+ mt_init(&kvm->arch.smccc_filter);
+
+ /*
+ * Prevent userspace from handling any SMCCC calls in the architecture
+ * range, avoiding the risk of misrepresenting Spectre mitigation status
+ * to the guest.
+ */
+ r = mtree_insert_range(&kvm->arch.smccc_filter,
+ SMC32_ARCH_RANGE_BEGIN, SMC32_ARCH_RANGE_END,
+ xa_mk_value(KVM_SMCCC_FILTER_HANDLE),
+ GFP_KERNEL_ACCOUNT);
+ WARN_ON_ONCE(r);
+
+ r = mtree_insert_range(&kvm->arch.smccc_filter,
+ SMC64_ARCH_RANGE_BEGIN, SMC64_ARCH_RANGE_END,
+ xa_mk_value(KVM_SMCCC_FILTER_HANDLE),
+ GFP_KERNEL_ACCOUNT);
+ WARN_ON_ONCE(r);
+
+}
+
+static int kvm_smccc_set_filter(struct kvm *kvm, struct kvm_smccc_filter __user *uaddr)
+{
+ const void *zero_page = page_to_virt(ZERO_PAGE(0));
+ struct kvm_smccc_filter filter;
+ u32 start, end;
+ int r;
+
+ if (copy_from_user(&filter, uaddr, sizeof(filter)))
+ return -EFAULT;
+
+ if (memcmp(filter.pad, zero_page, sizeof(filter.pad)))
+ return -EINVAL;
+
+ start = filter.base;
+ end = start + filter.nr_functions - 1;
+
+ if (end < start || filter.action >= NR_SMCCC_FILTER_ACTIONS)
+ return -EINVAL;
+
+ mutex_lock(&kvm->arch.config_lock);
+
+ if (kvm_vm_has_ran_once(kvm)) {
+ r = -EBUSY;
+ goto out_unlock;
}
+
+ r = mtree_insert_range(&kvm->arch.smccc_filter, start, end,
+ xa_mk_value(filter.action), GFP_KERNEL_ACCOUNT);
+ if (r)
+ goto out_unlock;
+
+ set_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags);
+
+out_unlock:
+ mutex_unlock(&kvm->arch.config_lock);
+ return r;
+}
+
+static u8 kvm_smccc_filter_get_action(struct kvm *kvm, u32 func_id)
+{
+ unsigned long idx = func_id;
+ void *val;
+
+ if (!test_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags))
+ return KVM_SMCCC_FILTER_HANDLE;
+
+ /*
+ * But where's the error handling, you say?
+ *
+ * mt_find() returns NULL if no entry was found, which just so happens
+ * to match KVM_SMCCC_FILTER_HANDLE.
+ */
+ val = mt_find(&kvm->arch.smccc_filter, &idx, idx);
+ return xa_to_value(val);
}
-int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
+static u8 kvm_smccc_get_action(struct kvm_vcpu *vcpu, u32 func_id)
+{
+ /*
+ * Intervening actions in the SMCCC filter take precedence over the
+ * pseudo-firmware register bitmaps.
+ */
+ u8 action = kvm_smccc_filter_get_action(vcpu->kvm, func_id);
+ if (action != KVM_SMCCC_FILTER_HANDLE)
+ return action;
+
+ if (kvm_smccc_test_fw_bmap(vcpu, func_id) ||
+ kvm_smccc_default_allowed(func_id))
+ return KVM_SMCCC_FILTER_HANDLE;
+
+ return KVM_SMCCC_FILTER_DENY;
+}
+
+static void kvm_prepare_hypercall_exit(struct kvm_vcpu *vcpu, u32 func_id)
+{
+ u8 ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu));
+ struct kvm_run *run = vcpu->run;
+ u64 flags = 0;
+
+ if (ec == ESR_ELx_EC_SMC32 || ec == ESR_ELx_EC_SMC64)
+ flags |= KVM_HYPERCALL_EXIT_SMC;
+
+ if (!kvm_vcpu_trap_il_is32bit(vcpu))
+ flags |= KVM_HYPERCALL_EXIT_16BIT;
+
+ run->exit_reason = KVM_EXIT_HYPERCALL;
+ run->hypercall = (typeof(run->hypercall)) {
+ .nr = func_id,
+ .flags = flags,
+ };
+}
+
+int kvm_smccc_call_handler(struct kvm_vcpu *vcpu)
{
struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat;
u32 func_id = smccc_get_function(vcpu);
u64 val[4] = {SMCCC_RET_NOT_SUPPORTED};
u32 feature;
+ u8 action;
gpa_t gpa;
- if (!kvm_hvc_call_allowed(vcpu, func_id))
+ action = kvm_smccc_get_action(vcpu, func_id);
+ switch (action) {
+ case KVM_SMCCC_FILTER_HANDLE:
+ break;
+ case KVM_SMCCC_FILTER_DENY:
+ goto out;
+ case KVM_SMCCC_FILTER_FWD_TO_USER:
+ kvm_prepare_hypercall_exit(vcpu, func_id);
+ return 0;
+ default:
+ WARN_RATELIMIT(1, "Unhandled SMCCC filter action: %d\n", action);
goto out;
+ }
switch (func_id) {
case ARM_SMCCC_VERSION_FUNC_ID:
@@ -245,6 +386,13 @@ void kvm_arm_init_hypercalls(struct kvm *kvm)
smccc_feat->std_bmap = KVM_ARM_SMCCC_STD_FEATURES;
smccc_feat->std_hyp_bmap = KVM_ARM_SMCCC_STD_HYP_FEATURES;
smccc_feat->vendor_hyp_bmap = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES;
+
+ init_smccc_filter(kvm);
+}
+
+void kvm_arm_teardown_hypercalls(struct kvm *kvm)
+{
+ mtree_destroy(&kvm->arch.smccc_filter);
}
int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
@@ -377,17 +525,16 @@ static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val)
if (val & ~fw_reg_features)
return -EINVAL;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
- if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) &&
- val != *fw_reg_bmap) {
+ if (kvm_vm_has_ran_once(kvm) && val != *fw_reg_bmap) {
ret = -EBUSY;
goto out;
}
WRITE_ONCE(*fw_reg_bmap, val);
out:
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.config_lock);
return ret;
}
@@ -479,3 +626,25 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
return -EINVAL;
}
+
+int kvm_vm_smccc_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ switch (attr->attr) {
+ case KVM_ARM_VM_SMCCC_FILTER:
+ return 0;
+ default:
+ return -ENXIO;
+ }
+}
+
+int kvm_vm_smccc_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ void __user *uaddr = (void __user *)attr->addr;
+
+ switch (attr->attr) {
+ case KVM_ARM_VM_SMCCC_FILTER:
+ return kvm_smccc_set_filter(kvm, uaddr);
+ default:
+ return -ENXIO;
+ }
+}
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 24908400e190..8402e5a1354e 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -874,13 +874,13 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
struct arm_pmu *arm_pmu;
int ret = -ENXIO;
- mutex_lock(&kvm->lock);
+ lockdep_assert_held(&kvm->arch.config_lock);
mutex_lock(&arm_pmus_lock);
list_for_each_entry(entry, &arm_pmus, entry) {
arm_pmu = entry->arm_pmu;
if (arm_pmu->pmu.type == pmu_id) {
- if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) ||
+ if (kvm_vm_has_ran_once(kvm) ||
(kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
ret = -EBUSY;
break;
@@ -894,7 +894,6 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
}
mutex_unlock(&arm_pmus_lock);
- mutex_unlock(&kvm->lock);
return ret;
}
@@ -902,22 +901,20 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
struct kvm *kvm = vcpu->kvm;
+ lockdep_assert_held(&kvm->arch.config_lock);
+
if (!kvm_vcpu_has_pmu(vcpu))
return -ENODEV;
if (vcpu->arch.pmu.created)
return -EBUSY;
- mutex_lock(&kvm->lock);
if (!kvm->arch.arm_pmu) {
/* No PMU set, get the default one */
kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
- if (!kvm->arch.arm_pmu) {
- mutex_unlock(&kvm->lock);
+ if (!kvm->arch.arm_pmu)
return -ENODEV;
- }
}
- mutex_unlock(&kvm->lock);
switch (attr->attr) {
case KVM_ARM_VCPU_PMU_V3_IRQ: {
@@ -961,19 +958,13 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
filter.action != KVM_PMU_EVENT_DENY))
return -EINVAL;
- mutex_lock(&kvm->lock);
-
- if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) {
- mutex_unlock(&kvm->lock);
+ if (kvm_vm_has_ran_once(kvm))
return -EBUSY;
- }
if (!kvm->arch.pmu_filter) {
kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
- if (!kvm->arch.pmu_filter) {
- mutex_unlock(&kvm->lock);
+ if (!kvm->arch.pmu_filter)
return -ENOMEM;
- }
/*
* The default depends on the first applied filter.
@@ -992,8 +983,6 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
else
bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
- mutex_unlock(&kvm->lock);
-
return 0;
}
case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c
index 7fbc4c1b9df0..1f69b667332b 100644
--- a/arch/arm64/kvm/psci.c
+++ b/arch/arm64/kvm/psci.c
@@ -62,6 +62,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
struct vcpu_reset_state *reset_state;
struct kvm *kvm = source_vcpu->kvm;
struct kvm_vcpu *vcpu = NULL;
+ int ret = PSCI_RET_SUCCESS;
unsigned long cpu_id;
cpu_id = smccc_get_arg1(source_vcpu);
@@ -76,11 +77,15 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
*/
if (!vcpu)
return PSCI_RET_INVALID_PARAMS;
+
+ spin_lock(&vcpu->arch.mp_state_lock);
if (!kvm_arm_vcpu_stopped(vcpu)) {
if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
- return PSCI_RET_ALREADY_ON;
+ ret = PSCI_RET_ALREADY_ON;
else
- return PSCI_RET_INVALID_PARAMS;
+ ret = PSCI_RET_INVALID_PARAMS;
+
+ goto out_unlock;
}
reset_state = &vcpu->arch.reset_state;
@@ -96,7 +101,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
*/
reset_state->r0 = smccc_get_arg3(source_vcpu);
- WRITE_ONCE(reset_state->reset, true);
+ reset_state->reset = true;
kvm_make_request(KVM_REQ_VCPU_RESET, vcpu);
/*
@@ -105,10 +110,12 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
*/
smp_wmb();
- vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE;
+ WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
kvm_vcpu_wake_up(vcpu);
- return PSCI_RET_SUCCESS;
+out_unlock:
+ spin_unlock(&vcpu->arch.mp_state_lock);
+ return ret;
}
static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
@@ -168,8 +175,11 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags)
* after this call is handled and before the VCPUs have been
* re-initialized.
*/
- kvm_for_each_vcpu(i, tmp, vcpu->kvm)
- tmp->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+ spin_lock(&tmp->arch.mp_state_lock);
+ WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
+ spin_unlock(&tmp->arch.mp_state_lock);
+ }
kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
@@ -229,7 +239,6 @@ static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32
static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
{
- struct kvm *kvm = vcpu->kvm;
u32 psci_fn = smccc_get_function(vcpu);
unsigned long val;
int ret = 1;
@@ -254,9 +263,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
kvm_psci_narrow_to_32bit(vcpu);
fallthrough;
case PSCI_0_2_FN64_CPU_ON:
- mutex_lock(&kvm->lock);
val = kvm_psci_vcpu_on(vcpu);
- mutex_unlock(&kvm->lock);
break;
case PSCI_0_2_FN_AFFINITY_INFO:
kvm_psci_narrow_to_32bit(vcpu);
@@ -395,7 +402,6 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor)
static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
{
- struct kvm *kvm = vcpu->kvm;
u32 psci_fn = smccc_get_function(vcpu);
unsigned long val;
@@ -405,9 +411,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
val = PSCI_RET_SUCCESS;
break;
case KVM_PSCI_FN_CPU_ON:
- mutex_lock(&kvm->lock);
val = kvm_psci_vcpu_on(vcpu);
- mutex_unlock(&kvm->lock);
break;
default:
val = PSCI_RET_NOT_SUPPORTED;
@@ -435,6 +439,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
int kvm_psci_call(struct kvm_vcpu *vcpu)
{
u32 psci_fn = smccc_get_function(vcpu);
+ int version = kvm_psci_version(vcpu);
unsigned long val;
val = kvm_psci_check_allowed_function(vcpu, psci_fn);
@@ -443,7 +448,7 @@ int kvm_psci_call(struct kvm_vcpu *vcpu)
return 1;
}
- switch (kvm_psci_version(vcpu)) {
+ switch (version) {
case KVM_ARM_PSCI_1_1:
return kvm_psci_1_x_call(vcpu, 1);
case KVM_ARM_PSCI_1_0:
@@ -453,6 +458,8 @@ int kvm_psci_call(struct kvm_vcpu *vcpu)
case KVM_ARM_PSCI_0_1:
return kvm_psci_0_1_call(vcpu);
default:
- return -EINVAL;
+ WARN_ONCE(1, "Unknown PSCI version %d", version);
+ smccc_set_retval(vcpu, SMCCC_RET_NOT_SUPPORTED, 0, 0, 0);
+ return 1;
}
}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 49a3257dec46..b5dee8e57e77 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -205,7 +205,7 @@ static int kvm_set_vm_width(struct kvm_vcpu *vcpu)
is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
- lockdep_assert_held(&kvm->lock);
+ lockdep_assert_held(&kvm->arch.config_lock);
if (test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags)) {
/*
@@ -262,17 +262,18 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
bool loaded;
u32 pstate;
- mutex_lock(&vcpu->kvm->lock);
+ mutex_lock(&vcpu->kvm->arch.config_lock);
ret = kvm_set_vm_width(vcpu);
- if (!ret) {
- reset_state = vcpu->arch.reset_state;
- WRITE_ONCE(vcpu->arch.reset_state.reset, false);
- }
- mutex_unlock(&vcpu->kvm->lock);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
if (ret)
return ret;
+ spin_lock(&vcpu->arch.mp_state_lock);
+ reset_state = vcpu->arch.reset_state;
+ vcpu->arch.reset_state.reset = false;
+ spin_unlock(&vcpu->arch.mp_state_lock);
+
/* Reset PMU outside of the non-preemptible section */
kvm_pmu_vcpu_reset(vcpu);
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 53749d3a0996..feca77083a5c 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1139,6 +1139,12 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
tmr = TIMER_PTIMER;
treg = TIMER_REG_CVAL;
break;
+ case SYS_CNTPCT_EL0:
+ case SYS_CNTPCTSS_EL0:
+ case SYS_AARCH32_CNTPCT:
+ tmr = TIMER_PTIMER;
+ treg = TIMER_REG_CNT;
+ break;
default:
print_sys_reg_msg(p, "%s", "Unhandled trapped timer register");
kvm_inject_undefined(vcpu);
@@ -2075,6 +2081,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
AMU_AMEVTYPER1_EL0(14),
AMU_AMEVTYPER1_EL0(15),
+ { SYS_DESC(SYS_CNTPCT_EL0), access_arch_timer },
+ { SYS_DESC(SYS_CNTPCTSS_EL0), access_arch_timer },
{ SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer },
{ SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer },
{ SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer },
@@ -2525,10 +2533,12 @@ static const struct sys_reg_desc cp15_64_regs[] = {
{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 },
{ CP15_PMU_SYS_REG(DIRECT, 0, 0, 9, 0), .access = access_pmu_evcntr },
{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */
+ { SYS_DESC(SYS_AARCH32_CNTPCT), access_arch_timer },
{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR1_EL1 },
{ Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */
{ Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */
{ SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer },
+ { SYS_DESC(SYS_AARCH32_CNTPCTSS), access_arch_timer },
};
static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h
index f3e46a976125..6ce5c025218d 100644
--- a/arch/arm64/kvm/trace_arm.h
+++ b/arch/arm64/kvm/trace_arm.h
@@ -206,6 +206,7 @@ TRACE_EVENT(kvm_get_timer_map,
__field( unsigned long, vcpu_id )
__field( int, direct_vtimer )
__field( int, direct_ptimer )
+ __field( int, emul_vtimer )
__field( int, emul_ptimer )
),
@@ -214,14 +215,17 @@ TRACE_EVENT(kvm_get_timer_map,
__entry->direct_vtimer = arch_timer_ctx_index(map->direct_vtimer);
__entry->direct_ptimer =
(map->direct_ptimer) ? arch_timer_ctx_index(map->direct_ptimer) : -1;
+ __entry->emul_vtimer =
+ (map->emul_vtimer) ? arch_timer_ctx_index(map->emul_vtimer) : -1;
__entry->emul_ptimer =
(map->emul_ptimer) ? arch_timer_ctx_index(map->emul_ptimer) : -1;
),
- TP_printk("VCPU: %ld, dv: %d, dp: %d, ep: %d",
+ TP_printk("VCPU: %ld, dv: %d, dp: %d, ev: %d, ep: %d",
__entry->vcpu_id,
__entry->direct_vtimer,
__entry->direct_ptimer,
+ __entry->emul_vtimer,
__entry->emul_ptimer)
);
diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c
index 78cde687383c..07aa0437125a 100644
--- a/arch/arm64/kvm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
@@ -85,7 +85,7 @@ static void *vgic_debug_start(struct seq_file *s, loff_t *pos)
struct kvm *kvm = s->private;
struct vgic_state_iter *iter;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
iter = kvm->arch.vgic.iter;
if (iter) {
iter = ERR_PTR(-EBUSY);
@@ -104,7 +104,7 @@ static void *vgic_debug_start(struct seq_file *s, loff_t *pos)
if (end_of_vgic(iter))
iter = NULL;
out:
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.config_lock);
return iter;
}
@@ -132,12 +132,12 @@ static void vgic_debug_stop(struct seq_file *s, void *v)
if (IS_ERR(v))
return;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
iter = kvm->arch.vgic.iter;
kfree(iter->lpi_array);
kfree(iter);
kvm->arch.vgic.iter = NULL;
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.config_lock);
}
static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index cd134db41a57..9d42c7cb2b58 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -74,9 +74,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
unsigned long i;
int ret;
- if (irqchip_in_kernel(kvm))
- return -EEXIST;
-
/*
* This function is also called by the KVM_CREATE_IRQCHIP handler,
* which had no chance yet to check the availability of the GICv2
@@ -87,10 +84,20 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
!kvm_vgic_global_state.can_emulate_gicv2)
return -ENODEV;
+ /* Must be held to avoid race with vCPU creation */
+ lockdep_assert_held(&kvm->lock);
+
ret = -EBUSY;
if (!lock_all_vcpus(kvm))
return ret;
+ mutex_lock(&kvm->arch.config_lock);
+
+ if (irqchip_in_kernel(kvm)) {
+ ret = -EEXIST;
+ goto out_unlock;
+ }
+
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu_has_run_once(vcpu))
goto out_unlock;
@@ -118,6 +125,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions);
out_unlock:
+ mutex_unlock(&kvm->arch.config_lock);
unlock_all_vcpus(kvm);
return ret;
}
@@ -227,9 +235,9 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
* KVM io device for the redistributor that belongs to this VCPU.
*/
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
- mutex_lock(&vcpu->kvm->lock);
+ mutex_lock(&vcpu->kvm->arch.config_lock);
ret = vgic_register_redist_iodev(vcpu);
- mutex_unlock(&vcpu->kvm->lock);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
}
return ret;
}
@@ -250,7 +258,6 @@ static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu)
* The function is generally called when nr_spis has been explicitly set
* by the guest through the KVM DEVICE API. If not nr_spis is set to 256.
* vgic_initialized() returns true when this function has succeeded.
- * Must be called with kvm->lock held!
*/
int vgic_init(struct kvm *kvm)
{
@@ -259,6 +266,8 @@ int vgic_init(struct kvm *kvm)
int ret = 0, i;
unsigned long idx;
+ lockdep_assert_held(&kvm->arch.config_lock);
+
if (vgic_initialized(kvm))
return 0;
@@ -373,12 +382,13 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
}
-/* To be called with kvm->lock held */
static void __kvm_vgic_destroy(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
unsigned long i;
+ lockdep_assert_held(&kvm->arch.config_lock);
+
vgic_debug_destroy(kvm);
kvm_for_each_vcpu(i, vcpu, kvm)
@@ -389,9 +399,9 @@ static void __kvm_vgic_destroy(struct kvm *kvm)
void kvm_vgic_destroy(struct kvm *kvm)
{
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
__kvm_vgic_destroy(kvm);
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.config_lock);
}
/**
@@ -414,9 +424,9 @@ int vgic_lazy_init(struct kvm *kvm)
if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2)
return -EBUSY;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
ret = vgic_init(kvm);
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.config_lock);
}
return ret;
@@ -441,7 +451,7 @@ int kvm_vgic_map_resources(struct kvm *kvm)
if (likely(vgic_ready(kvm)))
return 0;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
if (vgic_ready(kvm))
goto out;
@@ -459,7 +469,7 @@ int kvm_vgic_map_resources(struct kvm *kvm)
dist->ready = true;
out:
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.config_lock);
return ret;
}
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 2642e9ce2819..750e51e3779a 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -1958,6 +1958,16 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
mutex_init(&its->its_lock);
mutex_init(&its->cmd_lock);
+ /* Yep, even more trickery for lock ordering... */
+#ifdef CONFIG_LOCKDEP
+ mutex_lock(&dev->kvm->arch.config_lock);
+ mutex_lock(&its->cmd_lock);
+ mutex_lock(&its->its_lock);
+ mutex_unlock(&its->its_lock);
+ mutex_unlock(&its->cmd_lock);
+ mutex_unlock(&dev->kvm->arch.config_lock);
+#endif
+
its->vgic_its_base = VGIC_ADDR_UNDEF;
INIT_LIST_HEAD(&its->device_list);
@@ -2045,6 +2055,13 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev,
mutex_lock(&dev->kvm->lock);
+ if (!lock_all_vcpus(dev->kvm)) {
+ mutex_unlock(&dev->kvm->lock);
+ return -EBUSY;
+ }
+
+ mutex_lock(&dev->kvm->arch.config_lock);
+
if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) {
ret = -ENXIO;
goto out;
@@ -2058,11 +2075,6 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev,
goto out;
}
- if (!lock_all_vcpus(dev->kvm)) {
- ret = -EBUSY;
- goto out;
- }
-
addr = its->vgic_its_base + offset;
len = region->access_flags & VGIC_ACCESS_64bit ? 8 : 4;
@@ -2076,8 +2088,9 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev,
} else {
*reg = region->its_read(dev->kvm, its, addr, len);
}
- unlock_all_vcpus(dev->kvm);
out:
+ mutex_unlock(&dev->kvm->arch.config_lock);
+ unlock_all_vcpus(dev->kvm);
mutex_unlock(&dev->kvm->lock);
return ret;
}
@@ -2749,14 +2762,15 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
return 0;
mutex_lock(&kvm->lock);
- mutex_lock(&its->its_lock);
if (!lock_all_vcpus(kvm)) {
- mutex_unlock(&its->its_lock);
mutex_unlock(&kvm->lock);
return -EBUSY;
}
+ mutex_lock(&kvm->arch.config_lock);
+ mutex_lock(&its->its_lock);
+
switch (attr) {
case KVM_DEV_ARM_ITS_CTRL_RESET:
vgic_its_reset(kvm, its);
@@ -2769,8 +2783,9 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
break;
}
- unlock_all_vcpus(kvm);
mutex_unlock(&its->its_lock);
+ mutex_unlock(&kvm->arch.config_lock);
+ unlock_all_vcpus(kvm);
mutex_unlock(&kvm->lock);
return ret;
}
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index edeac2380591..35cfa268fd5d 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -46,7 +46,7 @@ int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev
struct vgic_dist *vgic = &kvm->arch.vgic;
int r;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
switch (FIELD_GET(KVM_ARM_DEVICE_TYPE_MASK, dev_addr->id)) {
case KVM_VGIC_V2_ADDR_TYPE_DIST:
r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
@@ -68,7 +68,7 @@ int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev
r = -ENODEV;
}
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.config_lock);
return r;
}
@@ -102,7 +102,7 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri
if (get_user(addr, uaddr))
return -EFAULT;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
switch (attr->attr) {
case KVM_VGIC_V2_ADDR_TYPE_DIST:
r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
@@ -191,7 +191,7 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri
}
out:
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.config_lock);
if (!r && !write)
r = put_user(addr, uaddr);
@@ -227,7 +227,7 @@ static int vgic_set_common_attr(struct kvm_device *dev,
(val & 31))
return -EINVAL;
- mutex_lock(&dev->kvm->lock);
+ mutex_lock(&dev->kvm->arch.config_lock);
if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_spis)
ret = -EBUSY;
@@ -235,16 +235,16 @@ static int vgic_set_common_attr(struct kvm_device *dev,
dev->kvm->arch.vgic.nr_spis =
val - VGIC_NR_PRIVATE_IRQS;
- mutex_unlock(&dev->kvm->lock);
+ mutex_unlock(&dev->kvm->arch.config_lock);
return ret;
}
case KVM_DEV_ARM_VGIC_GRP_CTRL: {
switch (attr->attr) {
case KVM_DEV_ARM_VGIC_CTRL_INIT:
- mutex_lock(&dev->kvm->lock);
+ mutex_lock(&dev->kvm->arch.config_lock);
r = vgic_init(dev->kvm);
- mutex_unlock(&dev->kvm->lock);
+ mutex_unlock(&dev->kvm->arch.config_lock);
return r;
case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES:
/*
@@ -260,7 +260,10 @@ static int vgic_set_common_attr(struct kvm_device *dev,
mutex_unlock(&dev->kvm->lock);
return -EBUSY;
}
+
+ mutex_lock(&dev->kvm->arch.config_lock);
r = vgic_v3_save_pending_tables(dev->kvm);
+ mutex_unlock(&dev->kvm->arch.config_lock);
unlock_all_vcpus(dev->kvm);
mutex_unlock(&dev->kvm->lock);
return r;
@@ -342,44 +345,6 @@ int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
return 0;
}
-/* unlocks vcpus from @vcpu_lock_idx and smaller */
-static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx)
-{
- struct kvm_vcpu *tmp_vcpu;
-
- for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
- tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
- mutex_unlock(&tmp_vcpu->mutex);
- }
-}
-
-void unlock_all_vcpus(struct kvm *kvm)
-{
- unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1);
-}
-
-/* Returns true if all vcpus were locked, false otherwise */
-bool lock_all_vcpus(struct kvm *kvm)
-{
- struct kvm_vcpu *tmp_vcpu;
- unsigned long c;
-
- /*
- * Any time a vcpu is run, vcpu_load is called which tries to grab the
- * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure
- * that no other VCPUs are run and fiddle with the vgic state while we
- * access it.
- */
- kvm_for_each_vcpu(c, tmp_vcpu, kvm) {
- if (!mutex_trylock(&tmp_vcpu->mutex)) {
- unlock_vcpus(kvm, c - 1);
- return false;
- }
- }
-
- return true;
-}
-
/**
* vgic_v2_attr_regs_access - allows user space to access VGIC v2 state
*
@@ -411,15 +376,17 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev,
mutex_lock(&dev->kvm->lock);
+ if (!lock_all_vcpus(dev->kvm)) {
+ mutex_unlock(&dev->kvm->lock);
+ return -EBUSY;
+ }
+
+ mutex_lock(&dev->kvm->arch.config_lock);
+
ret = vgic_init(dev->kvm);
if (ret)
goto out;
- if (!lock_all_vcpus(dev->kvm)) {
- ret = -EBUSY;
- goto out;
- }
-
switch (attr->group) {
case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, &val);
@@ -432,8 +399,9 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev,
break;
}
- unlock_all_vcpus(dev->kvm);
out:
+ mutex_unlock(&dev->kvm->arch.config_lock);
+ unlock_all_vcpus(dev->kvm);
mutex_unlock(&dev->kvm->lock);
if (!ret && !is_write)
@@ -569,12 +537,14 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
mutex_lock(&dev->kvm->lock);
- if (unlikely(!vgic_initialized(dev->kvm))) {
- ret = -EBUSY;
- goto out;
+ if (!lock_all_vcpus(dev->kvm)) {
+ mutex_unlock(&dev->kvm->lock);
+ return -EBUSY;
}
- if (!lock_all_vcpus(dev->kvm)) {
+ mutex_lock(&dev->kvm->arch.config_lock);
+
+ if (unlikely(!vgic_initialized(dev->kvm))) {
ret = -EBUSY;
goto out;
}
@@ -609,8 +579,9 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
break;
}
- unlock_all_vcpus(dev->kvm);
out:
+ mutex_unlock(&dev->kvm->arch.config_lock);
+ unlock_all_vcpus(dev->kvm);
mutex_unlock(&dev->kvm->lock);
if (!ret && uaccess && !is_write) {
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index 91201f743033..472b18ac92a2 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -111,7 +111,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
case GICD_CTLR: {
bool was_enabled, is_hwsgi;
- mutex_lock(&vcpu->kvm->lock);
+ mutex_lock(&vcpu->kvm->arch.config_lock);
was_enabled = dist->enabled;
is_hwsgi = dist->nassgireq;
@@ -139,7 +139,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
else if (!was_enabled && dist->enabled)
vgic_kick_vcpus(vcpu->kvm);
- mutex_unlock(&vcpu->kvm->lock);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
break;
}
case GICD_TYPER:
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c
index e67b3b2c8044..1939c94e0b24 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio.c
@@ -530,13 +530,13 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
u32 val;
- mutex_lock(&vcpu->kvm->lock);
+ mutex_lock(&vcpu->kvm->arch.config_lock);
vgic_access_active_prepare(vcpu, intid);
val = __vgic_mmio_read_active(vcpu, addr, len);
vgic_access_active_finish(vcpu, intid);
- mutex_unlock(&vcpu->kvm->lock);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
return val;
}
@@ -625,13 +625,13 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- mutex_lock(&vcpu->kvm->lock);
+ mutex_lock(&vcpu->kvm->arch.config_lock);
vgic_access_active_prepare(vcpu, intid);
__vgic_mmio_write_cactive(vcpu, addr, len, val);
vgic_access_active_finish(vcpu, intid);
- mutex_unlock(&vcpu->kvm->lock);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
}
int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
@@ -662,13 +662,13 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- mutex_lock(&vcpu->kvm->lock);
+ mutex_lock(&vcpu->kvm->arch.config_lock);
vgic_access_active_prepare(vcpu, intid);
__vgic_mmio_write_sactive(vcpu, addr, len, val);
vgic_access_active_finish(vcpu, intid);
- mutex_unlock(&vcpu->kvm->lock);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
}
int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index a413718be92b..3bb003478060 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -232,9 +232,8 @@ int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq)
* @kvm: Pointer to the VM being initialized
*
* We may be called each time a vITS is created, or when the
- * vgic is initialized. This relies on kvm->lock to be
- * held. In both cases, the number of vcpus should now be
- * fixed.
+ * vgic is initialized. In both cases, the number of vcpus
+ * should now be fixed.
*/
int vgic_v4_init(struct kvm *kvm)
{
@@ -243,6 +242,8 @@ int vgic_v4_init(struct kvm *kvm)
int nr_vcpus, ret;
unsigned long i;
+ lockdep_assert_held(&kvm->arch.config_lock);
+
if (!kvm_vgic_global_state.has_gicv4)
return 0; /* Nothing to see here... move along. */
@@ -309,14 +310,14 @@ int vgic_v4_init(struct kvm *kvm)
/**
* vgic_v4_teardown - Free the GICv4 data structures
* @kvm: Pointer to the VM being destroyed
- *
- * Relies on kvm->lock to be held.
*/
void vgic_v4_teardown(struct kvm *kvm)
{
struct its_vm *its_vm = &kvm->arch.vgic.its_vm;
int i;
+ lockdep_assert_held(&kvm->arch.config_lock);
+
if (!its_vm->vpes)
return;
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index d97e6080b421..8be4c1ebdec2 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -24,11 +24,13 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
/*
* Locking order is always:
* kvm->lock (mutex)
- * its->cmd_lock (mutex)
- * its->its_lock (mutex)
- * vgic_cpu->ap_list_lock must be taken with IRQs disabled
- * kvm->lpi_list_lock must be taken with IRQs disabled
- * vgic_irq->irq_lock must be taken with IRQs disabled
+ * vcpu->mutex (mutex)
+ * kvm->arch.config_lock (mutex)
+ * its->cmd_lock (mutex)
+ * its->its_lock (mutex)
+ * vgic_cpu->ap_list_lock must be taken with IRQs disabled
+ * kvm->lpi_list_lock must be taken with IRQs disabled
+ * vgic_irq->irq_lock must be taken with IRQs disabled
*
* As the ap_list_lock might be taken from the timer interrupt handler,
* we have to disable IRQs before taking this lock and everything lower
@@ -573,6 +575,21 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
return 0;
}
+int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid)
+{
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
+ unsigned long flags;
+ int ret = -1;
+
+ raw_spin_lock_irqsave(&irq->irq_lock, flags);
+ if (irq->hw)
+ ret = irq->hwintid;
+ raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+
+ vgic_put_irq(vcpu->kvm, irq);
+ return ret;
+}
+
/**
* kvm_vgic_set_owner - Set the owner of an interrupt for a VM
*
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 7f7f3c5ed85a..f9923beedd27 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -273,9 +273,6 @@ int vgic_init(struct kvm *kvm);
void vgic_debug_init(struct kvm *kvm);
void vgic_debug_destroy(struct kvm *kvm);
-bool lock_all_vcpus(struct kvm *kvm);
-void unlock_all_vcpus(struct kvm *kvm);
-
static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu;
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 37b1340e9646..40ba95472594 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -23,6 +23,7 @@ HAS_DCPOP
HAS_DIT
HAS_E0PD
HAS_ECV
+HAS_ECV_CNTPOFF
HAS_EPAN
HAS_GENERIC_AUTH
HAS_GENERIC_AUTH_ARCH_QARMA3
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index dd5a9c7e310f..7063f1aacc54 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -1952,6 +1952,10 @@ Sysreg CONTEXTIDR_EL2 3 4 13 0 1
Fields CONTEXTIDR_ELx
EndSysreg
+Sysreg CNTPOFF_EL2 3 4 14 0 6
+Field 63:0 PhysicalOffset
+EndSysreg
+
Sysreg CPACR_EL12 3 5 1 0 2
Fields CPACR_ELx
EndSysreg
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 808c292ad3f4..15bda40517ff 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2204,4 +2204,11 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \
KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS)
+/*
+ * KVM previously used a u32 field in kvm_run to indicate the hypercall was
+ * initiated from long mode. KVM now sets bit 0 to indicate long mode, but the
+ * remaining 31 lower bits must be 0 to preserve ABI.
+ */
+#define KVM_EXIT_HYPERCALL_MBZ GENMASK_ULL(31, 1)
+
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 7f467fe05d42..1a6a1f987949 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -559,4 +559,7 @@ struct kvm_pmu_event_filter {
#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */
#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */
+/* x86-specific KVM_EXIT_HYPERCALL flags. */
+#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0)
+
#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7713420abab0..27a1d5c1a018 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9803,7 +9803,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
vcpu->run->hypercall.args[0] = gpa;
vcpu->run->hypercall.args[1] = npages;
vcpu->run->hypercall.args[2] = attrs;
- vcpu->run->hypercall.longmode = op_64_bit;
+ vcpu->run->hypercall.flags = 0;
+ if (op_64_bit)
+ vcpu->run->hypercall.flags |= KVM_EXIT_HYPERCALL_LONG_MODE;
+
+ WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ);
vcpu->arch.complete_userspace_io = complete_hypercall_exit;
return 0;
}
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index 057c8964aefb..cbbc9a6dc571 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -21,6 +21,7 @@
#define CNTHCTL_EVNTEN (1 << 2)
#define CNTHCTL_EVNTDIR (1 << 3)
#define CNTHCTL_EVNTI (0xF << 4)
+#define CNTHCTL_ECV (1 << 12)
enum arch_timer_reg {
ARCH_TIMER_REG_CTRL,
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index c52a6e6839da..bb3cb005873e 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -13,6 +13,9 @@
enum kvm_arch_timers {
TIMER_PTIMER,
TIMER_VTIMER,
+ NR_KVM_EL0_TIMERS,
+ TIMER_HVTIMER = NR_KVM_EL0_TIMERS,
+ TIMER_HPTIMER,
NR_KVM_TIMERS
};
@@ -21,6 +24,7 @@ enum kvm_arch_timer_regs {
TIMER_REG_CVAL,
TIMER_REG_TVAL,
TIMER_REG_CTL,
+ TIMER_REG_VOFF,
};
struct arch_timer_offset {
@@ -29,21 +33,29 @@ struct arch_timer_offset {
* structure. If NULL, assume a zero offset.
*/
u64 *vm_offset;
+ /*
+ * If set, pointer to one of the offsets in the vcpu's sysreg
+ * array. If NULL, assume a zero offset.
+ */
+ u64 *vcpu_offset;
};
struct arch_timer_vm_data {
/* Offset applied to the virtual timer/counter */
u64 voffset;
+ /* Offset applied to the physical timer/counter */
+ u64 poffset;
+
+ /* The PPI for each timer, global to the VM */
+ u8 ppi[NR_KVM_TIMERS];
};
struct arch_timer_context {
struct kvm_vcpu *vcpu;
- /* Timer IRQ */
- struct kvm_irq_level irq;
-
/* Emulated Timer (may be unused) */
struct hrtimer hrtimer;
+ u64 ns_frac;
/* Offset for this counter/timer */
struct arch_timer_offset offset;
@@ -54,14 +66,19 @@ struct arch_timer_context {
*/
bool loaded;
+ /* Output level of the timer IRQ */
+ struct {
+ bool level;
+ } irq;
+
/* Duplicated state from arch_timer.c for convenience */
u32 host_timer_irq;
- u32 host_timer_irq_flags;
};
struct timer_map {
struct arch_timer_context *direct_vtimer;
struct arch_timer_context *direct_ptimer;
+ struct arch_timer_context *emul_vtimer;
struct arch_timer_context *emul_ptimer;
};
@@ -84,6 +101,8 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu);
void kvm_timer_update_run(struct kvm_vcpu *vcpu);
void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);
+void kvm_timer_init_vm(struct kvm *kvm);
+
u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
@@ -98,15 +117,18 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu);
void kvm_timer_init_vhe(void);
-bool kvm_arch_timer_get_input_level(int vintid);
-
#define vcpu_timer(v) (&(v)->arch.timer_cpu)
#define vcpu_get_timer(v,t) (&vcpu_timer(v)->timers[(t)])
#define vcpu_vtimer(v) (&(v)->arch.timer_cpu.timers[TIMER_VTIMER])
#define vcpu_ptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_PTIMER])
+#define vcpu_hvtimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HVTIMER])
+#define vcpu_hptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HPTIMER])
#define arch_timer_ctx_index(ctx) ((ctx) - vcpu_timer((ctx)->vcpu)->timers)
+#define timer_vm_data(ctx) (&(ctx)->vcpu->kvm->arch.timer_data)
+#define timer_irq(ctx) (timer_vm_data(ctx)->ppi[arch_timer_ctx_index(ctx)])
+
u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
enum kvm_arch_timers tmr,
enum kvm_arch_timer_regs treg);
diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h
index 1188f116cf4e..2df152207ccd 100644
--- a/include/kvm/arm_hypercalls.h
+++ b/include/kvm/arm_hypercalls.h
@@ -6,7 +6,7 @@
#include <asm/kvm_emulate.h>
-int kvm_hvc_call_handler(struct kvm_vcpu *vcpu);
+int kvm_smccc_call_handler(struct kvm_vcpu *vcpu);
static inline u32 smccc_get_function(struct kvm_vcpu *vcpu)
{
@@ -43,9 +43,13 @@ static inline void smccc_set_retval(struct kvm_vcpu *vcpu,
struct kvm_one_reg;
void kvm_arm_init_hypercalls(struct kvm *kvm);
+void kvm_arm_teardown_hypercalls(struct kvm *kvm);
int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+int kvm_vm_smccc_has_attr(struct kvm *kvm, struct kvm_device_attr *attr);
+int kvm_vm_smccc_set_attr(struct kvm *kvm, struct kvm_device_attr *attr);
+
#endif
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index d3ad51fde9db..402b545959af 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -380,6 +380,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
u32 vintid, struct irq_ops *ops);
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid);
+int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid);
bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid);
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index d77aef872a0a..16287a996c32 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -341,8 +341,13 @@ struct kvm_run {
__u64 nr;
__u64 args[6];
__u64 ret;
- __u32 longmode;
- __u32 pad;
+
+ union {
+#ifndef __KERNEL__
+ __u32 longmode;
+#endif
+ __u64 flags;
+ };
} hypercall;
/* KVM_EXIT_TPR_ACCESS */
struct {
@@ -1184,6 +1189,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224
#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
#define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226
+#define KVM_CAP_COUNTER_OFFSET 227
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1543,6 +1549,8 @@ struct kvm_s390_ucas_mapping {
#define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter)
#define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3)
#define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags)
+/* Available with KVM_CAP_COUNTER_OFFSET */
+#define KVM_ARM_SET_COUNTER_OFFSET _IOW(KVMIO, 0xb5, struct kvm_arm_counter_offset)
/* ioctl for vm fd */
#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 84a627c43795..d66a0642cffd 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -141,6 +141,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
TEST_GEN_PROGS_aarch64 += aarch64/psci_test
+TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter
TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
index 26556a266021..8ef370924a02 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -47,6 +47,7 @@ struct test_args {
int nr_iter;
int timer_period_ms;
int migration_freq_ms;
+ struct kvm_arm_counter_offset offset;
};
static struct test_args test_args = {
@@ -54,6 +55,7 @@ static struct test_args test_args = {
.nr_iter = NR_TEST_ITERS_DEF,
.timer_period_ms = TIMER_TEST_PERIOD_MS_DEF,
.migration_freq_ms = TIMER_TEST_MIGRATION_FREQ_MS,
+ .offset = { .reserved = 1 },
};
#define msecs_to_usecs(msec) ((msec) * 1000LL)
@@ -121,25 +123,35 @@ static void guest_validate_irq(unsigned int intid,
uint64_t xcnt = 0, xcnt_diff_us, cval = 0;
unsigned long xctl = 0;
unsigned int timer_irq = 0;
+ unsigned int accessor;
- if (stage == GUEST_STAGE_VTIMER_CVAL ||
- stage == GUEST_STAGE_VTIMER_TVAL) {
- xctl = timer_get_ctl(VIRTUAL);
- timer_set_ctl(VIRTUAL, CTL_IMASK);
- xcnt = timer_get_cntct(VIRTUAL);
- cval = timer_get_cval(VIRTUAL);
+ if (intid == IAR_SPURIOUS)
+ return;
+
+ switch (stage) {
+ case GUEST_STAGE_VTIMER_CVAL:
+ case GUEST_STAGE_VTIMER_TVAL:
+ accessor = VIRTUAL;
timer_irq = vtimer_irq;
- } else if (stage == GUEST_STAGE_PTIMER_CVAL ||
- stage == GUEST_STAGE_PTIMER_TVAL) {
- xctl = timer_get_ctl(PHYSICAL);
- timer_set_ctl(PHYSICAL, CTL_IMASK);
- xcnt = timer_get_cntct(PHYSICAL);
- cval = timer_get_cval(PHYSICAL);
+ break;
+ case GUEST_STAGE_PTIMER_CVAL:
+ case GUEST_STAGE_PTIMER_TVAL:
+ accessor = PHYSICAL;
timer_irq = ptimer_irq;
- } else {
+ break;
+ default:
GUEST_ASSERT(0);
+ return;
}
+ xctl = timer_get_ctl(accessor);
+ if ((xctl & CTL_IMASK) || !(xctl & CTL_ENABLE))
+ return;
+
+ timer_set_ctl(accessor, CTL_IMASK);
+ xcnt = timer_get_cntct(accessor);
+ cval = timer_get_cval(accessor);
+
xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
/* Make sure we are dealing with the correct timer IRQ */
@@ -148,6 +160,8 @@ static void guest_validate_irq(unsigned int intid,
/* Basic 'timer condition met' check */
GUEST_ASSERT_3(xcnt >= cval, xcnt, cval, xcnt_diff_us);
GUEST_ASSERT_1(xctl & CTL_ISTATUS, xctl);
+
+ WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
}
static void guest_irq_handler(struct ex_regs *regs)
@@ -158,8 +172,6 @@ static void guest_irq_handler(struct ex_regs *regs)
guest_validate_irq(intid, shared_data);
- WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
-
gic_set_eoi(intid);
}
@@ -372,6 +384,13 @@ static struct kvm_vm *test_vm_create(void)
vm_init_descriptor_tables(vm);
vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
+ if (!test_args.offset.reserved) {
+ if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET))
+ vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &test_args.offset);
+ else
+ TEST_FAIL("no support for global offset\n");
+ }
+
for (i = 0; i < nr_vcpus; i++)
vcpu_init_descriptor_tables(vcpus[i]);
@@ -403,6 +422,7 @@ static void test_print_help(char *name)
TIMER_TEST_PERIOD_MS_DEF);
pr_info("\t-m: Frequency (in ms) of vCPUs to migrate to different pCPU. 0 to turn off (default: %u)\n",
TIMER_TEST_MIGRATION_FREQ_MS);
+ pr_info("\t-o: Counter offset (in counter cycles, default: 0)\n");
pr_info("\t-h: print this help screen\n");
}
@@ -410,7 +430,7 @@ static bool parse_args(int argc, char *argv[])
{
int opt;
- while ((opt = getopt(argc, argv, "hn:i:p:m:")) != -1) {
+ while ((opt = getopt(argc, argv, "hn:i:p:m:o:")) != -1) {
switch (opt) {
case 'n':
test_args.nr_vcpus = atoi_positive("Number of vCPUs", optarg);
@@ -429,6 +449,10 @@ static bool parse_args(int argc, char *argv[])
case 'm':
test_args.migration_freq_ms = atoi_non_negative("Frequency", optarg);
break;
+ case 'o':
+ test_args.offset.counter_offset = strtol(optarg, NULL, 0);
+ test_args.offset.reserved = 0;
+ break;
case 'h':
default:
goto err;
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index d287dd2cac0a..d4e1f4af29d6 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -651,7 +651,7 @@ int main(int ac, char **av)
* The current blessed list was primed with the output of kernel version
* v4.15 with --core-reg-fixup and then later updated with new registers.
*
- * The blessed list is up to date with kernel version v5.13-rc3
+ * The blessed list is up to date with kernel version v6.4 (or so we hope)
*/
static __u64 base_regs[] = {
KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
@@ -807,10 +807,10 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 0, 3, 7),
ARM64_SYS_REG(3, 0, 0, 4, 0), /* ID_AA64PFR0_EL1 */
ARM64_SYS_REG(3, 0, 0, 4, 1), /* ID_AA64PFR1_EL1 */
- ARM64_SYS_REG(3, 0, 0, 4, 2),
+ ARM64_SYS_REG(3, 0, 0, 4, 2), /* ID_AA64PFR2_EL1 */
ARM64_SYS_REG(3, 0, 0, 4, 3),
ARM64_SYS_REG(3, 0, 0, 4, 4), /* ID_AA64ZFR0_EL1 */
- ARM64_SYS_REG(3, 0, 0, 4, 5),
+ ARM64_SYS_REG(3, 0, 0, 4, 5), /* ID_AA64SMFR0_EL1 */
ARM64_SYS_REG(3, 0, 0, 4, 6),
ARM64_SYS_REG(3, 0, 0, 4, 7),
ARM64_SYS_REG(3, 0, 0, 5, 0), /* ID_AA64DFR0_EL1 */
@@ -823,7 +823,7 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 0, 5, 7),
ARM64_SYS_REG(3, 0, 0, 6, 0), /* ID_AA64ISAR0_EL1 */
ARM64_SYS_REG(3, 0, 0, 6, 1), /* ID_AA64ISAR1_EL1 */
- ARM64_SYS_REG(3, 0, 0, 6, 2),
+ ARM64_SYS_REG(3, 0, 0, 6, 2), /* ID_AA64ISAR2_EL1 */
ARM64_SYS_REG(3, 0, 0, 6, 3),
ARM64_SYS_REG(3, 0, 0, 6, 4),
ARM64_SYS_REG(3, 0, 0, 6, 5),
@@ -832,8 +832,8 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 0, 7, 0), /* ID_AA64MMFR0_EL1 */
ARM64_SYS_REG(3, 0, 0, 7, 1), /* ID_AA64MMFR1_EL1 */
ARM64_SYS_REG(3, 0, 0, 7, 2), /* ID_AA64MMFR2_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3),
- ARM64_SYS_REG(3, 0, 0, 7, 4),
+ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 4), /* ID_AA64MMFR4_EL1 */
ARM64_SYS_REG(3, 0, 0, 7, 5),
ARM64_SYS_REG(3, 0, 0, 7, 6),
ARM64_SYS_REG(3, 0, 0, 7, 7),
@@ -858,6 +858,9 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */
ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */
ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 0, 1), /* CNTPCT_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 2, 1), /* CNTP_CTL_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 2, 2), /* CNTP_CVAL_EL0 */
ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */
ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */
ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */
diff --git a/tools/testing/selftests/kvm/aarch64/smccc_filter.c b/tools/testing/selftests/kvm/aarch64/smccc_filter.c
new file mode 100644
index 000000000000..f4ceae9c8925
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/smccc_filter.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * smccc_filter - Tests for the SMCCC filter UAPI.
+ *
+ * Copyright (c) 2023 Google LLC
+ *
+ * This test includes:
+ * - Tests that the UAPI constraints are upheld by KVM. For example, userspace
+ * is prevented from filtering the architecture range of SMCCC calls.
+ * - Test that the filter actions (DENIED, FWD_TO_USER) work as intended.
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/psci.h>
+#include <stdint.h>
+
+#include "processor.h"
+#include "test_util.h"
+
+enum smccc_conduit {
+ HVC_INSN,
+ SMC_INSN,
+};
+
+#define for_each_conduit(conduit) \
+ for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++)
+
+static void guest_main(uint32_t func_id, enum smccc_conduit conduit)
+{
+ struct arm_smccc_res res;
+
+ if (conduit == SMC_INSN)
+ smccc_smc(func_id, 0, 0, 0, 0, 0, 0, 0, &res);
+ else
+ smccc_hvc(func_id, 0, 0, 0, 0, 0, 0, 0, &res);
+
+ GUEST_SYNC(res.a0);
+}
+
+static int __set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
+ enum kvm_smccc_filter_action action)
+{
+ struct kvm_smccc_filter filter = {
+ .base = start,
+ .nr_functions = nr_functions,
+ .action = action,
+ };
+
+ return __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL,
+ KVM_ARM_VM_SMCCC_FILTER, &filter);
+}
+
+static void set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
+ enum kvm_smccc_filter_action action)
+{
+ int ret = __set_smccc_filter(vm, start, nr_functions, action);
+
+ TEST_ASSERT(!ret, "failed to configure SMCCC filter: %d", ret);
+}
+
+static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu)
+{
+ struct kvm_vcpu_init init;
+ struct kvm_vm *vm;
+
+ vm = vm_create(1);
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+
+ /*
+ * Enable in-kernel emulation of PSCI to ensure that calls are denied
+ * due to the SMCCC filter, not because of KVM.
+ */
+ init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
+
+ *vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main);
+ return vm;
+}
+
+static void test_pad_must_be_zero(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ struct kvm_smccc_filter filter = {
+ .base = PSCI_0_2_FN_PSCI_VERSION,
+ .nr_functions = 1,
+ .action = KVM_SMCCC_FILTER_DENY,
+ .pad = { -1 },
+ };
+ int r;
+
+ r = __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL,
+ KVM_ARM_VM_SMCCC_FILTER, &filter);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Setting filter with nonzero padding should return EINVAL");
+}
+
+/* Ensure that userspace cannot filter the Arm Architecture SMCCC range */
+static void test_filter_reserved_range(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ uint32_t smc64_fn;
+ int r;
+
+ r = __set_smccc_filter(vm, ARM_SMCCC_ARCH_WORKAROUND_1,
+ 1, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EEXIST,
+ "Attempt to filter reserved range should return EEXIST");
+
+ smc64_fn = ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_64,
+ 0, 0);
+
+ r = __set_smccc_filter(vm, smc64_fn, 1, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EEXIST,
+ "Attempt to filter reserved range should return EEXIST");
+
+ kvm_vm_free(vm);
+}
+
+static void test_invalid_nr_functions(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ int r;
+
+ r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 0, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Attempt to filter 0 functions should return EINVAL");
+
+ kvm_vm_free(vm);
+}
+
+static void test_overflow_nr_functions(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ int r;
+
+ r = __set_smccc_filter(vm, ~0, ~0, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Attempt to overflow filter range should return EINVAL");
+
+ kvm_vm_free(vm);
+}
+
+static void test_reserved_action(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ int r;
+
+ r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, -1);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Attempt to use reserved filter action should return EINVAL");
+
+ kvm_vm_free(vm);
+}
+
+
+/* Test that overlapping configurations of the SMCCC filter are rejected */
+static void test_filter_overlap(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ int r;
+
+ set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY);
+
+ r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EEXIST,
+ "Attempt to filter already configured range should return EEXIST");
+
+ kvm_vm_free(vm);
+}
+
+static void expect_call_denied(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) != UCALL_SYNC)
+ TEST_FAIL("Unexpected ucall: %lu\n", uc.cmd);
+
+ TEST_ASSERT(uc.args[1] == SMCCC_RET_NOT_SUPPORTED,
+ "Unexpected SMCCC return code: %lu", uc.args[1]);
+}
+
+/* Denied SMCCC calls have a return code of SMCCC_RET_NOT_SUPPORTED */
+static void test_filter_denied(void)
+{
+ enum smccc_conduit conduit;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ for_each_conduit(conduit) {
+ vm = setup_vm(&vcpu);
+
+ set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_DENY);
+ vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit);
+
+ vcpu_run(vcpu);
+ expect_call_denied(vcpu);
+
+ kvm_vm_free(vm);
+ }
+}
+
+static void expect_call_fwd_to_user(struct kvm_vcpu *vcpu, uint32_t func_id,
+ enum smccc_conduit conduit)
+{
+ struct kvm_run *run = vcpu->run;
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERCALL,
+ "Unexpected exit reason: %u", run->exit_reason);
+ TEST_ASSERT(run->hypercall.nr == func_id,
+ "Unexpected SMCCC function: %llu", run->hypercall.nr);
+
+ if (conduit == SMC_INSN)
+ TEST_ASSERT(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC,
+ "KVM_HYPERCALL_EXIT_SMC is not set");
+ else
+ TEST_ASSERT(!(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC),
+ "KVM_HYPERCALL_EXIT_SMC is set");
+}
+
+/* SMCCC calls forwarded to userspace cause KVM_EXIT_HYPERCALL exits */
+static void test_filter_fwd_to_user(void)
+{
+ enum smccc_conduit conduit;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ for_each_conduit(conduit) {
+ vm = setup_vm(&vcpu);
+
+ set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_FWD_TO_USER);
+ vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit);
+
+ vcpu_run(vcpu);
+ expect_call_fwd_to_user(vcpu, PSCI_0_2_FN_PSCI_VERSION, conduit);
+
+ kvm_vm_free(vm);
+ }
+}
+
+static bool kvm_supports_smccc_filter(void)
+{
+ struct kvm_vm *vm = vm_create_barebones();
+ int r;
+
+ r = __kvm_has_device_attr(vm->fd, KVM_ARM_VM_SMCCC_CTRL, KVM_ARM_VM_SMCCC_FILTER);
+
+ kvm_vm_free(vm);
+ return !r;
+}
+
+int main(void)
+{
+ TEST_REQUIRE(kvm_supports_smccc_filter());
+
+ test_pad_must_be_zero();
+ test_invalid_nr_functions();
+ test_overflow_nr_functions();
+ test_reserved_action();
+ test_filter_reserved_range();
+ test_filter_overlap();
+ test_filter_denied();
+ test_filter_fwd_to_user();
+}
diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config
index d011b38e259e..8835fed09e9f 100644
--- a/tools/testing/selftests/kvm/config
+++ b/tools/testing/selftests/kvm/config
@@ -2,3 +2,4 @@ CONFIG_KVM=y
CONFIG_KVM_INTEL=y
CONFIG_KVM_AMD=y
CONFIG_USERFAULTFD=y
+CONFIG_IDLE_PAGE_TRACKING=y
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index 5f977528e09c..cb537253a6b9 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -214,6 +214,19 @@ void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
uint64_t arg6, struct arm_smccc_res *res);
+/**
+ * smccc_smc - Invoke a SMCCC function using the smc conduit
+ * @function_id: the SMCCC function to be called
+ * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7
+ * @res: pointer to write the return values from registers x0-x3
+ *
+ */
+void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+ uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+ uint64_t arg6, struct arm_smccc_res *res);
+
+
+
uint32_t guest_get_vcpuid(void);
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 5972a23b2765..3a0259e25335 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -58,10 +58,27 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva)
return (gva >> vm->page_shift) & mask;
}
-static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs)
{
- uint64_t mask = ((1UL << (vm->va_bits - vm->page_shift)) - 1) << vm->page_shift;
- return entry & mask;
+ uint64_t pte;
+
+ pte = pa & GENMASK(47, vm->page_shift);
+ if (vm->page_shift == 16)
+ pte |= FIELD_GET(GENMASK(51, 48), pa) << 12;
+ pte |= attrs;
+
+ return pte;
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)
+{
+ uint64_t pa;
+
+ pa = pte & GENMASK(47, vm->page_shift);
+ if (vm->page_shift == 16)
+ pa |= FIELD_GET(GENMASK(15, 12), pte) << 48;
+
+ return pa;
}
static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
@@ -110,18 +127,18 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = vm_alloc_page_table(vm) | 3;
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
switch (vm->pgtable_levels) {
case 4:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = vm_alloc_page_table(vm) | 3;
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
/* fall through */
case 3:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = vm_alloc_page_table(vm) | 3;
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
/* fall through */
case 2:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
@@ -130,8 +147,7 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
TEST_FAIL("Page table levels must be 2, 3, or 4");
}
- *ptep = paddr | 3;
- *ptep |= (attr_idx << 2) | (1 << 10) /* Access Flag */;
+ *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */
}
void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
@@ -226,7 +242,7 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
{
struct kvm_vcpu_init default_init = { .target = -1, };
struct kvm_vm *vm = vcpu->vm;
- uint64_t sctlr_el1, tcr_el1;
+ uint64_t sctlr_el1, tcr_el1, ttbr0_el1;
if (!init)
init = &default_init;
@@ -277,10 +293,13 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
+ ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift);
+
/* Configure output size */
switch (vm->mode) {
case VM_MODE_P52V48_64K:
tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+ ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2;
break;
case VM_MODE_P48V48_4K:
case VM_MODE_P48V48_16K:
@@ -310,7 +329,7 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), vm->pgd);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1);
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id);
}
@@ -508,29 +527,43 @@ void aarch64_get_supported_page_sizes(uint32_t ipa,
close(kvm_fd);
}
+#define __smccc_call(insn, function_id, arg0, arg1, arg2, arg3, arg4, arg5, \
+ arg6, res) \
+ asm volatile("mov w0, %w[function_id]\n" \
+ "mov x1, %[arg0]\n" \
+ "mov x2, %[arg1]\n" \
+ "mov x3, %[arg2]\n" \
+ "mov x4, %[arg3]\n" \
+ "mov x5, %[arg4]\n" \
+ "mov x6, %[arg5]\n" \
+ "mov x7, %[arg6]\n" \
+ #insn "#0\n" \
+ "mov %[res0], x0\n" \
+ "mov %[res1], x1\n" \
+ "mov %[res2], x2\n" \
+ "mov %[res3], x3\n" \
+ : [res0] "=r"(res->a0), [res1] "=r"(res->a1), \
+ [res2] "=r"(res->a2), [res3] "=r"(res->a3) \
+ : [function_id] "r"(function_id), [arg0] "r"(arg0), \
+ [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3), \
+ [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6) \
+ : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7")
+
+
void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
uint64_t arg6, struct arm_smccc_res *res)
{
- asm volatile("mov w0, %w[function_id]\n"
- "mov x1, %[arg0]\n"
- "mov x2, %[arg1]\n"
- "mov x3, %[arg2]\n"
- "mov x4, %[arg3]\n"
- "mov x5, %[arg4]\n"
- "mov x6, %[arg5]\n"
- "mov x7, %[arg6]\n"
- "hvc #0\n"
- "mov %[res0], x0\n"
- "mov %[res1], x1\n"
- "mov %[res2], x2\n"
- "mov %[res3], x3\n"
- : [res0] "=r"(res->a0), [res1] "=r"(res->a1),
- [res2] "=r"(res->a2), [res3] "=r"(res->a3)
- : [function_id] "r"(function_id), [arg0] "r"(arg0),
- [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3),
- [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6)
- : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7");
+ __smccc_call(hvc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
+ arg6, res);
+}
+
+void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+ uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+ uint64_t arg6, struct arm_smccc_res *res)
+{
+ __smccc_call(smc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
+ arg6, res);
}
void kvm_selftest_arch_init(void)