summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorAaron Lewis <aaronlewis@google.com>2022-12-20 19:12:33 +0300
committerSean Christopherson <seanjc@google.com>2023-01-24 21:06:12 +0300
commit14329b825ffb7f2710c13fdcc37fc2e7c67b6781 (patch)
treeb21d27479b32bcc4ed3d5241639ffdf857c81870 /arch
parentc5a287fa0dccd3e43a6ea5602191f9ac09a68889 (diff)
downloadlinux-14329b825ffb7f2710c13fdcc37fc2e7c67b6781.tar.xz
KVM: x86/pmu: Introduce masked events to the pmu event filter
When building a list of filter events, it can sometimes be a challenge to fit all the events needed to adequately restrict the guest into the limited space available in the pmu event filter. This stems from the fact that the pmu event filter requires each event (i.e. event select + unit mask) be listed, when the intention might be to restrict the event select altogether, regardless of its unit mask. Instead of increasing the number of filter events in the pmu event filter, add a new encoding that is able to do a more generalized match on the unit mask. Introduce masked events as another encoding the pmu event filter understands. Masked events have the fields: mask, match, and exclude. When filtering based on these events, the mask is applied to the guest's unit mask to see if it matches the match value (i.e. (umask & mask) == match). The exclude bit can then be used to exclude events from that match. E.g. for a given event select, if it's easier to say which unit mask values shouldn't be filtered, a masked event can be set up to match all possible unit mask values, then another masked event can be set up to match the unit mask values that shouldn't be filtered. Userspace can query to see if this feature exists by looking for the capability, KVM_CAP_PMU_EVENT_MASKED_EVENTS. This feature is enabled by setting the flags field in the pmu event filter to KVM_PMU_EVENT_FLAG_MASKED_EVENTS. Events can be encoded by using KVM_PMU_ENCODE_MASKED_ENTRY(). It is an error to have a bit set outside the valid bits for a masked event, and calls to KVM_SET_PMU_EVENT_FILTER will return -EINVAL in such cases, including the high bits of the event select (35:32) if called on Intel. With these updates the filter matching code has been updated to match on a common event. Masked events were flexible enough to handle both event types, so they were used as the common event. 
This changes how guest events get filtered because regardless of the type of event used in the uAPI, they will be converted to masked events. Because of this there could be a slight performance hit because instead of matching the filter event with a lookup on event select + unit mask, it does a lookup on event select then walks the unit masks to find the match. This shouldn't be a big problem because I would expect the set of common event selects to be small, and if they aren't, the set can likely be reduced by using masked events to generalize the unit mask. Using one type of event when filtering guest events allows for a common code path to be used. Signed-off-by: Aaron Lewis <aaronlewis@google.com> Link: https://lore.kernel.org/r/20221220161236.555143-5-aaronlewis@google.com Signed-off-by: Sean Christopherson <seanjc@google.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/include/asm/kvm_host.h14
-rw-r--r--arch/x86/include/uapi/asm/kvm.h29
-rw-r--r--arch/x86/kvm/pmu.c197
-rw-r--r--arch/x86/kvm/x86.c1
4 files changed, 210 insertions, 31 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4d2bc08794e4..cd0151e6af62 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1145,6 +1145,18 @@ struct kvm_x86_msr_filter {
struct msr_bitmap_range ranges[16];
};
+struct kvm_x86_pmu_event_filter { /* in-kernel form of the userspace PMU event filter */
+ __u32 action; /* KVM_PMU_EVENT_ALLOW or KVM_PMU_EVENT_DENY */
+ __u32 nevents; /* number of valid entries in events[] */
+ __u32 fixed_counter_bitmap; /* copied as-is from the userspace filter */
+ __u32 flags; /* KVM_PMU_EVENT_FLAG_* bits copied from userspace */
+ __u32 nr_includes; /* number of entries reachable through includes */
+ __u32 nr_excludes; /* number of entries reachable through excludes */
+ __u64 *includes; /* set to events by prepare_filter_lists() */
+ __u64 *excludes; /* set to events + nr_includes by prepare_filter_lists() */
+ __u64 events[]; /* masked-event entries; sorted by prepare_filter_lists() */
+};
+
enum kvm_apicv_inhibit {
/********************************************************************/
@@ -1363,7 +1375,7 @@ struct kvm_arch {
/* Guest can access the SGX PROVISIONKEY. */
bool sgx_provisioning_allowed;
- struct kvm_pmu_event_filter __rcu *pmu_event_filter;
+ struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter;
struct task_struct *nx_huge_page_recovery_thread;
#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index e48deab8901d..f142f3ebf4e4 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -525,6 +525,35 @@ struct kvm_pmu_event_filter {
#define KVM_PMU_EVENT_ALLOW 0
#define KVM_PMU_EVENT_DENY 1
+#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS BIT(0)
+#define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS)
+
+/*
+ * Masked event layout.
+ * Bits Description
+ * ---- -----------
+ * 7:0 event select (low bits)
+ * 15:8 umask match
+ * 31:16 unused
+ * 35:32 event select (high bits)
+ * 54:36 unused
+ * 55 exclude bit
+ * 63:56 umask mask
+ */
+
+#define KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, exclude) \
+ (((event_select) & 0xFFULL) | (((event_select) & 0XF00ULL) << 24) | \
+ (((mask) & 0xFFULL) << 56) | \
+ (((match) & 0xFFULL) << 8) | \
+ ((__u64)(!!(exclude)) << 55))
+
+#define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \
+ (GENMASK_ULL(7, 0) | GENMASK_ULL(35, 32))
+#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (GENMASK_ULL(63, 56))
+#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (GENMASK_ULL(15, 8))
+#define KVM_PMU_MASKED_ENTRY_EXCLUDE (BIT_ULL(55))
+#define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT (56)
+
/* for KVM_{GET,SET,HAS}_DEVICE_ATTR */
#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */
#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 8a734f4343bb..3264f8e0e8ef 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -255,30 +255,99 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
return true;
}
-static int cmp_u64(const void *pa, const void *pb)
+static int filter_cmp(const void *pa, const void *pb, u64 mask)
{
- u64 a = *(u64 *)pa;
- u64 b = *(u64 *)pb;
+ u64 a = *(u64 *)pa & mask;
+ u64 b = *(u64 *)pb & mask;
return (a > b) - (a < b);
}
-static u64 *find_filter_entry(struct kvm_pmu_event_filter *filter, u64 key)
+
+static int filter_sort_cmp(const void *pa, const void *pb)
+{
+ return filter_cmp(pa, pb, (KVM_PMU_MASKED_ENTRY_EVENT_SELECT |
+ KVM_PMU_MASKED_ENTRY_EXCLUDE)); /* sort key: EXCLUDE bit (more significant) plus event select; umask bits ignored */
+}
+
+/*
+ * For the event filter, searching is done on the 'includes' list and
+ * 'excludes' list separately rather than on the 'events' list (which
+ * has both). As a result the exclude bit can be ignored.
+ *
+ * Only the event select bits of the two entries are compared, so entries
+ * that differ solely in their umask mask/match (or exclude bit) compare
+ * as equal. Returns <0, 0 or >0 in the usual comparator convention.
+ */
+static int filter_event_cmp(const void *pa, const void *pb)
+{
+ return filter_cmp(pa, pb, (KVM_PMU_MASKED_ENTRY_EVENT_SELECT));
+}
+
+static int find_filter_index(u64 *events, u64 nevents, u64 key)
+{
+ u64 *fe = bsearch(&key, events, nevents, sizeof(events[0]),
+ filter_event_cmp); /* compares on event select only */
+
+ if (!fe)
+ return -1; /* no entry in @events has the event select of @key */
+
+ return fe - events; /* index of a matching entry; the caller scans both directions from it */
+}
+
+static bool is_filter_entry_match(u64 filter_event, u64 umask)
+{
+ u64 mask = filter_event >> (KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT - 8); /* shift by 48: umask mask moves from bits 63:56 to bits 15:8 */
+ u64 match = filter_event & KVM_PMU_MASKED_ENTRY_UMASK_MATCH; /* umask match, kept in place at bits 15:8 */
+
+ BUILD_BUG_ON((KVM_PMU_ENCODE_MASKED_ENTRY(0, 0xff, 0, false) >>
+ (KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT - 8)) !=
+ ARCH_PERFMON_EVENTSEL_UMASK); /* compile-time check: a shifted all-ones umask mask equals the eventsel umask field */
+
+ return (umask & mask) == match; /* mask also carries shifted-in bits below bit 8; filter_contains_match() passes a umask limited to ARCH_PERFMON_EVENTSEL_UMASK */
+}
+
+static bool filter_contains_match(u64 *events, u64 nevents, u64 eventsel)
{
- return bsearch(&key, filter->events, filter->nevents,
- sizeof(filter->events[0]), cmp_u64);
+ u64 event_select = eventsel & kvm_pmu_ops.EVENTSEL_EVENT;
+ u64 umask = eventsel & ARCH_PERFMON_EVENTSEL_UMASK;
+ int i, index;
+
+ index = find_filter_index(events, nevents, event_select);
+ if (index < 0)
+ return false;
+
+ /*
+ * Entries are sorted by the event select. Walk the list in both
+ * directions to process all entries with the targeted event select.
+ */
+ for (i = index; i < nevents; i++) {
+ if (filter_event_cmp(&events[i], &event_select))
+ break;
+
+ if (is_filter_entry_match(events[i], umask))
+ return true;
+ }
+
+ for (i = index - 1; i >= 0; i--) {
+ if (filter_event_cmp(&events[i], &event_select))
+ break;
+
+ if (is_filter_entry_match(events[i], umask))
+ return true;
+ }
+
+ return false;
}
-static bool is_gp_event_allowed(struct kvm_pmu_event_filter *filter, u64 eventsel)
+static bool is_gp_event_allowed(struct kvm_x86_pmu_event_filter *f,
+ u64 eventsel)
{
- if (find_filter_entry(filter, eventsel & (kvm_pmu_ops.EVENTSEL_EVENT |
- ARCH_PERFMON_EVENTSEL_UMASK)))
- return filter->action == KVM_PMU_EVENT_ALLOW;
+ if (filter_contains_match(f->includes, f->nr_includes, eventsel) &&
+ !filter_contains_match(f->excludes, f->nr_excludes, eventsel))
+ return f->action == KVM_PMU_EVENT_ALLOW;
- return filter->action == KVM_PMU_EVENT_DENY;
+ return f->action == KVM_PMU_EVENT_DENY;
}
-static bool is_fixed_event_allowed(struct kvm_pmu_event_filter *filter, int idx)
+static bool is_fixed_event_allowed(struct kvm_x86_pmu_event_filter *filter,
+ int idx)
{
int fixed_idx = idx - INTEL_PMC_IDX_FIXED;
@@ -294,7 +363,7 @@ static bool is_fixed_event_allowed(struct kvm_pmu_event_filter *filter, int idx)
static bool check_pmu_event_filter(struct kvm_pmc *pmc)
{
- struct kvm_pmu_event_filter *filter;
+ struct kvm_x86_pmu_event_filter *filter;
struct kvm *kvm = pmc->vcpu->kvm;
if (!static_call(kvm_x86_pmu_hw_event_available)(pmc))
@@ -604,60 +673,128 @@ void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
}
EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);
-static void remove_impossible_events(struct kvm_pmu_event_filter *filter)
+static bool is_masked_filter_valid(const struct kvm_x86_pmu_event_filter *filter)
+{
+ u64 mask = kvm_pmu_ops.EVENTSEL_EVENT |
+ KVM_PMU_MASKED_ENTRY_UMASK_MASK |
+ KVM_PMU_MASKED_ENTRY_UMASK_MATCH |
+ KVM_PMU_MASKED_ENTRY_EXCLUDE; /* every bit a masked entry is allowed to set */
+ int i;
+
+ for (i = 0; i < filter->nevents; i++) {
+ if (filter->events[i] & ~mask)
+ return false; /* entry sets a bit outside the allowed fields */
+ }
+
+ return true; /* also reached when nevents is 0 */
+}
+
+static void convert_to_masked_filter(struct kvm_x86_pmu_event_filter *filter)
{
int i, j;
for (i = 0, j = 0; i < filter->nevents; i++) {
+ /*
+ * Skip events that are impossible to match against a guest
+ * event. When filtering, only the event select + unit mask
+ * of the guest event is used. To maintain backwards
+ * compatibility, impossible filters can't be rejected :-(
+ */
if (filter->events[i] & ~(kvm_pmu_ops.EVENTSEL_EVENT |
ARCH_PERFMON_EVENTSEL_UMASK))
continue;
-
- filter->events[j++] = filter->events[i];
+ /*
+ * Convert userspace events to a common in-kernel event so
+ * only one code path is needed to support both events. For
+ * the in-kernel events use masked events because they are
+ * flexible enough to handle both cases. To convert to masked
+ * events all that's needed is to add an "all ones" umask_mask,
+ * (unmasked filter events don't support EXCLUDE).
+ */
+ filter->events[j++] = filter->events[i] |
+ (0xFFULL << KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT);
}
filter->nevents = j;
}
+static int prepare_filter_lists(struct kvm_x86_pmu_event_filter *filter)
+{
+ int i;
+
+ if (!(filter->flags & KVM_PMU_EVENT_FLAG_MASKED_EVENTS))
+ convert_to_masked_filter(filter); /* legacy entries: rewrite in place as masked events */
+ else if (!is_masked_filter_valid(filter))
+ return -EINVAL; /* masked entry with a bit outside the valid fields */
+
+ /*
+ * Sort entries by event select and includes vs. excludes so that all
+ * entries for a given event select can be processed efficiently during
+ * filtering. The EXCLUDE flag uses a more significant bit than the
+ * event select, and so the sorted list is also effectively split into
+ * includes and excludes sub-lists.
+ */
+ sort(&filter->events, filter->nevents, sizeof(filter->events[0]),
+ filter_sort_cmp, NULL);
+
+ i = filter->nevents; /* default: every entry is an include */
+ /* Find the first EXCLUDE event (only supported for masked events). */
+ if (filter->flags & KVM_PMU_EVENT_FLAG_MASKED_EVENTS) {
+ for (i = 0; i < filter->nevents; i++) {
+ if (filter->events[i] & KVM_PMU_MASKED_ENTRY_EXCLUDE)
+ break;
+ }
+ }
+
+ filter->nr_includes = i; /* entries before the first EXCLUDE entry */
+ filter->nr_excludes = filter->nevents - filter->nr_includes;
+ filter->includes = filter->events; /* both lists alias the events[] storage */
+ filter->excludes = filter->events + filter->nr_includes;
+
+ return 0;
+}
+
int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
{
- struct kvm_pmu_event_filter tmp, *filter;
+ struct kvm_pmu_event_filter __user *user_filter = argp;
+ struct kvm_x86_pmu_event_filter *filter;
+ struct kvm_pmu_event_filter tmp;
struct kvm_vcpu *vcpu;
unsigned long i;
size_t size;
int r;
- if (copy_from_user(&tmp, argp, sizeof(tmp)))
+ if (copy_from_user(&tmp, user_filter, sizeof(tmp)))
return -EFAULT;
if (tmp.action != KVM_PMU_EVENT_ALLOW &&
tmp.action != KVM_PMU_EVENT_DENY)
return -EINVAL;
- if (tmp.flags != 0)
+ if (tmp.flags & ~KVM_PMU_EVENT_FLAGS_VALID_MASK)
return -EINVAL;
if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS)
return -E2BIG;
size = struct_size(filter, events, tmp.nevents);
- filter = kmalloc(size, GFP_KERNEL_ACCOUNT);
+ filter = kzalloc(size, GFP_KERNEL_ACCOUNT);
if (!filter)
return -ENOMEM;
+ filter->action = tmp.action;
+ filter->nevents = tmp.nevents;
+ filter->fixed_counter_bitmap = tmp.fixed_counter_bitmap;
+ filter->flags = tmp.flags;
+
r = -EFAULT;
- if (copy_from_user(filter, argp, size))
+ if (copy_from_user(filter->events, user_filter->events,
+ sizeof(filter->events[0]) * filter->nevents))
goto cleanup;
- /* Restore the verified state to guard against TOCTOU attacks. */
- *filter = tmp;
-
- remove_impossible_events(filter);
-
- /*
- * Sort the in-kernel list so that we can search it with bsearch.
- */
- sort(&filter->events, filter->nevents, sizeof(__u64), cmp_u64, NULL);
+ r = prepare_filter_lists(filter);
+ if (r)
+ goto cleanup;
mutex_lock(&kvm->lock);
filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 508074e47bc0..da02a08e21b5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4406,6 +4406,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SPLIT_IRQCHIP:
case KVM_CAP_IMMEDIATE_EXIT:
case KVM_CAP_PMU_EVENT_FILTER:
+ case KVM_CAP_PMU_EVENT_MASKED_EVENTS:
case KVM_CAP_GET_MSR_FEATURES:
case KVM_CAP_MSR_PLATFORM_INFO:
case KVM_CAP_EXCEPTION_PAYLOAD: