diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-05 00:59:54 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-05 00:59:54 +0300 |
commit | 7c5c3a6177fa9646884114fc7f2e970b0bc50dc9 (patch) | |
tree | 956857522574ae7cb07d2227dc16e53d7e9e00e7 /tools/testing/selftests/kvm/dirty_log_perf_test.c | |
parent | f0a892f599c46af673e47418c47c15e69a7b67f4 (diff) | |
parent | 281106f938d3daaea6f8b6723a8217a2a1ef6936 (diff) | |
download | linux-7c5c3a6177fa9646884114fc7f2e970b0bc50dc9.tar.xz |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini:
"Quite a large pull request due to a selftest API overhaul and some
patches that had come in too late for 5.19.
ARM:
- Unwinder implementations for both nVHE modes (classic and
protected), complete with an overflow stack
- Rework of the sysreg access from userspace, with a complete rewrite
of the vgic-v3 view to allign with the rest of the infrastructure
- Disagregation of the vcpu flags in separate sets to better track
their use model.
- A fix for the GICv2-on-v3 selftest
- A small set of cosmetic fixes
RISC-V:
- Track ISA extensions used by Guest using bitmap
- Added system instruction emulation framework
- Added CSR emulation framework
- Added gfp_custom flag in struct kvm_mmu_memory_cache
- Added G-stage ioremap() and iounmap() functions
- Added support for Svpbmt inside Guest
s390:
- add an interface to provide a hypervisor dump for secure guests
- improve selftests to use TAP interface
- enable interpretive execution of zPCI instructions (for PCI
passthrough)
- First part of deferred teardown
- CPU Topology
- PV attestation
- Minor fixes
x86:
- Permit guests to ignore single-bit ECC errors
- Intel IPI virtualization
- Allow getting/setting pending triple fault with
KVM_GET/SET_VCPU_EVENTS
- PEBS virtualization
- Simplify PMU emulation by just using PERF_TYPE_RAW events
- More accurate event reinjection on SVM (avoid retrying
instructions)
- Allow getting/setting the state of the speaker port data bit
- Refuse starting the kvm-intel module if VM-Entry/VM-Exit controls
are inconsistent
- "Notify" VM exit (detect microarchitectural hangs) for Intel
- Use try_cmpxchg64 instead of cmpxchg64
- Ignore benign host accesses to PMU MSRs when PMU is disabled
- Allow disabling KVM's "MONITOR/MWAIT are NOPs!" behavior
- Allow NX huge page mitigation to be disabled on a per-vm basis
- Port eager page splitting to shadow MMU as well
- Enable CMCI capability by default and handle injected UCNA errors
- Expose pid of vcpu threads in debugfs
- x2AVIC support for AMD
- cleanup PIO emulation
- Fixes for LLDT/LTR emulation
- Don't require refcounted "struct page" to create huge SPTEs
- Miscellaneous cleanups:
- MCE MSR emulation
- Use separate namespaces for guest PTEs and shadow PTEs bitmasks
- PIO emulation
- Reorganize rmap API, mostly around rmap destruction
- Do not workaround very old KVM bugs for L0 that runs with nesting enabled
- new selftests API for CPUID
Generic:
- Fix races in gfn->pfn cache refresh; do not pin pages tracked by
the cache
- new selftests API using struct kvm_vcpu instead of a (vm, id)
tuple"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (606 commits)
selftests: kvm: set rax before vmcall
selftests: KVM: Add exponent check for boolean stats
selftests: KVM: Provide descriptive assertions in kvm_binary_stats_test
selftests: KVM: Check stat name before other fields
KVM: x86/mmu: remove unused variable
RISC-V: KVM: Add support for Svpbmt inside Guest/VM
RISC-V: KVM: Use PAGE_KERNEL_IO in kvm_riscv_gstage_ioremap()
RISC-V: KVM: Add G-stage ioremap() and iounmap() functions
KVM: Add gfp_custom flag in struct kvm_mmu_memory_cache
RISC-V: KVM: Add extensible CSR emulation framework
RISC-V: KVM: Add extensible system instruction emulation framework
RISC-V: KVM: Factor-out instruction emulation into separate sources
RISC-V: KVM: move preempt_disable() call in kvm_arch_vcpu_ioctl_run
RISC-V: KVM: Make kvm_riscv_guest_timer_init a void function
RISC-V: KVM: Fix variable spelling mistake
RISC-V: KVM: Improve ISA extension by using a bitmap
KVM, x86/mmu: Fix the comment around kvm_tdp_mmu_zap_leafs()
KVM: SVM: Dump Virtual Machine Save Area (VMSA) to klog
KVM: x86/mmu: Treat NX as a valid SPTE bit for NPT
KVM: x86: Do not block APIC write for non ICR registers
...
Diffstat (limited to 'tools/testing/selftests/kvm/dirty_log_perf_test.c')
-rw-r--r-- | tools/testing/selftests/kvm/dirty_log_perf_test.c | 81 |
1 files changed, 51 insertions, 30 deletions
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index d60a34cdfaee..f99e39a672d3 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -59,6 +59,7 @@ static void arch_cleanup_vm(struct kvm_vm *vm) static int nr_vcpus = 1; static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; +static bool run_vcpus_while_disabling_dirty_logging; /* Host variables */ static u64 dirty_log_manual_caps; @@ -68,54 +69,59 @@ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) { - int ret; - struct kvm_vm *vm = perf_test_args.vm; + struct kvm_vcpu *vcpu = vcpu_args->vcpu; + int vcpu_idx = vcpu_args->vcpu_idx; uint64_t pages_count = 0; struct kvm_run *run; struct timespec start; struct timespec ts_diff; struct timespec total = (struct timespec){0}; struct timespec avg; - int vcpu_id = vcpu_args->vcpu_id; + int ret; - run = vcpu_state(vm, vcpu_id); + run = vcpu->run; while (!READ_ONCE(host_quit)) { int current_iteration = READ_ONCE(iteration); clock_gettime(CLOCK_MONOTONIC, &start); - ret = _vcpu_run(vm, vcpu_id); + ret = _vcpu_run(vcpu); ts_diff = timespec_elapsed(start); TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); - TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC, + TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, "Invalid guest sync status: exit_reason=%s\n", exit_reason_str(run->exit_reason)); - pr_debug("Got sync event from vCPU %d\n", vcpu_id); - vcpu_last_completed_iteration[vcpu_id] = current_iteration; + pr_debug("Got sync event from vCPU %d\n", vcpu_idx); + vcpu_last_completed_iteration[vcpu_idx] = current_iteration; pr_debug("vCPU %d updated last completed iteration to %d\n", - vcpu_id, vcpu_last_completed_iteration[vcpu_id]); + vcpu_idx, vcpu_last_completed_iteration[vcpu_idx]); if (current_iteration) { pages_count += vcpu_args->pages; total = timespec_add(total, ts_diff); pr_debug("vCPU %d iteration %d dirty memory time: %ld.%.9lds\n", - vcpu_id, current_iteration, ts_diff.tv_sec, + vcpu_idx, current_iteration, ts_diff.tv_sec, ts_diff.tv_nsec); } else { pr_debug("vCPU %d iteration %d populate memory time: %ld.%.9lds\n", - vcpu_id, current_iteration, ts_diff.tv_sec, + vcpu_idx, current_iteration, ts_diff.tv_sec, ts_diff.tv_nsec); } + /* + * Keep running the guest while dirty logging is being disabled + * (iteration is negative) so that vCPUs are accessing memory + * for the entire duration of zapping collapsible SPTEs. + */ while (current_iteration == READ_ONCE(iteration) && - !READ_ONCE(host_quit)) {} + READ_ONCE(iteration) >= 0 && !READ_ONCE(host_quit)) {} } - avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_id]); + avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_idx]); pr_debug("\nvCPU %d dirtied 0x%lx pages over %d iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", - vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id], + vcpu_idx, pages_count, vcpu_last_completed_iteration[vcpu_idx], total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec); } @@ -207,14 +213,13 @@ static void run_test(enum vm_guest_mode mode, void *arg) uint64_t guest_num_pages; uint64_t host_num_pages; uint64_t pages_per_slot; - int vcpu_id; struct timespec start; struct timespec ts_diff; struct timespec get_dirty_log_total = (struct timespec){0}; struct timespec vcpu_dirty_total = (struct timespec){0}; struct timespec avg; - struct kvm_enable_cap cap = {}; struct timespec clear_dirty_log_total = (struct timespec){0}; + int i; vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, p->slots, p->backing_src, @@ -222,18 +227,16 @@ static void run_test(enum vm_guest_mode mode, void *arg) perf_test_set_wr_fract(vm, p->wr_fract); - guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm); + guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift; guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); host_num_pages = vm_num_host_pages(mode, guest_num_pages); pages_per_slot = host_num_pages / p->slots; bitmaps = alloc_bitmaps(p->slots, pages_per_slot); - if (dirty_log_manual_caps) { - cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; - cap.args[0] = dirty_log_manual_caps; - vm_enable_cap(vm, &cap); - } + if (dirty_log_manual_caps) + vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, + dirty_log_manual_caps); arch_setup_vm(vm, nr_vcpus); @@ -242,15 +245,15 @@ static void run_test(enum vm_guest_mode mode, void *arg) host_quit = false; clock_gettime(CLOCK_MONOTONIC, &start); - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) - vcpu_last_completed_iteration[vcpu_id] = -1; + for (i = 0; i < nr_vcpus; i++) + vcpu_last_completed_iteration[i] = -1; perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); /* Allow the vCPUs to populate memory */ pr_debug("Starting iteration %d - Populating\n", iteration); - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { - while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != + for (i = 0; i < nr_vcpus; i++) { + while (READ_ONCE(vcpu_last_completed_iteration[i]) != iteration) ; } @@ -275,8 +278,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) iteration++; pr_debug("Starting iteration %d\n", iteration); - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { - while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) + for (i = 0; i < nr_vcpus; i++) { + while (READ_ONCE(vcpu_last_completed_iteration[i]) != iteration) ; } @@ -305,6 +308,14 @@ static void run_test(enum vm_guest_mode mode, void *arg) } } + /* + * Run vCPUs while dirty logging is being disabled to stress disabling + * in terms of both performance and correctness. Opt-in via command + * line as this significantly increases time to disable dirty logging. + */ + if (run_vcpus_while_disabling_dirty_logging) + WRITE_ONCE(iteration, -1); + /* Disable dirty logging */ clock_gettime(CLOCK_MONOTONIC, &start); disable_dirty_logging(vm, p->slots); @@ -312,7 +323,11 @@ static void run_test(enum vm_guest_mode mode, void *arg) pr_info("Disabling dirty logging time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); - /* Tell the vcpu thread to quit */ + /* + * Tell the vCPU threads to quit. No need to manually check that vCPUs + * have stopped running after disabling dirty logging, the join will + * wait for them to exit. + */ host_quit = true; perf_test_join_vcpu_threads(nr_vcpus); @@ -352,6 +367,9 @@ static void help(char *name) " Warning: a low offset can conflict with the loaded test code.\n"); guest_modes_help(); printf(" -n: Run the vCPUs in nested mode (L2)\n"); + printf(" -e: Run vCPUs while dirty logging is being disabled. This\n" + " can significantly increase runtime, especially if there\n" + " isn't a dedicated pCPU for the main thread.\n"); printf(" -b: specify the size of the memory region which should be\n" " dirtied by each vCPU. e.g. 10M or 3G.\n" " (default: 1G)\n"); @@ -388,8 +406,11 @@ int main(int argc, char *argv[]) guest_modes_append_default(); - while ((opt = getopt(argc, argv, "ghi:p:m:nb:f:v:os:x:")) != -1) { + while ((opt = getopt(argc, argv, "eghi:p:m:nb:f:v:os:x:")) != -1) { switch (opt) { + case 'e': + /* 'e' is for evil. */ + run_vcpus_while_disabling_dirty_logging = true; case 'g': dirty_log_manual_caps = 0; break; |