summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2024-04-11 20:06:24 +0300
committerPaolo Bonzini <pbonzini@redhat.com>2024-04-12 11:41:14 +0300
commit531f5200243242d79e99751e4f8a16592afd121f (patch)
treeb892d8ce293bd19e4fe634c2a305a5d041b5ac39
parent9bc60f733839ab6fcdde0d0b15cbb486123e6402 (diff)
parentf7842747d13d9f4eedba9edec5b834c9d9237717 (diff)
downloadlinux-531f5200243242d79e99751e4f8a16592afd121f.tar.xz
Merge branch 'mm-delete-change-gpte' into HEAD
The .change_pte() MMU notifier callback was intended as an optimization and for this reason it was initially called without a surrounding mmu_notifier_invalidate_range_{start,end}() pair. It was only ever implemented by KVM (which was also the original user of MMU notifiers) and the rules on when to call set_pte_at_notify() rather than set_pte_at() have always been pretty obscure. It may seem a miracle that it has never caused any hard to trigger bugs, but there's a good reason for that: KVM's implementation has been nonfunctional for a good part of its existence. Already in 2012, commit 6bdb913f0a70 ("mm: wrap calls to set_pte_at_notify with invalidate_range_start and invalidate_range_end", 2012-10-09) changed the .change_pte() callback to occur within an invalidate_range_start/end() pair; and because KVM unmaps the sPTEs during .invalidate_range_start(), .change_pte() has no hope of finding a sPTE to change. Therefore, all the code for .change_pte() can be removed from both KVM and mm/, and set_pte_at_notify() can be replaced with just set_pte_at(). Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r--arch/arm64/kvm/mmu.c34
-rw-r--r--arch/loongarch/include/asm/kvm_host.h1
-rw-r--r--arch/loongarch/kvm/mmu.c32
-rw-r--r--arch/mips/kvm/mmu.c30
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h1
-rw-r--r--arch/powerpc/kvm/book3s.c5
-rw-r--r--arch/powerpc/kvm/book3s.h1
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c12
-rw-r--r--arch/powerpc/kvm/book3s_hv.c1
-rw-r--r--arch/powerpc/kvm/book3s_pr.c7
-rw-r--r--arch/powerpc/kvm/e500_mmu_host.c6
-rw-r--r--arch/riscv/kvm/mmu.c20
-rw-r--r--arch/x86/kvm/mmu/mmu.c67
-rw-r--r--arch/x86/kvm/mmu/spte.c16
-rw-r--r--arch/x86/kvm/mmu/spte.h2
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c46
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.h1
-rw-r--r--include/linux/kvm_host.h2
-rw-r--r--include/linux/mmu_notifier.h44
-rw-r--r--include/trace/events/kvm.h15
-rw-r--r--kernel/events/uprobes.c6
-rw-r--r--mm/ksm.c4
-rw-r--r--mm/memory.c7
-rw-r--r--mm/migrate_device.c8
-rw-r--r--mm/mmu_notifier.c17
-rw-r--r--virt/kvm/kvm_main.c50
26 files changed, 16 insertions, 419 deletions
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index dc04bc767865..ff17849be9f4 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1768,40 +1768,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
return false;
}
-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- kvm_pfn_t pfn = pte_pfn(range->arg.pte);
-
- if (!kvm->arch.mmu.pgt)
- return false;
-
- WARN_ON(range->end - range->start != 1);
-
- /*
- * If the page isn't tagged, defer to user_mem_abort() for sanitising
- * the MTE tags. The S2 pte should have been unmapped by
- * mmu_notifier_invalidate_range_end().
- */
- if (kvm_has_mte(kvm) && !page_mte_tagged(pfn_to_page(pfn)))
- return false;
-
- /*
- * We've moved a page around, probably through CoW, so let's treat
- * it just like a translation fault and the map handler will clean
- * the cache to the PoC.
- *
- * The MMU notifiers will have unmapped a huge PMD before calling
- * ->change_pte() (which in turn calls kvm_set_spte_gfn()) and
- * therefore we never need to clear out a huge PMD through this
- * calling path and a memcache is not required.
- */
- kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT,
- PAGE_SIZE, __pfn_to_phys(pfn),
- KVM_PGTABLE_PROT_R, NULL, 0);
-
- return false;
-}
-
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
u64 size = (range->end - range->start) << PAGE_SHIFT;
diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index 2d62f7b0d377..69305441f40d 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -203,7 +203,6 @@ void kvm_flush_tlb_all(void);
void kvm_flush_tlb_gpa(struct kvm_vcpu *vcpu, unsigned long gpa);
int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long badv, bool write);
-void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, bool blockable);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c
index a556cff35740..98883aa23ab8 100644
--- a/arch/loongarch/kvm/mmu.c
+++ b/arch/loongarch/kvm/mmu.c
@@ -494,38 +494,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
range->end << PAGE_SHIFT, &ctx);
}
-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- unsigned long prot_bits;
- kvm_pte_t *ptep;
- kvm_pfn_t pfn = pte_pfn(range->arg.pte);
- gpa_t gpa = range->start << PAGE_SHIFT;
-
- ptep = kvm_populate_gpa(kvm, NULL, gpa, 0);
- if (!ptep)
- return false;
-
- /* Replacing an absent or old page doesn't need flushes */
- if (!kvm_pte_present(NULL, ptep) || !kvm_pte_young(*ptep)) {
- kvm_set_pte(ptep, 0);
- return false;
- }
-
- /* Fill new pte if write protected or page migrated */
- prot_bits = _PAGE_PRESENT | __READABLE;
- prot_bits |= _CACHE_MASK & pte_val(range->arg.pte);
-
- /*
- * Set _PAGE_WRITE or _PAGE_DIRTY iff old and new pte both support
- * _PAGE_WRITE for map_page_fast if next page write fault
- * _PAGE_DIRTY since gpa has already recorded as dirty page
- */
- prot_bits |= __WRITEABLE & *ptep & pte_val(range->arg.pte);
- kvm_set_pte(ptep, kvm_pfn_pte(pfn, __pgprot(prot_bits)));
-
- return true;
-}
-
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
kvm_ptw_ctx ctx;
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 467ee6b95ae1..c17157e700c0 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -444,36 +444,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
return true;
}
-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- gpa_t gpa = range->start << PAGE_SHIFT;
- pte_t hva_pte = range->arg.pte;
- pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
- pte_t old_pte;
-
- if (!gpa_pte)
- return false;
-
- /* Mapping may need adjusting depending on memslot flags */
- old_pte = *gpa_pte;
- if (range->slot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte))
- hva_pte = pte_mkclean(hva_pte);
- else if (range->slot->flags & KVM_MEM_READONLY)
- hva_pte = pte_wrprotect(hva_pte);
-
- set_pte(gpa_pte, hva_pte);
-
- /* Replacing an absent or old page doesn't need flushes */
- if (!pte_present(old_pte) || !pte_young(old_pte))
- return false;
-
- /* Pages swapped, aged, moved, or cleaned require flushes */
- return !pte_present(hva_pte) ||
- !pte_young(hva_pte) ||
- pte_pfn(old_pte) != pte_pfn(hva_pte) ||
- (pte_dirty(old_pte) && !pte_dirty(hva_pte));
-}
-
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
return kvm_mips_mkold_gpa_pt(kvm, range->start, range->end);
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 3281215097cc..ca3829d47ab7 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -287,7 +287,6 @@ struct kvmppc_ops {
bool (*unmap_gfn_range)(struct kvm *kvm, struct kvm_gfn_range *range);
bool (*age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
bool (*test_age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
- bool (*set_spte_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
void (*free_memslot)(struct kvm_memory_slot *slot);
int (*init_vm)(struct kvm *kvm);
void (*destroy_vm)(struct kvm *kvm);
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 8acec144120e..0d0624088e6b 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -899,11 +899,6 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
return kvm->arch.kvm_ops->test_age_gfn(kvm, range);
}
-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- return kvm->arch.kvm_ops->set_spte_gfn(kvm, range);
-}
-
int kvmppc_core_init_vm(struct kvm *kvm)
{
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 58391b4b32ed..4aa2ab89afbc 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -12,7 +12,6 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
extern bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range);
extern bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);
extern bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);
-extern bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);
extern int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu);
extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 2b1f0cdd8c18..1b51b1c4713b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1010,18 +1010,6 @@ bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
return kvm_test_age_rmapp(kvm, range->slot, range->start);
}
-bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- WARN_ON(range->start + 1 != range->end);
-
- if (kvm_is_radix(kvm))
- kvm_unmap_radix(kvm, range->slot, range->start);
- else
- kvm_unmap_rmapp(kvm, range->slot, range->start);
-
- return false;
-}
-
static int vcpus_running(struct kvm *kvm)
{
return atomic_read(&kvm->arch.vcpus_running) != 0;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 8e86eb577eb8..35cb014a0c51 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -6364,7 +6364,6 @@ static struct kvmppc_ops kvm_ops_hv = {
.unmap_gfn_range = kvm_unmap_gfn_range_hv,
.age_gfn = kvm_age_gfn_hv,
.test_age_gfn = kvm_test_age_gfn_hv,
- .set_spte_gfn = kvm_set_spte_gfn_hv,
.free_memslot = kvmppc_core_free_memslot_hv,
.init_vm = kvmppc_core_init_vm_hv,
.destroy_vm = kvmppc_core_destroy_vm_hv,
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 5b92619a05fd..a7d7137ea0c8 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -461,12 +461,6 @@ static bool kvm_test_age_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range)
return false;
}
-static bool kvm_set_spte_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- /* The page will get remapped properly on its next fault */
- return do_kvm_unmap_gfn(kvm, range);
-}
-
/*****************************************/
static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
@@ -2071,7 +2065,6 @@ static struct kvmppc_ops kvm_ops_pr = {
.unmap_gfn_range = kvm_unmap_gfn_range_pr,
.age_gfn = kvm_age_gfn_pr,
.test_age_gfn = kvm_test_age_gfn_pr,
- .set_spte_gfn = kvm_set_spte_gfn_pr,
.free_memslot = kvmppc_core_free_memslot_pr,
.init_vm = kvmppc_core_init_vm_pr,
.destroy_vm = kvmppc_core_destroy_vm_pr,
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index ccb8f16ffe41..c664fdec75b1 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -747,12 +747,6 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
return false;
}
-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- /* The page will get remapped properly on its next fault */
- return kvm_e500_mmu_unmap_gfn(kvm, range);
-}
-
/*****************************************/
int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500)
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index a9e2fd7245e1..b63650f9b966 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -550,26 +550,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
return false;
}
-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- int ret;
- kvm_pfn_t pfn = pte_pfn(range->arg.pte);
-
- if (!kvm->arch.pgd)
- return false;
-
- WARN_ON(range->end - range->start != 1);
-
- ret = gstage_map_page(kvm, NULL, range->start << PAGE_SHIFT,
- __pfn_to_phys(pfn), PAGE_SIZE, true, true);
- if (ret) {
- kvm_debug("Failed to map G-stage page (error %d)\n", ret);
- return true;
- }
-
- return false;
-}
-
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
pte_t *ptep;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 992e651540e8..08900a0563f9 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -432,8 +432,8 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
* The idea using the light way get the spte on x86_32 guest is from
* gup_get_pte (mm/gup.c).
*
- * An spte tlb flush may be pending, because kvm_set_pte_rmap
- * coalesces them and we are running out of the MMU lock. Therefore
+ * An spte tlb flush may be pending, because they are coalesced and
+ * we are running out of the MMU lock. Therefore
* we need to protect against in-progress updates of the spte.
*
* Reading the spte while an update is in progress may get the old value
@@ -1448,49 +1448,11 @@ static bool __kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
}
static bool kvm_zap_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
- struct kvm_memory_slot *slot, gfn_t gfn, int level,
- pte_t unused)
+ struct kvm_memory_slot *slot, gfn_t gfn, int level)
{
return __kvm_zap_rmap(kvm, rmap_head, slot);
}
-static bool kvm_set_pte_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
- struct kvm_memory_slot *slot, gfn_t gfn, int level,
- pte_t pte)
-{
- u64 *sptep;
- struct rmap_iterator iter;
- bool need_flush = false;
- u64 new_spte;
- kvm_pfn_t new_pfn;
-
- WARN_ON_ONCE(pte_huge(pte));
- new_pfn = pte_pfn(pte);
-
-restart:
- for_each_rmap_spte(rmap_head, &iter, sptep) {
- need_flush = true;
-
- if (pte_write(pte)) {
- kvm_zap_one_rmap_spte(kvm, rmap_head, sptep);
- goto restart;
- } else {
- new_spte = kvm_mmu_changed_pte_notifier_make_spte(
- *sptep, new_pfn);
-
- mmu_spte_clear_track_bits(kvm, sptep);
- mmu_spte_set(sptep, new_spte);
- }
- }
-
- if (need_flush && kvm_available_flush_remote_tlbs_range()) {
- kvm_flush_remote_tlbs_gfn(kvm, gfn, level);
- return false;
- }
-
- return need_flush;
-}
-
struct slot_rmap_walk_iterator {
/* input fields. */
const struct kvm_memory_slot *slot;
@@ -1562,7 +1524,7 @@ static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator)
typedef bool (*rmap_handler_t)(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn,
- int level, pte_t pte);
+ int level);
static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm,
struct kvm_gfn_range *range,
@@ -1574,7 +1536,7 @@ static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm,
for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
range->start, range->end - 1, &iterator)
ret |= handler(kvm, iterator.rmap, range->slot, iterator.gfn,
- iterator.level, range->arg.pte);
+ iterator.level);
return ret;
}
@@ -1596,22 +1558,8 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
return flush;
}
-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- bool flush = false;
-
- if (kvm_memslots_have_rmaps(kvm))
- flush = kvm_handle_gfn_range(kvm, range, kvm_set_pte_rmap);
-
- if (tdp_mmu_enabled)
- flush |= kvm_tdp_mmu_set_spte_gfn(kvm, range);
-
- return flush;
-}
-
static bool kvm_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
- struct kvm_memory_slot *slot, gfn_t gfn, int level,
- pte_t unused)
+ struct kvm_memory_slot *slot, gfn_t gfn, int level)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1624,8 +1572,7 @@ static bool kvm_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
}
static bool kvm_test_age_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
- struct kvm_memory_slot *slot, gfn_t gfn,
- int level, pte_t unused)
+ struct kvm_memory_slot *slot, gfn_t gfn, int level)
{
u64 *sptep;
struct rmap_iterator iter;
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 4a599130e9c9..6c7ab3aa6aa7 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -322,22 +322,6 @@ u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
return spte;
}
-u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn)
-{
- u64 new_spte;
-
- new_spte = old_spte & ~SPTE_BASE_ADDR_MASK;
- new_spte |= (u64)new_pfn << PAGE_SHIFT;
-
- new_spte &= ~PT_WRITABLE_MASK;
- new_spte &= ~shadow_host_writable_mask;
- new_spte &= ~shadow_mmu_writable_mask;
-
- new_spte = mark_spte_for_access_track(new_spte);
-
- return new_spte;
-}
-
u64 mark_spte_for_access_track(u64 spte)
{
if (spte_ad_enabled(spte))
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index a129951c9a88..f5c600c52f83 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -496,8 +496,6 @@ static inline u64 restore_acc_track_spte(u64 spte)
return spte;
}
-u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn);
-
void __init kvm_mmu_spte_module_init(void);
void kvm_mmu_reset_all_pte_masks(void);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index d078157e62aa..c6192a52bd31 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1258,52 +1258,6 @@ bool kvm_tdp_mmu_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
return kvm_tdp_mmu_handle_gfn(kvm, range, test_age_gfn);
}
-static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
- struct kvm_gfn_range *range)
-{
- u64 new_spte;
-
- /* Huge pages aren't expected to be modified without first being zapped. */
- WARN_ON_ONCE(pte_huge(range->arg.pte) || range->start + 1 != range->end);
-
- if (iter->level != PG_LEVEL_4K ||
- !is_shadow_present_pte(iter->old_spte))
- return false;
-
- /*
- * Note, when changing a read-only SPTE, it's not strictly necessary to
- * zero the SPTE before setting the new PFN, but doing so preserves the
- * invariant that the PFN of a present * leaf SPTE can never change.
- * See handle_changed_spte().
- */
- tdp_mmu_iter_set_spte(kvm, iter, 0);
-
- if (!pte_write(range->arg.pte)) {
- new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte,
- pte_pfn(range->arg.pte));
-
- tdp_mmu_iter_set_spte(kvm, iter, new_spte);
- }
-
- return true;
-}
-
-/*
- * Handle the changed_pte MMU notifier for the TDP MMU.
- * data is a pointer to the new pte_t mapping the HVA specified by the MMU
- * notifier.
- * Returns non-zero if a flush is needed before releasing the MMU lock.
- */
-bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- /*
- * No need to handle the remote TLB flush under RCU protection, the
- * target SPTE _must_ be a leaf SPTE, i.e. cannot result in freeing a
- * shadow page. See the WARN on pfn_changed in handle_changed_spte().
- */
- return kvm_tdp_mmu_handle_gfn(kvm, range, set_spte_gfn);
-}
-
/*
* Remove write access from all SPTEs at or above min_level that map GFNs
* [start, end). Returns true if an SPTE has been changed and the TLBs need to
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 6e1ea04ca885..58b55e61bd33 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -31,7 +31,6 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
bool flush);
bool kvm_tdp_mmu_age_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_tdp_mmu_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
-bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
const struct kvm_memory_slot *slot, int min_level);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 48f31dcd318a..afbc99264ffa 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -259,7 +259,6 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER
union kvm_mmu_notifier_arg {
- pte_t pte;
unsigned long attributes;
};
@@ -273,7 +272,6 @@ struct kvm_gfn_range {
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
#endif
enum {
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index f349e08a9dfe..d39ebb10caeb 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -123,15 +123,6 @@ struct mmu_notifier_ops {
unsigned long address);
/*
- * change_pte is called in cases that pte mapping to page is changed:
- * for example, when ksm remaps pte to point to a new shared page.
- */
- void (*change_pte)(struct mmu_notifier *subscription,
- struct mm_struct *mm,
- unsigned long address,
- pte_t pte);
-
- /*
* invalidate_range_start() and invalidate_range_end() must be
* paired and are called only when the mmap_lock and/or the
* locks protecting the reverse maps are held. If the subsystem
@@ -392,8 +383,6 @@ extern int __mmu_notifier_clear_young(struct mm_struct *mm,
unsigned long end);
extern int __mmu_notifier_test_young(struct mm_struct *mm,
unsigned long address);
-extern void __mmu_notifier_change_pte(struct mm_struct *mm,
- unsigned long address, pte_t pte);
extern int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *r);
extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r);
extern void __mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm,
@@ -439,13 +428,6 @@ static inline int mmu_notifier_test_young(struct mm_struct *mm,
return 0;
}
-static inline void mmu_notifier_change_pte(struct mm_struct *mm,
- unsigned long address, pte_t pte)
-{
- if (mm_has_notifiers(mm))
- __mmu_notifier_change_pte(mm, address, pte);
-}
-
static inline void
mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
{
@@ -581,26 +563,6 @@ static inline void mmu_notifier_range_init_owner(
__young; \
})
-/*
- * set_pte_at_notify() sets the pte _after_ running the notifier.
- * This is safe to start by updating the secondary MMUs, because the primary MMU
- * pte invalidate must have already happened with a ptep_clear_flush() before
- * set_pte_at_notify() has been invoked. Updating the secondary MMUs first is
- * required when we change both the protection of the mapping from read-only to
- * read-write and the pfn (like during copy on write page faults). Otherwise the
- * old page would remain mapped readonly in the secondary MMUs after the new
- * page is already writable by some CPU through the primary MMU.
- */
-#define set_pte_at_notify(__mm, __address, __ptep, __pte) \
-({ \
- struct mm_struct *___mm = __mm; \
- unsigned long ___address = __address; \
- pte_t ___pte = __pte; \
- \
- mmu_notifier_change_pte(___mm, ___address, ___pte); \
- set_pte_at(___mm, ___address, __ptep, ___pte); \
-})
-
#else /* CONFIG_MMU_NOTIFIER */
struct mmu_notifier_range {
@@ -650,11 +612,6 @@ static inline int mmu_notifier_test_young(struct mm_struct *mm,
return 0;
}
-static inline void mmu_notifier_change_pte(struct mm_struct *mm,
- unsigned long address, pte_t pte)
-{
-}
-
static inline void
mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
{
@@ -693,7 +650,6 @@ static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm)
#define ptep_clear_flush_notify ptep_clear_flush
#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
#define pudp_huge_clear_flush_notify pudp_huge_clear_flush
-#define set_pte_at_notify set_pte_at
static inline void mmu_notifier_synchronize(void)
{
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 011fba6b5552..74e40d5d4af4 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -456,21 +456,6 @@ TRACE_EVENT(kvm_unmap_hva_range,
__entry->start, __entry->end)
);
-TRACE_EVENT(kvm_set_spte_hva,
- TP_PROTO(unsigned long hva),
- TP_ARGS(hva),
-
- TP_STRUCT__entry(
- __field( unsigned long, hva )
- ),
-
- TP_fast_assign(
- __entry->hva = hva;
- ),
-
- TP_printk("mmu notifier set pte hva: %#016lx", __entry->hva)
-);
-
TRACE_EVENT(kvm_age_hva,
TP_PROTO(unsigned long start, unsigned long end),
TP_ARGS(start, end),
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index e4834d23e1d1..1215bc299390 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -18,7 +18,7 @@
#include <linux/sched/coredump.h>
#include <linux/export.h>
#include <linux/rmap.h> /* anon_vma_prepare */
-#include <linux/mmu_notifier.h> /* set_pte_at_notify */
+#include <linux/mmu_notifier.h>
#include <linux/swap.h> /* folio_free_swap */
#include <linux/ptrace.h> /* user_enable_single_step */
#include <linux/kdebug.h> /* notifier mechanism */
@@ -195,8 +195,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
flush_cache_page(vma, addr, pte_pfn(ptep_get(pvmw.pte)));
ptep_clear_flush(vma, addr, pvmw.pte);
if (new_page)
- set_pte_at_notify(mm, addr, pvmw.pte,
- mk_pte(new_page, vma->vm_page_prot));
+ set_pte_at(mm, addr, pvmw.pte,
+ mk_pte(new_page, vma->vm_page_prot));
folio_remove_rmap_pte(old_folio, old_page, vma);
if (!folio_mapped(old_folio))
diff --git a/mm/ksm.c b/mm/ksm.c
index 8c001819cf10..108a4d167824 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1345,7 +1345,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
if (pte_write(entry))
entry = pte_wrprotect(entry);
- set_pte_at_notify(mm, pvmw.address, pvmw.pte, entry);
+ set_pte_at(mm, pvmw.address, pvmw.pte, entry);
}
*orig_pte = entry;
err = 0;
@@ -1447,7 +1447,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
* See Documentation/mm/mmu_notifier.rst
*/
ptep_clear_flush(vma, addr, ptep);
- set_pte_at_notify(mm, addr, ptep, newpte);
+ set_pte_at(mm, addr, ptep, newpte);
folio = page_folio(page);
folio_remove_rmap_pte(folio, page, vma);
diff --git a/mm/memory.c b/mm/memory.c
index f2bc6dd15eb8..9a6f4d8aa379 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3327,13 +3327,8 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
ptep_clear_flush(vma, vmf->address, vmf->pte);
folio_add_new_anon_rmap(new_folio, vma, vmf->address);
folio_add_lru_vma(new_folio, vma);
- /*
- * We call the notify macro here because, when using secondary
- * mmu page tables (such as kvm shadow page tables), we want the
- * new page to be mapped directly into the secondary page table.
- */
BUG_ON(unshare && pte_write(entry));
- set_pte_at_notify(mm, vmf->address, vmf->pte, entry);
+ set_pte_at(mm, vmf->address, vmf->pte, entry);
update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1);
if (old_folio) {
/*
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index b6c27c76e1a0..66206734b1b9 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -664,13 +664,9 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
if (flush) {
flush_cache_page(vma, addr, pte_pfn(orig_pte));
ptep_clear_flush(vma, addr, ptep);
- set_pte_at_notify(mm, addr, ptep, entry);
- update_mmu_cache(vma, addr, ptep);
- } else {
- /* No need to invalidate - it was non-present before */
- set_pte_at(mm, addr, ptep, entry);
- update_mmu_cache(vma, addr, ptep);
}
+ set_pte_at(mm, addr, ptep, entry);
+ update_mmu_cache(vma, addr, ptep);
pte_unmap_unlock(ptep, ptl);
*src = MIGRATE_PFN_MIGRATE;
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index ec3b068cbbe6..8982e6139d07 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -424,23 +424,6 @@ int __mmu_notifier_test_young(struct mm_struct *mm,
return young;
}
-void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
- pte_t pte)
-{
- struct mmu_notifier *subscription;
- int id;
-
- id = srcu_read_lock(&srcu);
- hlist_for_each_entry_rcu(subscription,
- &mm->notifier_subscriptions->list, hlist,
- srcu_read_lock_held(&srcu)) {
- if (subscription->ops->change_pte)
- subscription->ops->change_pte(subscription, mm, address,
- pte);
- }
- srcu_read_unlock(&srcu, id);
-}
-
static int mn_itree_invalidate(struct mmu_notifier_subscriptions *subscriptions,
const struct mmu_notifier_range *range)
{
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fb49c2a60200..658581d4ad68 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -583,8 +583,6 @@ static void kvm_null_fn(void)
}
#define IS_KVM_NULL_FN(fn) ((fn) == (void *)kvm_null_fn)
-static const union kvm_mmu_notifier_arg KVM_MMU_NOTIFIER_NO_ARG;
-
/* Iterate over each memslot intersecting [start, last] (inclusive) range */
#define kvm_for_each_memslot_in_hva_range(node, slots, start, last) \
for (node = interval_tree_iter_first(&slots->hva_tree, start, last); \
@@ -670,14 +668,12 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm,
static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
unsigned long start,
unsigned long end,
- union kvm_mmu_notifier_arg arg,
gfn_handler_t handler)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
const struct kvm_mmu_notifier_range range = {
.start = start,
.end = end,
- .arg = arg,
.handler = handler,
.on_lock = (void *)kvm_null_fn,
.flush_on_ret = true,
@@ -705,48 +701,6 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn
return __kvm_handle_hva_range(kvm, &range).ret;
}
-static bool kvm_change_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- /*
- * Skipping invalid memslots is correct if and only change_pte() is
- * surrounded by invalidate_range_{start,end}(), which is currently
- * guaranteed by the primary MMU. If that ever changes, KVM needs to
- * unmap the memslot instead of skipping the memslot to ensure that KVM
- * doesn't hold references to the old PFN.
- */
- WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));
-
- if (range->slot->flags & KVM_MEMSLOT_INVALID)
- return false;
-
- return kvm_set_spte_gfn(kvm, range);
-}
-
-static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long address,
- pte_t pte)
-{
- struct kvm *kvm = mmu_notifier_to_kvm(mn);
- const union kvm_mmu_notifier_arg arg = { .pte = pte };
-
- trace_kvm_set_spte_hva(address);
-
- /*
- * .change_pte() must be surrounded by .invalidate_range_{start,end}().
- * If mmu_invalidate_in_progress is zero, then no in-progress
- * invalidations, including this one, found a relevant memslot at
- * start(); rechecking memslots here is unnecessary. Note, a false
- * positive (count elevated by a different invalidation) is sub-optimal
- * but functionally ok.
- */
- WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));
- if (!READ_ONCE(kvm->mmu_invalidate_in_progress))
- return;
-
- kvm_handle_hva_range(mn, address, address + 1, arg, kvm_change_spte_gfn);
-}
-
void kvm_mmu_invalidate_begin(struct kvm *kvm)
{
lockdep_assert_held_write(&kvm->mmu_lock);
@@ -910,8 +864,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
{
trace_kvm_age_hva(start, end);
- return kvm_handle_hva_range(mn, start, end, KVM_MMU_NOTIFIER_NO_ARG,
- kvm_age_gfn);
+ return kvm_handle_hva_range(mn, start, end, kvm_age_gfn);
}
static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
@@ -964,7 +917,6 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
.clear_flush_young = kvm_mmu_notifier_clear_flush_young,
.clear_young = kvm_mmu_notifier_clear_young,
.test_young = kvm_mmu_notifier_test_young,
- .change_pte = kvm_mmu_notifier_change_pte,
.release = kvm_mmu_notifier_release,
};