summaryrefslogtreecommitdiff
path: root/arch/x86/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-10-31 04:40:57 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2023-10-31 04:40:57 +0300
commitf0d25b5d0f8ef0ad35f1beff17da5843279d47a1 (patch)
tree73bb0fea8ed4a96d01517349dfb2588c3ae37e6d /arch/x86/mm
parent1641b9b04002c22f616a51a164c04b7f679d241f (diff)
parenta1e2b8b36820d8c91275f207e77e91645b7c6836 (diff)
downloadlinux-f0d25b5d0f8ef0ad35f1beff17da5843279d47a1.tar.xz
Merge tag 'x86-mm-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm handling updates from Ingo Molnar: - Add new NX-stack self-test - Improve NUMA partial-CFMWS handling - Fix #VC handler bugs resulting in SEV-SNP boot failures - Drop the 4MB memory size restriction on minimal NUMA nodes - Reorganize headers a bit, in preparation to header dependency reduction efforts - Misc cleanups & fixes * tag 'x86-mm-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/mm: Drop the 4 MB restriction on minimal NUMA node memory size selftests/x86/lam: Zero out buffer for readlink() x86/sev: Drop unneeded #include x86/sev: Move sev_setup_arch() to mem_encrypt.c x86/tdx: Replace deprecated strncpy() with strtomem_pad() selftests/x86/mm: Add new test that userspace stack is in fact NX x86/sev: Make boot_ghcb_page[] static x86/boot: Move x86_cache_alignment initialization to correct spot x86/sev-es: Set x86_virt_bits to the correct value straight away, instead of a two-phase approach x86/sev-es: Allow copy_from_kernel_nofault() in earlier boot x86_64: Show CR4.PSE on auxiliaries like on BSP x86/iommu/docs: Update AMD IOMMU specification document URL x86/sev/docs: Update document URL in amd-memory-encryption.rst x86/mm: Move arch_memory_failure() and arch_is_platform_page() definitions from <asm/processor.h> to <asm/pgtable.h> ACPI/NUMA: Apply SRAT proximity domain to entire CFMWS window x86/numa: Introduce numa_fill_memblks()
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/maccess.c19
-rw-r--r--arch/x86/mm/mem_encrypt.c34
-rw-r--r--arch/x86/mm/mem_encrypt_amd.c36
-rw-r--r--arch/x86/mm/numa.c87
4 files changed, 128 insertions, 48 deletions
diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c
index 5a53c2cc169c..6993f026adec 100644
--- a/arch/x86/mm/maccess.c
+++ b/arch/x86/mm/maccess.c
@@ -9,12 +9,21 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
unsigned long vaddr = (unsigned long)unsafe_src;
/*
- * Range covering the highest possible canonical userspace address
- * as well as non-canonical address range. For the canonical range
- * we also need to include the userspace guard page.
+ * Do not allow userspace addresses. This disallows
+ * normal userspace and the userspace guard page:
*/
- return vaddr >= TASK_SIZE_MAX + PAGE_SIZE &&
- __is_canonical_address(vaddr, boot_cpu_data.x86_virt_bits);
+ if (vaddr < TASK_SIZE_MAX + PAGE_SIZE)
+ return false;
+
+ /*
+ * Allow everything during early boot before 'x86_virt_bits'
+ * is initialized. Needed for instruction decoding in early
+ * exception handlers.
+ */
+ if (!boot_cpu_data.x86_virt_bits)
+ return true;
+
+ return __is_canonical_address(vaddr, boot_cpu_data.x86_virt_bits);
}
#else
bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 9f27e14e185f..c290c55b632b 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -12,6 +12,7 @@
#include <linux/swiotlb.h>
#include <linux/cc_platform.h>
#include <linux/mem_encrypt.h>
+#include <linux/virtio_anchor.h>
/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
bool force_dma_unencrypted(struct device *dev)
@@ -86,3 +87,36 @@ void __init mem_encrypt_init(void)
print_mem_encrypt_feature_info();
}
+
+void __init mem_encrypt_setup_arch(void)
+{
+ phys_addr_t total_mem = memblock_phys_mem_size();
+ unsigned long size;
+
+ if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+ return;
+
+ /*
+ * For SEV and TDX, all DMA has to occur via shared/unencrypted pages.
+ * Kernel uses SWIOTLB to make this happen without changing device
+ * drivers. However, depending on the workload being run, the
+ * default 64MB of SWIOTLB may not be enough and SWIOTLB may
+ * run out of buffers for DMA, resulting in I/O errors and/or
+ * performance degradation especially with high I/O workloads.
+ *
+ * Adjust the default size of SWIOTLB using a percentage of guest
+ * memory for SWIOTLB buffers. Also, as the SWIOTLB bounce buffer
+ * memory is allocated from low memory, ensure that the adjusted size
+ * is within the limits of low available memory.
+ *
+ * The percentage of guest memory used here for SWIOTLB buffers
+ * is more of an approximation of the static adjustment which
+ * 64MB for <1G, and ~128M to 256M for 1G-to-4G, i.e., the 6%
+ */
+ size = total_mem * 6 / 100;
+ size = clamp_val(size, IO_TLB_DEFAULT_SIZE, SZ_1G);
+ swiotlb_adjust_size(size);
+
+ /* Set restricted memory access for virtio. */
+ virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc);
+}
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index 6faea41e99b6..a68f2dda0948 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -19,8 +19,6 @@
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/dma-mapping.h>
-#include <linux/virtio_config.h>
-#include <linux/virtio_anchor.h>
#include <linux/cc_platform.h>
#include <asm/tlbflush.h>
@@ -215,40 +213,6 @@ void __init sme_map_bootdata(char *real_mode_data)
__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true);
}
-void __init sev_setup_arch(void)
-{
- phys_addr_t total_mem = memblock_phys_mem_size();
- unsigned long size;
-
- if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
- return;
-
- /*
- * For SEV, all DMA has to occur via shared/unencrypted pages.
- * SEV uses SWIOTLB to make this happen without changing device
- * drivers. However, depending on the workload being run, the
- * default 64MB of SWIOTLB may not be enough and SWIOTLB may
- * run out of buffers for DMA, resulting in I/O errors and/or
- * performance degradation especially with high I/O workloads.
- *
- * Adjust the default size of SWIOTLB for SEV guests using
- * a percentage of guest memory for SWIOTLB buffers.
- * Also, as the SWIOTLB bounce buffer memory is allocated
- * from low memory, ensure that the adjusted size is within
- * the limits of low available memory.
- *
- * The percentage of guest memory used here for SWIOTLB buffers
- * is more of an approximation of the static adjustment which
- * 64MB for <1G, and ~128M to 256M for 1G-to-4G, i.e., the 6%
- */
- size = total_mem * 6 / 100;
- size = clamp_val(size, IO_TLB_DEFAULT_SIZE, SZ_1G);
- swiotlb_adjust_size(size);
-
- /* Set restricted memory access for virtio. */
- virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc);
-}
-
static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
{
unsigned long pfn = 0;
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index c79f12e449ea..4a17f663f949 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -12,6 +12,7 @@
#include <linux/nodemask.h>
#include <linux/sched.h>
#include <linux/topology.h>
+#include <linux/sort.h>
#include <asm/e820/api.h>
#include <asm/proto.h>
@@ -602,13 +603,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
if (start >= end)
continue;
- /*
- * Don't confuse VM with a node that doesn't have the
- * minimum amount of memory:
- */
- if (end && (end - start) < NODE_MIN_SIZE)
- continue;
-
alloc_node_data(nid);
}
@@ -964,4 +958,83 @@ int memory_add_physaddr_to_nid(u64 start)
return nid;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
+
+static int __init cmp_memblk(const void *a, const void *b)
+{
+ const struct numa_memblk *ma = *(const struct numa_memblk **)a;
+ const struct numa_memblk *mb = *(const struct numa_memblk **)b;
+
+ return ma->start - mb->start;
+}
+
+static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;
+
+/**
+ * numa_fill_memblks - Fill gaps in numa_meminfo memblks
+ * @start: address to begin fill
+ * @end: address to end fill
+ *
+ * Find and extend numa_meminfo memblks to cover the @start-@end
+ * physical address range, such that the first memblk includes
+ * @start, the last memblk includes @end, and any gaps in between
+ * are filled.
+ *
+ * RETURNS:
+ * 0 : Success
+ * NUMA_NO_MEMBLK : No memblk exists in @start-@end range
+ */
+
+int __init numa_fill_memblks(u64 start, u64 end)
+{
+ struct numa_memblk **blk = &numa_memblk_list[0];
+ struct numa_meminfo *mi = &numa_meminfo;
+ int count = 0;
+ u64 prev_end;
+
+ /*
+ * Create a list of pointers to numa_meminfo memblks that
+ * overlap start, end. Exclude (start == bi->end) since
+ * end addresses in both a CFMWS range and a memblk range
+ * are exclusive.
+ *
+ * This list of pointers is used to make in-place changes
+ * that fill out the numa_meminfo memblks.
+ */
+ for (int i = 0; i < mi->nr_blks; i++) {
+ struct numa_memblk *bi = &mi->blk[i];
+
+ if (start < bi->end && end >= bi->start) {
+ blk[count] = &mi->blk[i];
+ count++;
+ }
+ }
+ if (!count)
+ return NUMA_NO_MEMBLK;
+
+ /* Sort the list of pointers in memblk->start order */
+ sort(&blk[0], count, sizeof(blk[0]), cmp_memblk, NULL);
+
+ /* Make sure the first/last memblks include start/end */
+ blk[0]->start = min(blk[0]->start, start);
+ blk[count - 1]->end = max(blk[count - 1]->end, end);
+
+ /*
+ * Fill any gaps by tracking the previous memblks
+ * end address and backfilling to it if needed.
+ */
+ prev_end = blk[0]->end;
+ for (int i = 1; i < count; i++) {
+ struct numa_memblk *curr = blk[i];
+
+ if (prev_end >= curr->start) {
+ if (prev_end < curr->end)
+ prev_end = curr->end;
+ } else {
+ curr->start = prev_end;
+ prev_end = curr->end;
+ }
+ }
+ return 0;
+}
+
#endif