From 052936080c8fb2f791103995b21bd4018c8df886 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 1 Apr 2011 11:15:12 +0200
Subject: x86-64, NUMA: Remove custom phys_to_nid() implementation

phys_to_nid() maps a physical address to a NUMA node id.  This is
implemented by building a perfect hash in compute_hash_shift() during
initialization.

However, with the SPARSE memory model, the nid is encoded in the page
flags.  The perfect hash implementation was for the DISCONTIG memory
model, which was removed years ago by b263295dbf (x86: 64-bit, make
sparsemem vmemmap the only memory model).

So the perfect hash ends up being used only during initialization, when
the core SPARSE code already provides a perfectly acceptable generic
early_pfn_to_nid() implementation.

Drop phys_to_nid() and use the generic early_pfn_to_nid() instead.

Signed-off-by: Tejun Heo
Reviewed-by: Christoph Lameter
Acked-by: Yinghai Lu
Cc: Ingo Molnar
Cc: "H. Peter Anvin"
Cc: Thomas Gleixner
---
 arch/x86/Kconfig | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'arch/x86/Kconfig')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cc6c53a95bfd..b034814bf975 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1703,10 +1703,6 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE
 	def_bool y
 	depends on MEMORY_HOTPLUG
 
-config HAVE_ARCH_EARLY_PFN_TO_NID
-	def_bool X86_64
-	depends on NUMA
-
 config USE_PERCPU_NUMA_NODE_ID
 	def_bool y
 	depends on NUMA
--
cgit v1.2.3
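As an aside on the generic path this patch switches to: under SPARSEMEM, an
early pfn-to-node lookup only has to consult the node ranges registered
during boot.  The following is a minimal illustrative sketch of that idea,
not the kernel's actual implementation; early_range, early_ranges and
nr_early_ranges are hypothetical names standing in for the boot-time range
table.

	/* Sketch: resolve a pfn to its node by scanning boot-time ranges. */
	struct early_range {
		unsigned long start_pfn;
		unsigned long end_pfn;		/* exclusive */
		int nid;
	};

	static struct early_range early_ranges[64];
	static int nr_early_ranges;

	int early_pfn_to_nid_sketch(unsigned long pfn)
	{
		int i;

		for (i = 0; i < nr_early_ranges; i++)
			if (pfn >= early_ranges[i].start_pfn &&
			    pfn < early_ranges[i].end_pfn)
				return early_ranges[i].nid;

		return 0;	/* no match: fall back to node 0 */
	}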
From 3b16651f806d35b5c404f2525fbce76afa3c9297 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 1 Apr 2011 11:15:12 +0200
Subject: x86: Clean up memory model related configs in arch/x86/Kconfig

* Remove the bogus dependency on ARCH_SELECT_MEMORY_MODEL from
  ARCH_FLATMEM_ENABLE.  ENABLE configs don't interfere with
  SELECT_MEMORY_MODEL; they just need to indicate whether the specific
  memory model is supported.

* Relocate HAVE_ARCH_ALLOC_REMAP, ARCH_PROC_KCORE_TEXT and
  ARCH_SPARSEMEM_DEFAULT so that the memory model related configs sit
  together in a consistent order.

Signed-off-by: Tejun Heo
Reviewed-by: Christoph Lameter
Cc: Ingo Molnar
Cc: Yinghai Lu
Cc: "H. Peter Anvin"
Cc: Thomas Gleixner
---
 arch/x86/Kconfig | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'arch/x86/Kconfig')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b034814bf975..8db4fbf30b59 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1223,6 +1223,10 @@ config HAVE_ARCH_BOOTMEM
 	def_bool y
 	depends on X86_32 && NUMA
 
+config HAVE_ARCH_ALLOC_REMAP
+	def_bool y
+	depends on X86_32 && NUMA
+
 config ARCH_HAVE_MEMORY_PRESENT
 	def_bool y
 	depends on X86_32 && DISCONTIGMEM
@@ -1231,13 +1235,9 @@ config NEED_NODE_MEMMAP_SIZE
 	def_bool y
 	depends on X86_32 && (DISCONTIGMEM || SPARSEMEM)
 
-config HAVE_ARCH_ALLOC_REMAP
-	def_bool y
-	depends on X86_32 && NUMA
-
 config ARCH_FLATMEM_ENABLE
 	def_bool y
-	depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && !NUMA
+	depends on X86_32 && !NUMA
 
 config ARCH_DISCONTIGMEM_ENABLE
 	def_bool y
@@ -1247,20 +1247,16 @@ config ARCH_DISCONTIGMEM_DEFAULT
 	def_bool y
 	depends on NUMA && X86_32
 
-config ARCH_PROC_KCORE_TEXT
-	def_bool y
-	depends on X86_64 && PROC_KCORE
-
-config ARCH_SPARSEMEM_DEFAULT
-	def_bool y
-	depends on X86_64
-
 config ARCH_SPARSEMEM_ENABLE
 	def_bool y
 	depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD
 	select SPARSEMEM_STATIC if X86_32
 	select SPARSEMEM_VMEMMAP_ENABLE if X86_64
 
+config ARCH_SPARSEMEM_DEFAULT
+	def_bool y
+	depends on X86_64
+
 config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
 	depends on ARCH_SPARSEMEM_ENABLE
@@ -1269,6 +1265,10 @@ config ARCH_MEMORY_PROBE
 	def_bool X86_64
 	depends on MEMORY_HOTPLUG
 
+config ARCH_PROC_KCORE_TEXT
+	def_bool y
+	depends on X86_64 && PROC_KCORE
+
 config ILLEGAL_POINTER_VALUE
 	hex
 	default 0 if X86_32
--
cgit v1.2.3
From 2706a0bf7b02693ed88752df877f10c2206292ff Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 2 May 2011 17:24:48 +0200
Subject: x86, NUMA: Enable CONFIG_AMD_NUMA on 32bit too

Now that the NUMA init path is unified, amdtopology can be enabled on
32bit.  Make amdtopology.c safe on 32bit by explicitly using u64, and
drop the X86_64 dependency from Kconfig.

bootmem.h is now included for the max_pfn declaration.

Signed-off-by: Tejun Heo
Cc: Ingo Molnar
Cc: Yinghai Lu
Cc: David Rientjes
Cc: Thomas Gleixner
Cc: "H. Peter Anvin"
---
 arch/x86/Kconfig          |  2 +-
 arch/x86/mm/amdtopology.c | 21 +++++++++++----------
 2 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'arch/x86/Kconfig')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8db4fbf30b59..50cb68d2901d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1174,7 +1174,7 @@ comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
 config AMD_NUMA
 	def_bool y
 	prompt "Old style AMD Opteron NUMA detection"
-	depends on X86_64 && NUMA && PCI
+	depends on NUMA && PCI
 	---help---
 	  Enable AMD NUMA node topology detection.  You should say Y here if
 	  you have a multi processor AMD system. This uses an old method to
diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c
index 0919c26820d4..5247d01329ca 100644
--- a/arch/x86/mm/amdtopology.c
+++ b/arch/x86/mm/amdtopology.c
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include <linux/bootmem.h>
 #include
 #include
 
@@ -69,10 +70,10 @@ static __init void early_get_boot_cpu_id(void)
 
 int __init amd_numa_init(void)
 {
-	unsigned long start = PFN_PHYS(0);
-	unsigned long end = PFN_PHYS(max_pfn);
+	u64 start = PFN_PHYS(0);
+	u64 end = PFN_PHYS(max_pfn);
 	unsigned numnodes;
-	unsigned long prevbase;
+	u64 prevbase;
 	int i, j, nb;
 	u32 nodeid, reg;
 	unsigned int bits, cores, apicid_base;
@@ -95,7 +96,7 @@ int __init amd_numa_init(void)
 	prevbase = 0;
 	for (i = 0; i < 8; i++) {
-		unsigned long base, limit;
+		u64 base, limit;
 
 		base = read_pci_config(0, nb, 1, 0x40 + i*8);
 		limit = read_pci_config(0, nb, 1, 0x44 + i*8);
@@ -107,18 +108,18 @@ int __init amd_numa_init(void)
 			continue;
 		}
 		if (nodeid >= numnodes) {
-			pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid,
+			pr_info("Ignoring excess node %d (%Lx:%Lx)\n", nodeid,
 				base, limit);
 			continue;
 		}
 
 		if (!limit) {
-			pr_info("Skipping node entry %d (base %lx)\n",
+			pr_info("Skipping node entry %d (base %Lx)\n",
 				i, base);
 			continue;
 		}
 		if ((base >> 8) & 3 || (limit >> 8) & 3) {
-			pr_err("Node %d using interleaving mode %lx/%lx\n",
+			pr_err("Node %d using interleaving mode %Lx/%Lx\n",
 				nodeid, (base >> 8) & 3, (limit >> 8) & 3);
 			return -EINVAL;
 		}
@@ -150,19 +151,19 @@ int __init amd_numa_init(void)
 			continue;
 		}
 		if (limit < base) {
-			pr_err("Node %d bogus settings %lx-%lx.\n",
+			pr_err("Node %d bogus settings %Lx-%Lx.\n",
 				nodeid, base, limit);
 			continue;
 		}
 
 		/* Could sort here, but pun for now. Should not happen anyroads. */
 		if (prevbase > base) {
-			pr_err("Node map not sorted %lx,%lx\n",
+			pr_err("Node map not sorted %Lx,%Lx\n",
 				prevbase, base);
 			return -EINVAL;
 		}
 
-		pr_info("Node %d MemBase %016lx Limit %016lx\n",
+		pr_info("Node %d MemBase %016Lx Limit %016Lx\n",
 			nodeid, base, limit);
 
 		prevbase = base;
--
cgit v1.2.3
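To see why the patch above moves from unsigned long to u64 (printed with
%Lx), consider a node limit above 4GB on a 32bit build.  A minimal userspace
sketch, assuming a 32-bit unsigned long as on x86_32; none of this code is
from the patch itself:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t limit = 0x140000000ULL;	/* 5GB, above 32 bits */
		unsigned long truncated = (unsigned long)limit;

		/* On a 32-bit build the second line prints 0x40000000. */
		printf("u64:  %#llx\n", (unsigned long long)limit);
		printf("long: %#lx\n", truncated);
		return 0;
	}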
From 1b7e03ef7570568d2fb9e6640d7006a0edd728f6 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 2 May 2011 17:24:48 +0200
Subject: x86, NUMA: Enable emulation on 32bit too

Now that the NUMA init path is unified, NUMA emulation can be enabled
on 32bit.  Make numa_emulation.c safe on 32bit by doing the following:

* Define MAX_DMA32_PFN on 32bit too.

* Include bootmem.h for the max_pfn declaration.

* Use u64 explicitly and always use PFN_PHYS() when converting a page
  number to an address.

* Avoid __udivdi3() generation on 32bit by doing the division on the
  number of pages instead in split_nodes_interleave().

And drop the X86_64 dependency from Kconfig.

Signed-off-by: Tejun Heo
Cc: Ingo Molnar
Cc: Yinghai Lu
Cc: David Rientjes
Cc: Thomas Gleixner
Cc: "H. Peter Anvin"
---
 arch/x86/Kconfig             |  2 +-
 arch/x86/include/asm/dma.h   | 10 +++-------
 arch/x86/mm/numa_emulation.c | 16 +++++++++++-----
 3 files changed, 15 insertions(+), 13 deletions(-)

(limited to 'arch/x86/Kconfig')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 50cb68d2901d..648fca42ae6a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1201,7 +1201,7 @@ config NODES_SPAN_OTHER_NODES
 
 config NUMA_EMU
 	bool "NUMA emulation"
-	depends on X86_64 && NUMA
+	depends on NUMA
 	---help---
 	  Enable NUMA emulation. A flat machine will be split
 	  into virtual nodes when booted with "numa=fake=N", where N is the
diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h
index d1a314b610f6..0bdb0c54d9a1 100644
--- a/arch/x86/include/asm/dma.h
+++ b/arch/x86/include/asm/dma.h
@@ -72,19 +72,15 @@
 /* 16MB ISA DMA zone */
 #define MAX_DMA_PFN   ((16 * 1024 * 1024) >> PAGE_SHIFT)
 
-#ifdef CONFIG_X86_32
+/* 4GB broken PCI/AGP hardware bus master zone */
+#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
 
+#ifdef CONFIG_X86_32
 /* The maximum address that we can perform a DMA transfer to on this platform */
 #define MAX_DMA_ADDRESS      (PAGE_OFFSET + 0x1000000)
-
 #else
-
-/* 4GB broken PCI/AGP hardware bus master zone */
-#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
-
 /* Compat define for old dma zone */
 #define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT))
-
 #endif
 
 /* 8237 DMA controllers */
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index de84cc140379..d0ed086b6247 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include <linux/bootmem.h>
 #include
 
 #include "numa_internal.h"
@@ -84,7 +85,13 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
 		nr_nodes = MAX_NUMNODES;
 	}
 
-	size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes;
+	/*
+	 * Calculate target node size.  x86_32 freaks on __udivdi3() so do
+	 * the division in ulong number of pages and convert back.
+	 */
+	size = max_addr - addr - memblock_x86_hole_size(addr, max_addr);
+	size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes);
+
 	/*
 	 * Calculate the number of big nodes that can be allocated as a result
 	 * of consolidating the remainder.
@@ -226,7 +233,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 	 */
 	while (nodes_weight(physnode_mask)) {
 		for_each_node_mask(i, physnode_mask) {
-			u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
+			u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
 			u64 start, limit, end;
 			int phys_blk;
 
@@ -298,7 +305,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
 {
 	static struct numa_meminfo ei __initdata;
 	static struct numa_meminfo pi __initdata;
-	const u64 max_addr = max_pfn << PAGE_SHIFT;
+	const u64 max_addr = PFN_PHYS(max_pfn);
 	u8 *phys_dist = NULL;
 	size_t phys_size = numa_dist_cnt * numa_dist_cnt * sizeof(phys_dist[0]);
 	int max_emu_nid, dfl_phys_nid;
@@ -342,8 +349,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
 	if (numa_dist_cnt) {
 		u64 phys;
 
-		phys = memblock_find_in_range(0,
-					      (u64)max_pfn_mapped << PAGE_SHIFT,
+		phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
 					      phys_size, PAGE_SIZE);
 		if (phys == MEMBLOCK_ERROR) {
 			pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
--
cgit v1.2.3
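The __udivdi3() note in the patch above is easy to reproduce: on x86_32, gcc
turns a plain 64-bit division into a call to libgcc's __udivdi3(), which the
kernel does not provide.  Doing the division on the page count, which fits in
32 bits, keeps it a native instruction.  An illustrative userspace sketch,
assuming PAGE_SHIFT is 12; the function names are made up:

	#include <stdint.h>

	#define PAGE_SHIFT	12

	uint64_t node_size_bad(uint64_t bytes, unsigned int nr_nodes)
	{
		return bytes / nr_nodes;	/* emits __udivdi3() on x86_32 */
	}

	uint64_t node_size_ok(uint64_t bytes, unsigned int nr_nodes)
	{
		/* The page count fits in 32 bits, so this divide stays native. */
		unsigned long pages = (unsigned long)(bytes >> PAGE_SHIFT);

		return (uint64_t)(pages / nr_nodes) << PAGE_SHIFT;
	}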
From dc382fd5bcca7098a984705ed6ac880f539d068e Mon Sep 17 00:00:00 2001
From: David Rientjes
Date: Mon, 16 May 2011 13:54:10 -0700
Subject: x86, mm: Allow ZONE_DMA to be configurable

ZONE_DMA is unnecessary for a large number of machines that do not
require less than 32-bit DMA addressing, e.g. ISA legacy DMA or PCI
cards with a restricted DMA address mask.

This patch allows users to disable ZONE_DMA for x86 if they know they
will not be using such devices with their kernel.

This prevents the VM from unnecessarily reserving a ratio of memory
(defaulting to 1/256th of system capacity) with lowmem_reserve_ratio
for such allocations when they will never be needed.

Signed-off-by: David Rientjes
Link: http://lkml.kernel.org/r/alpine.DEB.2.00.1105161353560.4353@chino.kir.corp.google.com
Signed-off-by: H. Peter Anvin
---
 arch/x86/Kconfig      | 9 ++++++++-
 arch/x86/mm/init_32.c | 2 ++
 arch/x86/mm/init_64.c | 2 ++
 3 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'arch/x86/Kconfig')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 648fca42ae6a..0eb801a75dee 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -112,7 +112,14 @@ config MMU
 	def_bool y
 
 config ZONE_DMA
-	def_bool y
+	bool "DMA memory allocation support" if EXPERT
+	default y
+	help
+	  DMA memory allocation support allows devices with less than 32-bit
+	  addressing to allocate within the first 16MB of address space.
+	  Disable if no such devices will be used.
+
+	  If unsure, say Y.
 
 config SBUS
 	bool
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 2cde0a34bed6..29f7c6d98179 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -678,8 +678,10 @@ static void __init zone_sizes_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+#ifdef CONFIG_ZONE_DMA
 	max_zone_pfns[ZONE_DMA] =
 		virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+#endif
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 #ifdef CONFIG_HIGHMEM
 	max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0404bb3a077e..d865c4aeec55 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -616,7 +616,9 @@ void __init paging_init(void)
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
 
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+#ifdef CONFIG_ZONE_DMA
 	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
+#endif
 	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
 	max_zone_pfns[ZONE_NORMAL] = max_pfn;
 
--
cgit v1.2.3
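The 1/256th figure cited in the message above is easy to sanity-check; a
small userspace sketch, assuming the default lowmem_reserve_ratio entry of
256 and a 4GB machine:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long ram = 4ULL << 30;	/* 4GB of RAM */
		unsigned int ratio = 256;	/* default lowmem_reserve_ratio */

		/* 4GB / 256 = 16MB held in reserve even if never used. */
		printf("reserved: %llu MB\n", (ram / ratio) >> 20);
		return 0;
	}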