From bbcd53c960713507ae764bf81970651b5577b95a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 6 May 2021 18:05:55 -0700 Subject: drivers/char: remove /dev/kmem for good Patch series "drivers/char: remove /dev/kmem for good". Exploring /dev/kmem and /dev/mem in the context of memory hot(un)plug and memory ballooning, I started questioning the existence of /dev/kmem. Comparing it with the /proc/kcore implementation, it does not seem to be able to deal with things like a) Pages unmapped from the direct mapping (e.g., to be used by secretmem) -> kern_addr_valid(). virt_addr_valid() is not sufficient. b) Special cases like gart aperture memory that is not to be touched -> mem_pfn_is_ram() Unless I am missing something, it's at least broken in some cases and might fault/crash the machine. Looks like its existence has been questioned before in 2005 and 2010 [1], after ~11 additional years, it might make sense to revive the discussion. CONFIG_DEVKMEM is only enabled in a single defconfig (on purpose or by mistake?). All distributions disable it: in Ubuntu it has been disabled for more than 10 years, in Debian since 2.6.31, in Fedora at least starting with FC3, in RHEL starting with RHEL4, in SUSE starting from 15sp2, and OpenSUSE has it disabled as well. 1) /dev/kmem was popular for rootkits [2] before it got disabled basically everywhere. Ubuntu documents [3] "There is no modern user of /dev/kmem any more beyond attackers using it to load kernel rootkits.". RHEL documents in a BZ [5] "it served no practical purpose other than to serve as a potential security problem or to enable binary module drivers to access structures/functions they shouldn't be touching" 2) /proc/kcore is a decent interface to have a controlled way to read kernel memory for debugging puposes. (will need some extensions to deal with memory offlining/unplug, memory ballooning, and poisoned pages, though) 3) It might be useful for corner case debugging [1]. KDB/KGDB might be a better fit, especially, to write random memory; harder to shoot yourself into the foot. 4) "Kernel Memory Editor" [4] hasn't seen any updates since 2000 and seems to be incompatible with 64bit [1]. For educational purposes, /proc/kcore might be used to monitor value updates -- or older kernels can be used. 5) It's broken on arm64, and therefore, completely disabled there. Looks like it's essentially unused and has been replaced by better suited interfaces for individual tasks (/proc/kcore, KDB/KGDB). Let's just remove it. [1] https://lwn.net/Articles/147901/ [2] https://www.linuxjournal.com/article/10505 [3] https://wiki.ubuntu.com/Security/Features#A.2Fdev.2Fkmem_disabled [4] https://sourceforge.net/projects/kme/ [5] https://bugzilla.redhat.com/show_bug.cgi?id=154796 Link: https://lkml.kernel.org/r/20210324102351.6932-1-david@redhat.com Link: https://lkml.kernel.org/r/20210324102351.6932-2-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Michal Hocko Acked-by: Kees Cook Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: "Alexander A. Klimov" Cc: Alexander Viro Cc: Alexandre Belloni Cc: Andrew Lunn Cc: Andrey Zhizhikin Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Brian Cain Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Chris Zankel Cc: Corentin Labbe Cc: "David S. Miller" Cc: "Eric W. Biederman" Cc: Geert Uytterhoeven Cc: Gerald Schaefer Cc: Greentime Hu Cc: Gregory Clement Cc: Heiko Carstens Cc: Helge Deller Cc: Hillf Danton Cc: huang ying Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: "James E.J. Bottomley" Cc: James Troup Cc: Jiaxun Yang Cc: Jonas Bonn Cc: Jonathan Corbet Cc: Kairui Song Cc: Krzysztof Kozlowski Cc: Kuninori Morimoto Cc: Liviu Dudau Cc: Lorenzo Pieralisi Cc: Luc Van Oostenryck Cc: Luis Chamberlain Cc: Matthew Wilcox Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Mike Rapoport Cc: Mikulas Patocka Cc: Minchan Kim Cc: Niklas Schnelle Cc: Oleksiy Avramchenko Cc: openrisc@lists.librecores.org Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: "Pavel Machek (CIP)" Cc: Pavel Machek Cc: "Peter Zijlstra (Intel)" Cc: Pierre Morel Cc: Randy Dunlap Cc: Richard Henderson Cc: Rich Felker Cc: Robert Richter Cc: Rob Herring Cc: Russell King Cc: Sam Ravnborg Cc: Sebastian Andrzej Siewior Cc: Sebastian Hesselbarth Cc: sparclinux@vger.kernel.org Cc: Stafford Horne Cc: Stefan Kristiansson Cc: Steven Rostedt Cc: Sudeep Holla Cc: Theodore Dubois Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Viresh Kumar Cc: William Cohen Cc: Xiaoming Ni Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/Kconfig | 10 --- drivers/char/mem.c | 231 --------------------------------------------------- 2 files changed, 241 deletions(-) (limited to 'drivers') diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index d229a2d0c017..b151e0fcdeb5 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -334,16 +334,6 @@ config DEVMEM memory. When in doubt, say "Y". -config DEVKMEM - bool "/dev/kmem virtual device support" - # On arm64, VMALLOC_START < PAGE_OFFSET, which confuses kmem read/write - depends on !ARM64 - help - Say Y here if you want to support the /dev/kmem device. The - /dev/kmem device is rarely used, but can be used for certain - kind of kernel debugging operations. - When in doubt, say "N". - config NVRAM tristate "/dev/nvram support" depends on X86 || HAVE_ARCH_NVRAM_OPS diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 869b9f5e8e03..15dc54fa1d47 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -403,221 +403,6 @@ static int mmap_mem(struct file *file, struct vm_area_struct *vma) return 0; } -static int mmap_kmem(struct file *file, struct vm_area_struct *vma) -{ - unsigned long pfn; - - /* Turn a kernel-virtual address into a physical page frame */ - pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT; - - /* - * RED-PEN: on some architectures there is more mapped memory than - * available in mem_map which pfn_valid checks for. Perhaps should add a - * new macro here. - * - * RED-PEN: vmalloc is not supported right now. - */ - if (!pfn_valid(pfn)) - return -EIO; - - vma->vm_pgoff = pfn; - return mmap_mem(file, vma); -} - -/* - * This function reads the *virtual* memory as seen by the kernel. - */ -static ssize_t read_kmem(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - unsigned long p = *ppos; - ssize_t low_count, read, sz; - char *kbuf; /* k-addr because vread() takes vmlist_lock rwlock */ - int err = 0; - - read = 0; - if (p < (unsigned long) high_memory) { - low_count = count; - if (count > (unsigned long)high_memory - p) - low_count = (unsigned long)high_memory - p; - -#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED - /* we don't have page 0 mapped on sparc and m68k.. */ - if (p < PAGE_SIZE && low_count > 0) { - sz = size_inside_page(p, low_count); - if (clear_user(buf, sz)) - return -EFAULT; - buf += sz; - p += sz; - read += sz; - low_count -= sz; - count -= sz; - } -#endif - while (low_count > 0) { - sz = size_inside_page(p, low_count); - - /* - * On ia64 if a page has been mapped somewhere as - * uncached, then it must also be accessed uncached - * by the kernel or data corruption may occur - */ - kbuf = xlate_dev_kmem_ptr((void *)p); - if (!virt_addr_valid(kbuf)) - return -ENXIO; - - if (copy_to_user(buf, kbuf, sz)) - return -EFAULT; - buf += sz; - p += sz; - read += sz; - low_count -= sz; - count -= sz; - if (should_stop_iteration()) { - count = 0; - break; - } - } - } - - if (count > 0) { - kbuf = (char *)__get_free_page(GFP_KERNEL); - if (!kbuf) - return -ENOMEM; - while (count > 0) { - sz = size_inside_page(p, count); - if (!is_vmalloc_or_module_addr((void *)p)) { - err = -ENXIO; - break; - } - sz = vread(kbuf, (char *)p, sz); - if (!sz) - break; - if (copy_to_user(buf, kbuf, sz)) { - err = -EFAULT; - break; - } - count -= sz; - buf += sz; - read += sz; - p += sz; - if (should_stop_iteration()) - break; - } - free_page((unsigned long)kbuf); - } - *ppos = p; - return read ? read : err; -} - - -static ssize_t do_write_kmem(unsigned long p, const char __user *buf, - size_t count, loff_t *ppos) -{ - ssize_t written, sz; - unsigned long copied; - - written = 0; -#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED - /* we don't have page 0 mapped on sparc and m68k.. */ - if (p < PAGE_SIZE) { - sz = size_inside_page(p, count); - /* Hmm. Do something? */ - buf += sz; - p += sz; - count -= sz; - written += sz; - } -#endif - - while (count > 0) { - void *ptr; - - sz = size_inside_page(p, count); - - /* - * On ia64 if a page has been mapped somewhere as uncached, then - * it must also be accessed uncached by the kernel or data - * corruption may occur. - */ - ptr = xlate_dev_kmem_ptr((void *)p); - if (!virt_addr_valid(ptr)) - return -ENXIO; - - copied = copy_from_user(ptr, buf, sz); - if (copied) { - written += sz - copied; - if (written) - break; - return -EFAULT; - } - buf += sz; - p += sz; - count -= sz; - written += sz; - if (should_stop_iteration()) - break; - } - - *ppos += written; - return written; -} - -/* - * This function writes to the *virtual* memory as seen by the kernel. - */ -static ssize_t write_kmem(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - unsigned long p = *ppos; - ssize_t wrote = 0; - ssize_t virtr = 0; - char *kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */ - int err = 0; - - if (p < (unsigned long) high_memory) { - unsigned long to_write = min_t(unsigned long, count, - (unsigned long)high_memory - p); - wrote = do_write_kmem(p, buf, to_write, ppos); - if (wrote != to_write) - return wrote; - p += wrote; - buf += wrote; - count -= wrote; - } - - if (count > 0) { - kbuf = (char *)__get_free_page(GFP_KERNEL); - if (!kbuf) - return wrote ? wrote : -ENOMEM; - while (count > 0) { - unsigned long sz = size_inside_page(p, count); - unsigned long n; - - if (!is_vmalloc_or_module_addr((void *)p)) { - err = -ENXIO; - break; - } - n = copy_from_user(kbuf, buf, sz); - if (n) { - err = -EFAULT; - break; - } - vwrite(kbuf, (char *)p, sz); - count -= sz; - buf += sz; - virtr += sz; - p += sz; - if (should_stop_iteration()) - break; - } - free_page((unsigned long)kbuf); - } - - *ppos = p; - return virtr + wrote ? : err; -} - static ssize_t read_port(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -855,7 +640,6 @@ static int open_port(struct inode *inode, struct file *filp) #define write_zero write_null #define write_iter_zero write_iter_null #define open_mem open_port -#define open_kmem open_mem static const struct file_operations __maybe_unused mem_fops = { .llseek = memory_lseek, @@ -869,18 +653,6 @@ static const struct file_operations __maybe_unused mem_fops = { #endif }; -static const struct file_operations __maybe_unused kmem_fops = { - .llseek = memory_lseek, - .read = read_kmem, - .write = write_kmem, - .mmap = mmap_kmem, - .open = open_kmem, -#ifndef CONFIG_MMU - .get_unmapped_area = get_unmapped_area_mem, - .mmap_capabilities = memory_mmap_capabilities, -#endif -}; - static const struct file_operations null_fops = { .llseek = null_lseek, .read = read_null, @@ -924,9 +696,6 @@ static const struct memdev { } devlist[] = { #ifdef CONFIG_DEVMEM [DEVMEM_MINOR] = { "mem", 0, &mem_fops, FMODE_UNSIGNED_OFFSET }, -#endif -#ifdef CONFIG_DEVKMEM - [2] = { "kmem", 0, &kmem_fops, FMODE_UNSIGNED_OFFSET }, #endif [3] = { "null", 0666, &null_fops, 0 }, #ifdef CONFIG_DEVPORT -- cgit v1.2.3