From 60e93dc097f7f13a16a7e4b75b8803eb2adbb721 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 13 Oct 2020 16:50:39 -0700 Subject: device-dax: add dis-contiguous resource support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Break the requirement that device-dax instances are physically contiguous. With this constraint removed it allows fragmented available capacity to be fully allocated. This capability is useful to mitigate the "noisy neighbor" problem with memory-side-cache management for virtual machines, or any other scenario where a platform address boundary also designates a performance boundary. For example a direct mapped memory side cache might rotate cache colors at 1GB boundaries. With dis-contiguous allocations a device-dax instance could be configured to contain only 1 cache color. It also satisfies Joao's use case (see link) for partitioning memory for exclusive guest access. It allows for a future potential mode where the host kernel need not allocate 'struct page' capacity up-front. Reported-by: Joao Martins Signed-off-by: Dan Williams Signed-off-by: Andrew Morton Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Ard Biesheuvel Cc: Benjamin Herrenschmidt Cc: Ben Skeggs Cc: Bjorn Helgaas Cc: Borislav Petkov Cc: Boris Ostrovsky Cc: Brice Goglin Cc: Catalin Marinas Cc: Daniel Vetter Cc: Dave Hansen Cc: Dave Jiang Cc: David Airlie Cc: David Hildenbrand Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Hulk Robot Cc: Ingo Molnar Cc: Ira Weiny Cc: Jason Gunthorpe Cc: Jason Yan Cc: Jeff Moyer Cc: "Jérôme Glisse" Cc: Jia He Cc: Jonathan Cameron Cc: Juergen Gross Cc: kernel test robot Cc: Michael Ellerman Cc: Mike Rapoport Cc: Paul Mackerras Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Randy Dunlap Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Tom Lendacky Cc: Vishal Verma Cc: Vivek Goyal Cc: Wei Yang Cc: Will Deacon Link: https://lore.kernel.org/lkml/20200110190313.17144-1-joao.m.martins@oracle.com/ Link: https://lkml.kernel.org/r/159643104304.4062302.16561669534797528660.stgit@dwillia2-desk3.amr.corp.intel.com Link: https://lkml.kernel.org/r/160106116875.30709.11456649969327399771.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Linus Torvalds --- drivers/dax/device.c | 55 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 19 deletions(-) (limited to 'drivers/dax/device.c') diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 5f808617672a..bf389712a20b 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -55,15 +55,22 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, unsigned long size) { - struct range *range = &dev_dax->range; - phys_addr_t phys; - - phys = pgoff * PAGE_SIZE + range->start; - if (phys >= range->start && phys <= range->end) { + int i; + + for (i = 0; i < dev_dax->nr_range; i++) { + struct dev_dax_range *dax_range = &dev_dax->ranges[i]; + struct range *range = &dax_range->range; + unsigned long long pgoff_end; + phys_addr_t phys; + + pgoff_end = dax_range->pgoff + PHYS_PFN(range_len(range)) - 1; + if (pgoff < dax_range->pgoff || pgoff > pgoff_end) + continue; + phys = PFN_PHYS(pgoff - dax_range->pgoff) + range->start; if (phys + size - 1 <= range->end) return phys; + break; } - return -1; } @@ -395,30 +402,40 @@ static void dev_dax_kill(void *dev_dax) int dev_dax_probe(struct dev_dax *dev_dax) { struct dax_device *dax_dev = dev_dax->dax_dev; - struct range *range = &dev_dax->range; struct device *dev = &dev_dax->dev; struct dev_pagemap *pgmap; struct inode *inode; struct cdev *cdev; void *addr; - int rc; - - /* 1:1 map region resource range to device-dax instance range */ - if (!devm_request_mem_region(dev, range->start, range_len(range), - dev_name(dev))) { - dev_warn(dev, "could not reserve range: %#llx - %#llx\n", - range->start, range->end); - return -EBUSY; - } + int rc, i; pgmap = dev_dax->pgmap; + if (dev_WARN_ONCE(dev, pgmap && dev_dax->nr_range > 1, + "static pgmap / multi-range device conflict\n")) + return -EINVAL; + if (!pgmap) { - pgmap = devm_kzalloc(dev, sizeof(*pgmap), GFP_KERNEL); + pgmap = devm_kzalloc(dev, sizeof(*pgmap) + sizeof(struct range) + * (dev_dax->nr_range - 1), GFP_KERNEL); if (!pgmap) return -ENOMEM; - pgmap->range = *range; - pgmap->nr_range = 1; + pgmap->nr_range = dev_dax->nr_range; + } + + for (i = 0; i < dev_dax->nr_range; i++) { + struct range *range = &dev_dax->ranges[i].range; + + if (!devm_request_mem_region(dev, range->start, + range_len(range), dev_name(dev))) { + dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve range\n", + i, range->start, range->end); + return -EBUSY; + } + /* don't update the range for static pgmap */ + if (!dev_dax->pgmap) + pgmap->ranges[i] = *range; } + pgmap->type = MEMORY_DEVICE_GENERIC; addr = devm_memremap_pages(dev, pgmap); if (IS_ERR(addr)) -- cgit v1.2.3