From 4a20bc3e207488064e08fc5d7220d6acf95c80dd Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 8 Dec 2022 09:02:00 -0800 Subject: cxl/pci: Move tracepoint definitions to drivers/cxl/core/ CXL is using tracepoints for reporting RAS capability register payloads for AER events, and has plans to use tracepoints for the output payload of Get Poison List and Get Event Records commands. For organization purposes it would be nice to keep those all under a single + local CXL trace system. This also organization also potentially helps in the future when CXL drivers expand beyond generic memory expanders, however that would also entail a move away from the expander-specific cxl_dev_state context, save that for later. Note that the powerpc-specific drivers/misc/cxl/ also defines a 'cxl' trace system, however, it is unlikely that a single platform will ever load both drivers simultaneously. Cc: Steven Rostedt Tested-by: Alison Schofield Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/167051869176.436579.9728373544811641087.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- include/trace/events/cxl.h | 112 --------------------------------------------- 1 file changed, 112 deletions(-) delete mode 100644 include/trace/events/cxl.h (limited to 'include') diff --git a/include/trace/events/cxl.h b/include/trace/events/cxl.h deleted file mode 100644 index ad085a2534ef..000000000000 --- a/include/trace/events/cxl.h +++ /dev/null @@ -1,112 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM cxl - -#if !defined(_CXL_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ) -#define _CXL_EVENTS_H - -#include - -#define CXL_HEADERLOG_SIZE SZ_512 -#define CXL_HEADERLOG_SIZE_U32 SZ_512 / sizeof(u32) - -#define CXL_RAS_UC_CACHE_DATA_PARITY BIT(0) -#define CXL_RAS_UC_CACHE_ADDR_PARITY BIT(1) -#define CXL_RAS_UC_CACHE_BE_PARITY BIT(2) -#define CXL_RAS_UC_CACHE_DATA_ECC BIT(3) -#define CXL_RAS_UC_MEM_DATA_PARITY BIT(4) -#define CXL_RAS_UC_MEM_ADDR_PARITY BIT(5) -#define CXL_RAS_UC_MEM_BE_PARITY BIT(6) -#define CXL_RAS_UC_MEM_DATA_ECC BIT(7) -#define CXL_RAS_UC_REINIT_THRESH BIT(8) -#define CXL_RAS_UC_RSVD_ENCODE BIT(9) -#define CXL_RAS_UC_POISON BIT(10) -#define CXL_RAS_UC_RECV_OVERFLOW BIT(11) -#define CXL_RAS_UC_INTERNAL_ERR BIT(14) -#define CXL_RAS_UC_IDE_TX_ERR BIT(15) -#define CXL_RAS_UC_IDE_RX_ERR BIT(16) - -#define show_uc_errs(status) __print_flags(status, " | ", \ - { CXL_RAS_UC_CACHE_DATA_PARITY, "Cache Data Parity Error" }, \ - { CXL_RAS_UC_CACHE_ADDR_PARITY, "Cache Address Parity Error" }, \ - { CXL_RAS_UC_CACHE_BE_PARITY, "Cache Byte Enable Parity Error" }, \ - { CXL_RAS_UC_CACHE_DATA_ECC, "Cache Data ECC Error" }, \ - { CXL_RAS_UC_MEM_DATA_PARITY, "Memory Data Parity Error" }, \ - { CXL_RAS_UC_MEM_ADDR_PARITY, "Memory Address Parity Error" }, \ - { CXL_RAS_UC_MEM_BE_PARITY, "Memory Byte Enable Parity Error" }, \ - { CXL_RAS_UC_MEM_DATA_ECC, "Memory Data ECC Error" }, \ - { CXL_RAS_UC_REINIT_THRESH, "REINIT Threshold Hit" }, \ - { CXL_RAS_UC_RSVD_ENCODE, "Received Unrecognized Encoding" }, \ - { CXL_RAS_UC_POISON, "Received Poison From Peer" }, \ - { CXL_RAS_UC_RECV_OVERFLOW, "Receiver Overflow" }, \ - { CXL_RAS_UC_INTERNAL_ERR, "Component Specific Error" }, \ - { CXL_RAS_UC_IDE_TX_ERR, "IDE Tx Error" }, \ - { CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \ -) - -TRACE_EVENT(cxl_aer_uncorrectable_error, - TP_PROTO(const struct device *dev, u32 status, u32 fe, u32 *hl), - TP_ARGS(dev, status, fe, hl), - TP_STRUCT__entry( - __string(dev_name, dev_name(dev)) - __field(u32, status) - __field(u32, first_error) - __array(u32, header_log, CXL_HEADERLOG_SIZE_U32) - ), - TP_fast_assign( - __assign_str(dev_name, dev_name(dev)); - __entry->status = status; - __entry->first_error = fe; - /* - * Embed the 512B headerlog data for user app retrieval and - * parsing, but no need to print this in the trace buffer. - */ - memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE); - ), - TP_printk("%s: status: '%s' first_error: '%s'", - __get_str(dev_name), - show_uc_errs(__entry->status), - show_uc_errs(__entry->first_error) - ) -); - -#define CXL_RAS_CE_CACHE_DATA_ECC BIT(0) -#define CXL_RAS_CE_MEM_DATA_ECC BIT(1) -#define CXL_RAS_CE_CRC_THRESH BIT(2) -#define CLX_RAS_CE_RETRY_THRESH BIT(3) -#define CXL_RAS_CE_CACHE_POISON BIT(4) -#define CXL_RAS_CE_MEM_POISON BIT(5) -#define CXL_RAS_CE_PHYS_LAYER_ERR BIT(6) - -#define show_ce_errs(status) __print_flags(status, " | ", \ - { CXL_RAS_CE_CACHE_DATA_ECC, "Cache Data ECC Error" }, \ - { CXL_RAS_CE_MEM_DATA_ECC, "Memory Data ECC Error" }, \ - { CXL_RAS_CE_CRC_THRESH, "CRC Threshold Hit" }, \ - { CLX_RAS_CE_RETRY_THRESH, "Retry Threshold" }, \ - { CXL_RAS_CE_CACHE_POISON, "Received Cache Poison From Peer" }, \ - { CXL_RAS_CE_MEM_POISON, "Received Memory Poison From Peer" }, \ - { CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" } \ -) - -TRACE_EVENT(cxl_aer_correctable_error, - TP_PROTO(const struct device *dev, u32 status), - TP_ARGS(dev, status), - TP_STRUCT__entry( - __string(dev_name, dev_name(dev)) - __field(u32, status) - ), - TP_fast_assign( - __assign_str(dev_name, dev_name(dev)); - __entry->status = status; - ), - TP_printk("%s: status: '%s'", - __get_str(dev_name), show_ce_errs(__entry->status) - ) -); - -#endif /* _CXL_EVENTS_H */ - -/* This part must be outside protection */ -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE cxl -#include -- cgit v1.2.3 From 589c3357370a596ef7c99c00baca8ac799fce531 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Sun, 11 Dec 2022 23:06:19 -0800 Subject: PCI/CXL: Export native CXL error reporting control CXL _OSC Error Reporting Control is used by the OS to determine if Firmware has control of various CXL error reporting capabilities including the event logs. Expose the result of negotiating CXL Error Reporting Control in struct pci_host_bridge for consumption by the CXL drivers. Cc: Bjorn Helgaas Cc: Lukas Wunner Cc: linux-pci@vger.kernel.org Cc: linux-acpi@vger.kernel.org Signed-off-by: Ira Weiny Acked-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/20221212070627.1372402-2-ira.weiny@intel.com Signed-off-by: Dan Williams --- drivers/acpi/pci_root.c | 3 +++ drivers/pci/probe.c | 1 + include/linux/pci.h | 1 + 3 files changed, 5 insertions(+) (limited to 'include') diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index b3c202d2a433..84030804a763 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -1047,6 +1047,9 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root, if (!(root->osc_control_set & OSC_PCI_EXPRESS_DPC_CONTROL)) host_bridge->native_dpc = 0; + if (!(root->osc_ext_control_set & OSC_CXL_ERROR_REPORTING_CONTROL)) + host_bridge->native_cxl_error = 0; + /* * Evaluate the "PCI Boot Configuration" _DSM Function. If it * exists and returns 0, we must preserve any PCI resource diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 1779582fb500..f796dfb9b14b 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -596,6 +596,7 @@ static void pci_init_host_bridge(struct pci_host_bridge *bridge) bridge->native_ltr = 1; bridge->native_dpc = 1; bridge->domain_nr = PCI_DOMAIN_NR_NOT_SET; + bridge->native_cxl_error = 1; device_initialize(&bridge->dev); } diff --git a/include/linux/pci.h b/include/linux/pci.h index adffd65e84b4..22319ea71ab0 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -578,6 +578,7 @@ struct pci_host_bridge { unsigned int native_pme:1; /* OS may use PCIe PME */ unsigned int native_ltr:1; /* OS may use PCIe LTR */ unsigned int native_dpc:1; /* OS may use PCIe DPC */ + unsigned int native_cxl_error:1; /* OS may use CXL RAS/Events */ unsigned int preserve_config:1; /* Preserve FW resource setup */ unsigned int size_windows:1; /* Enable root bus sizing */ unsigned int msi_domain:1; /* Bridge wants MSI domain */ -- cgit v1.2.3 From 93c177fd6ff0655a5fa43ec945a57d7b0200ad80 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:06:21 -0800 Subject: kernel/range: Uplevel the cxl subsystem's range_contains() helper In support of the CXL subsystem's use of 'struct range' to track decode address ranges, add a common range_contains() implementation with identical semantics as resource_contains(); The existing 'range_contains()' in lib/stackinit_kunit.c is namespaced with a 'stackinit_' prefix. Cc: Kees Cook Reviewed-by: Vishal Verma Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Gregory Price Reviewed-by: Ira Weiny Tested-by: Fan Ni Link: https://lore.kernel.org/r/167601998163.1924368.6067392174077323935.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 5 ----- include/linux/range.h | 5 +++++ lib/stackinit_kunit.c | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 1d1492440287..9ed2120dbf8a 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -214,11 +214,6 @@ static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds) return devm_add_action_or_reset(host, clear_mem_enable, cxlds); } -static bool range_contains(struct range *r1, struct range *r2) -{ - return r1->start <= r2->start && r1->end >= r2->end; -} - /* require dvsec ranges to be covered by a locked platform window */ static int dvsec_range_allowed(struct device *dev, void *arg) { diff --git a/include/linux/range.h b/include/linux/range.h index 274681cc3154..7efb6a9b069b 100644 --- a/include/linux/range.h +++ b/include/linux/range.h @@ -13,6 +13,11 @@ static inline u64 range_len(const struct range *range) return range->end - range->start + 1; } +static inline bool range_contains(struct range *r1, struct range *r2) +{ + return r1->start <= r2->start && r1->end >= r2->end; +} + int add_range(struct range *range, int az, int nr_range, u64 start, u64 end); diff --git a/lib/stackinit_kunit.c b/lib/stackinit_kunit.c index 4591d6cf5e01..05947a2feb93 100644 --- a/lib/stackinit_kunit.c +++ b/lib/stackinit_kunit.c @@ -31,8 +31,8 @@ static volatile u8 forced_mask = 0xff; static void *fill_start, *target_start; static size_t fill_size, target_size; -static bool range_contains(char *haystack_start, size_t haystack_size, - char *needle_start, size_t needle_size) +static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, + char *needle_start, size_t needle_size) { if (needle_start >= haystack_start && needle_start + needle_size <= haystack_start + haystack_size) @@ -175,7 +175,7 @@ static noinline void test_ ## name (struct kunit *test) \ \ /* Validate that compiler lined up fill and target. */ \ KUNIT_ASSERT_TRUE_MSG(test, \ - range_contains(fill_start, fill_size, \ + stackinit_range_contains(fill_start, fill_size, \ target_start, target_size), \ "stack fill missed target!? " \ "(fill %zu wide, target offset by %d)\n", \ -- cgit v1.2.3 From fe098574a93b4e2acb046b583e9857337d807f38 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:07:02 -0800 Subject: dax/hmem: Convey the dax range via memregion_info() In preparation for hmem platform devices to be unregistered, stop using platform_device_add_resources() to convey the address range. The platform_device_add_resources() API causes an existing "Soft Reserved" iomem resource to be re-parented under an inserted platform device resource. When that platform device is deleted it removes the platform device resource and all children. Instead, it is sufficient to convey just the address range and let request_mem_region() insert resources to indicate the devices active in the range. This allows the "Soft Reserved" resource to be re-enumerated upon the next probe event. Reviewed-by: Jonathan Cameron Tested-by: Fan Ni Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/167602002217.1924368.7036275892522551624.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/dax/hmem/device.c | 37 ++++++++++++++----------------------- drivers/dax/hmem/hmem.c | 14 +++----------- include/linux/memregion.h | 2 ++ 3 files changed, 19 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c index 20749c7fab81..b1b339bccfe5 100644 --- a/drivers/dax/hmem/device.c +++ b/drivers/dax/hmem/device.c @@ -15,15 +15,8 @@ static struct resource hmem_active = { .flags = IORESOURCE_MEM, }; -void hmem_register_device(int target_nid, struct resource *r) +void hmem_register_device(int target_nid, struct resource *res) { - /* define a clean / non-busy resource for the platform device */ - struct resource res = { - .start = r->start, - .end = r->end, - .flags = IORESOURCE_MEM, - .desc = IORES_DESC_SOFT_RESERVED, - }; struct platform_device *pdev; struct memregion_info info; int rc, id; @@ -31,55 +24,53 @@ void hmem_register_device(int target_nid, struct resource *r) if (nohmem) return; - rc = region_intersects(res.start, resource_size(&res), IORESOURCE_MEM, - IORES_DESC_SOFT_RESERVED); + rc = region_intersects(res->start, resource_size(res), IORESOURCE_MEM, + IORES_DESC_SOFT_RESERVED); if (rc != REGION_INTERSECTS) return; id = memregion_alloc(GFP_KERNEL); if (id < 0) { - pr_err("memregion allocation failure for %pr\n", &res); + pr_err("memregion allocation failure for %pr\n", res); return; } pdev = platform_device_alloc("hmem", id); if (!pdev) { - pr_err("hmem device allocation failure for %pr\n", &res); + pr_err("hmem device allocation failure for %pr\n", res); goto out_pdev; } - if (!__request_region(&hmem_active, res.start, resource_size(&res), + if (!__request_region(&hmem_active, res->start, resource_size(res), dev_name(&pdev->dev), 0)) { - dev_dbg(&pdev->dev, "hmem range %pr already active\n", &res); + dev_dbg(&pdev->dev, "hmem range %pr already active\n", res); goto out_active; } pdev->dev.numa_node = numa_map_to_online_node(target_nid); info = (struct memregion_info) { .target_node = target_nid, + .range = { + .start = res->start, + .end = res->end, + }, }; rc = platform_device_add_data(pdev, &info, sizeof(info)); if (rc < 0) { - pr_err("hmem memregion_info allocation failure for %pr\n", &res); - goto out_resource; - } - - rc = platform_device_add_resources(pdev, &res, 1); - if (rc < 0) { - pr_err("hmem resource allocation failure for %pr\n", &res); + pr_err("hmem memregion_info allocation failure for %pr\n", res); goto out_resource; } rc = platform_device_add(pdev); if (rc < 0) { - dev_err(&pdev->dev, "device add failed for %pr\n", &res); + dev_err(&pdev->dev, "device add failed for %pr\n", res); goto out_resource; } return; out_resource: - __release_region(&hmem_active, res.start, resource_size(&res)); + __release_region(&hmem_active, res->start, resource_size(res)); out_active: platform_device_put(pdev); out_pdev: diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index c7351e0dc8ff..5025a8c9850b 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -15,25 +15,17 @@ static int dax_hmem_probe(struct platform_device *pdev) struct memregion_info *mri; struct dev_dax_data data; struct dev_dax *dev_dax; - struct resource *res; - struct range range; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) - return -ENOMEM; mri = dev->platform_data; - range.start = res->start; - range.end = res->end; - dax_region = alloc_dax_region(dev, pdev->id, &range, mri->target_node, - PMD_SIZE, 0); + dax_region = alloc_dax_region(dev, pdev->id, &mri->range, + mri->target_node, PMD_SIZE, 0); if (!dax_region) return -ENOMEM; data = (struct dev_dax_data) { .dax_region = dax_region, .id = -1, - .size = region_idle ? 0 : resource_size(res), + .size = region_idle ? 0 : range_len(&mri->range), }; dev_dax = devm_create_dev_dax(&data); if (IS_ERR(dev_dax)) diff --git a/include/linux/memregion.h b/include/linux/memregion.h index bf83363807ac..c01321467789 100644 --- a/include/linux/memregion.h +++ b/include/linux/memregion.h @@ -3,10 +3,12 @@ #define _MEMREGION_H_ #include #include +#include #include struct memregion_info { int target_node; + struct range range; }; #ifdef CONFIG_MEMREGION -- cgit v1.2.3 From 7dab174e2e27eeaf10273e597ffbef4f8ea032bb Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:07:07 -0800 Subject: dax/hmem: Move hmem device registration to dax_hmem.ko In preparation for the CXL region driver to take over the responsibility of registering device-dax instances for CXL regions, move the registration of "hmem" devices to dax_hmem.ko. Previously the builtin component of this enabling (drivers/dax/hmem/device.o) would register platform devices for each address range and trigger the dax_hmem.ko module to load and attach device-dax instances to those devices. Now, the ranges are collected from the HMAT and EFI memory map walking, but the device creation is deferred. A new "hmem_platform" device is created which triggers dax_hmem.ko to load and register the platform devices. Tested-by: Fan Ni Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167602002771.1924368.5653558226424530127.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/acpi/numa/hmat.c | 2 +- drivers/dax/Kconfig | 2 +- drivers/dax/hmem/device.c | 91 +++++++++++++++++++--------------------- drivers/dax/hmem/hmem.c | 105 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/dax.h | 7 +++- 5 files changed, 155 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index ff24282301ab..bba268ecd802 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -718,7 +718,7 @@ static void hmat_register_target_devices(struct memory_target *target) for (res = target->memregions.child; res; res = res->sibling) { int target_nid = pxm_to_node(target->memory_pxm); - hmem_register_device(target_nid, res); + hmem_register_resource(target_nid, res); } } diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index 5fdf269a822e..d13c889c2a64 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -46,7 +46,7 @@ config DEV_DAX_HMEM Say M if unsure. config DEV_DAX_HMEM_DEVICES - depends on DEV_DAX_HMEM && DAX=y + depends on DEV_DAX_HMEM && DAX def_bool y config DEV_DAX_KMEM diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c index b1b339bccfe5..f9e1a76a04a9 100644 --- a/drivers/dax/hmem/device.c +++ b/drivers/dax/hmem/device.c @@ -8,6 +8,8 @@ static bool nohmem; module_param_named(disable, nohmem, bool, 0444); +static bool platform_initialized; +static DEFINE_MUTEX(hmem_resource_lock); static struct resource hmem_active = { .name = "HMEM devices", .start = 0, @@ -15,71 +17,66 @@ static struct resource hmem_active = { .flags = IORESOURCE_MEM, }; -void hmem_register_device(int target_nid, struct resource *res) +int walk_hmem_resources(struct device *host, walk_hmem_fn fn) +{ + struct resource *res; + int rc = 0; + + mutex_lock(&hmem_resource_lock); + for (res = hmem_active.child; res; res = res->sibling) { + rc = fn(host, (int) res->desc, res); + if (rc) + break; + } + mutex_unlock(&hmem_resource_lock); + return rc; +} +EXPORT_SYMBOL_GPL(walk_hmem_resources); + +static void __hmem_register_resource(int target_nid, struct resource *res) { struct platform_device *pdev; - struct memregion_info info; - int rc, id; + struct resource *new; + int rc; - if (nohmem) + new = __request_region(&hmem_active, res->start, resource_size(res), "", + 0); + if (!new) { + pr_debug("hmem range %pr already active\n", res); return; + } - rc = region_intersects(res->start, resource_size(res), IORESOURCE_MEM, - IORES_DESC_SOFT_RESERVED); - if (rc != REGION_INTERSECTS) - return; + new->desc = target_nid; - id = memregion_alloc(GFP_KERNEL); - if (id < 0) { - pr_err("memregion allocation failure for %pr\n", res); + if (platform_initialized) return; - } - pdev = platform_device_alloc("hmem", id); + pdev = platform_device_alloc("hmem_platform", 0); if (!pdev) { - pr_err("hmem device allocation failure for %pr\n", res); - goto out_pdev; - } - - if (!__request_region(&hmem_active, res->start, resource_size(res), - dev_name(&pdev->dev), 0)) { - dev_dbg(&pdev->dev, "hmem range %pr already active\n", res); - goto out_active; - } - - pdev->dev.numa_node = numa_map_to_online_node(target_nid); - info = (struct memregion_info) { - .target_node = target_nid, - .range = { - .start = res->start, - .end = res->end, - }, - }; - rc = platform_device_add_data(pdev, &info, sizeof(info)); - if (rc < 0) { - pr_err("hmem memregion_info allocation failure for %pr\n", res); - goto out_resource; + pr_err_once("failed to register device-dax hmem_platform device\n"); + return; } rc = platform_device_add(pdev); - if (rc < 0) { - dev_err(&pdev->dev, "device add failed for %pr\n", res); - goto out_resource; - } + if (rc) + platform_device_put(pdev); + else + platform_initialized = true; +} - return; +void hmem_register_resource(int target_nid, struct resource *res) +{ + if (nohmem) + return; -out_resource: - __release_region(&hmem_active, res->start, resource_size(res)); -out_active: - platform_device_put(pdev); -out_pdev: - memregion_free(id); + mutex_lock(&hmem_resource_lock); + __hmem_register_resource(target_nid, res); + mutex_unlock(&hmem_resource_lock); } static __init int hmem_register_one(struct resource *res, void *data) { - hmem_register_device(phys_to_target_node(res->start), res); + hmem_register_resource(phys_to_target_node(res->start), res); return 0; } diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index 5025a8c9850b..e7bdff3132fa 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "../bus.h" static bool region_idle; @@ -43,8 +44,110 @@ static struct platform_driver dax_hmem_driver = { }, }; -module_platform_driver(dax_hmem_driver); +static void release_memregion(void *data) +{ + memregion_free((long) data); +} + +static void release_hmem(void *pdev) +{ + platform_device_unregister(pdev); +} + +static int hmem_register_device(struct device *host, int target_nid, + const struct resource *res) +{ + struct platform_device *pdev; + struct memregion_info info; + long id; + int rc; + + rc = region_intersects(res->start, resource_size(res), IORESOURCE_MEM, + IORES_DESC_SOFT_RESERVED); + if (rc != REGION_INTERSECTS) + return 0; + + id = memregion_alloc(GFP_KERNEL); + if (id < 0) { + dev_err(host, "memregion allocation failure for %pr\n", res); + return -ENOMEM; + } + rc = devm_add_action_or_reset(host, release_memregion, (void *) id); + if (rc) + return rc; + + pdev = platform_device_alloc("hmem", id); + if (!pdev) { + dev_err(host, "device allocation failure for %pr\n", res); + return -ENOMEM; + } + + pdev->dev.numa_node = numa_map_to_online_node(target_nid); + info = (struct memregion_info) { + .target_node = target_nid, + .range = { + .start = res->start, + .end = res->end, + }, + }; + rc = platform_device_add_data(pdev, &info, sizeof(info)); + if (rc < 0) { + dev_err(host, "memregion_info allocation failure for %pr\n", + res); + goto out_put; + } + + rc = platform_device_add(pdev); + if (rc < 0) { + dev_err(host, "%s add failed for %pr\n", dev_name(&pdev->dev), + res); + goto out_put; + } + + return devm_add_action_or_reset(host, release_hmem, pdev); + +out_put: + platform_device_put(pdev); + return rc; +} + +static int dax_hmem_platform_probe(struct platform_device *pdev) +{ + return walk_hmem_resources(&pdev->dev, hmem_register_device); +} + +static struct platform_driver dax_hmem_platform_driver = { + .probe = dax_hmem_platform_probe, + .driver = { + .name = "hmem_platform", + }, +}; + +static __init int dax_hmem_init(void) +{ + int rc; + + rc = platform_driver_register(&dax_hmem_platform_driver); + if (rc) + return rc; + + rc = platform_driver_register(&dax_hmem_driver); + if (rc) + platform_driver_unregister(&dax_hmem_platform_driver); + + return rc; +} + +static __exit void dax_hmem_exit(void) +{ + platform_driver_unregister(&dax_hmem_driver); + platform_driver_unregister(&dax_hmem_platform_driver); +} + +module_init(dax_hmem_init); +module_exit(dax_hmem_exit); MODULE_ALIAS("platform:hmem*"); +MODULE_ALIAS("platform:hmem_platform*"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Intel Corporation"); diff --git a/include/linux/dax.h b/include/linux/dax.h index 2b5ecb591059..bf6258472e49 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -262,11 +262,14 @@ static inline bool dax_mapping(struct address_space *mapping) } #ifdef CONFIG_DEV_DAX_HMEM_DEVICES -void hmem_register_device(int target_nid, struct resource *r); +void hmem_register_resource(int target_nid, struct resource *r); #else -static inline void hmem_register_device(int target_nid, struct resource *r) +static inline void hmem_register_resource(int target_nid, struct resource *r) { } #endif +typedef int (*walk_hmem_fn)(struct device *dev, int target_nid, + const struct resource *res); +int walk_hmem_resources(struct device *dev, walk_hmem_fn fn); #endif -- cgit v1.2.3 From 11ef026e467b05eac0a2ceb981d351ddc4b12216 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 2 Feb 2023 17:04:25 -0800 Subject: cxl/uapi: Add warning on CXL command enum The CXL command enum is exported to user space and must maintain backwards compatibility. Add comment that new defines must be added to the end of the list. Suggested-by: Dan Williams Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20221222-cxl-misc-v4-2-62f701c1cdd1@intel.com Signed-off-by: Dan Williams --- include/uapi/linux/cxl_mem.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index c71021a2a9ed..459a3f7f764b 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -19,6 +19,10 @@ #define CXL_MEM_QUERY_COMMANDS _IOR(0xCE, 1, struct cxl_mem_query_commands) #define CXL_MEM_SEND_COMMAND _IOWR(0xCE, 2, struct cxl_send_command) +/* + * NOTE: New defines must be added to the end of the list to preserve + * compatibility because this enum is exported to user space. + */ #define CXL_CMDS \ ___C(INVALID, "Invalid Command"), \ ___C(IDENTIFY, "Identify Command"), \ -- cgit v1.2.3 From 814a15f3b4131d3205bd47e23b50ccc6c666ce1d Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 2 Feb 2023 17:04:26 -0800 Subject: cxl/uapi: Tag commands from cxl_query_cmd() It was pointed out that commands not supported by the device or excluded by the kernel were being returned in cxl_query_cmd().[1] While libcxl correctly handles failing commands, it is more efficient to not issue an invalid command in the first place. This can't be done without additional information being returned from cxl_query_cmd(). In addition, information about the availability of commands can be useful for debugging. Add flags to struct cxl_command_info which reflect if a command is enabled and/or exclusive to the kernel. [1] https://lore.kernel.org/all/63b4ec4e37cc1_5178e2941d@dwillia2-xfh.jf.intel.com.notmuch/ Suggested-by: Dan Williams Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20221222-cxl-misc-v4-3-62f701c1cdd1@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 9 +++++++-- include/uapi/linux/cxl_mem.h | 19 +++++++++++++++++-- 2 files changed, 24 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 202d49dd9911..fd3b13b0fb7f 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -451,9 +451,14 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd, * structures. */ cxl_for_each_cmd(cmd) { - const struct cxl_command_info *info = &cmd->info; + struct cxl_command_info info = cmd->info; - if (copy_to_user(&q->commands[j++], info, sizeof(*info))) + if (test_bit(info.id, cxlmd->cxlds->enabled_cmds)) + info.flags |= CXL_MEM_COMMAND_FLAG_ENABLED; + if (test_bit(info.id, cxlmd->cxlds->exclusive_cmds)) + info.flags |= CXL_MEM_COMMAND_FLAG_EXCLUSIVE; + + if (copy_to_user(&q->commands[j++], &info, sizeof(info))) return -EFAULT; if (j == n_commands) diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index 459a3f7f764b..9fe832afee37 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -72,6 +72,19 @@ static const struct { * struct cxl_command_info - Command information returned from a query. * @id: ID number for the command. * @flags: Flags that specify command behavior. + * + * CXL_MEM_COMMAND_FLAG_USER_ENABLED + * + * The given command id is supported by the driver and is supported by + * a related opcode on the device. + * + * CXL_MEM_COMMAND_FLAG_EXCLUSIVE + * + * Requests with the given command id will terminate with EBUSY as the + * kernel actively owns management of the given resource. For example, + * the label-storage-area can not be written while the kernel is + * actively managing that space. + * * @size_in: Expected input size, or ~0 if variable length. * @size_out: Expected output size, or ~0 if variable length. * @@ -81,7 +94,7 @@ static const struct { * bytes of output. * * - @id = 10 - * - @flags = 0 + * - @flags = CXL_MEM_COMMAND_FLAG_ENABLED * - @size_in = ~0 * - @size_out = 0 * @@ -91,7 +104,9 @@ struct cxl_command_info { __u32 id; __u32 flags; -#define CXL_MEM_COMMAND_FLAG_MASK GENMASK(0, 0) +#define CXL_MEM_COMMAND_FLAG_MASK GENMASK(1, 0) +#define CXL_MEM_COMMAND_FLAG_ENABLED BIT(0) +#define CXL_MEM_COMMAND_FLAG_EXCLUSIVE BIT(1) __u32 size_in; __u32 size_out; -- cgit v1.2.3 From af73370dcbe584f44168872b0bacac899f3c48f2 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 2 Feb 2023 17:04:27 -0800 Subject: cxl/mem: Fix UAPI command comment The command comment had grammatical errors. In an attempt to fix those it was noted that the comment and the query command were not in sync. Now that the query command returns excluded and device unsupported command information. Update the kdoc and fix the grammatical errors. [1] https://lore.kernel.org/all/63b4ec4e37cc1_5178e2941d@dwillia2-xfh.jf.intel.com.notmuch/ Reviewed-by: Jonathan Cameron Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20221222-cxl-misc-v4-4-62f701c1cdd1@intel.com Signed-off-by: Dan Williams --- include/uapi/linux/cxl_mem.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index 9fe832afee37..86bbacf2a315 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -11,9 +11,10 @@ /** * DOC: UAPI * - * Not all of all commands that the driver supports are always available for use - * by userspace. Userspace must check the results from the QUERY command in - * order to determine the live set of commands. + * Not all of the commands that the driver supports are available for use by + * userspace at all times. Userspace can check the result of the QUERY command + * to determine the live set of commands. Alternatively, it can issue the + * command and check for failure. */ #define CXL_MEM_QUERY_COMMANDS _IOR(0xCE, 1, struct cxl_mem_query_commands) -- cgit v1.2.3 From f57aec443c24d2e8e1f3b5b4856aea12ddda4254 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 13 Feb 2023 17:01:05 -0800 Subject: cxl/pmem: Fix nvdimm registration races A loop of the form: while true; do modprobe cxl_pci; modprobe -r cxl_pci; done ...fails with the following crash signature: BUG: kernel NULL pointer dereference, address: 0000000000000040 [..] RIP: 0010:cxl_internal_send_cmd+0x5/0xb0 [cxl_core] [..] Call Trace: cxl_pmem_ctl+0x121/0x240 [cxl_pmem] nvdimm_get_config_data+0xd6/0x1a0 [libnvdimm] nd_label_data_init+0x135/0x7e0 [libnvdimm] nvdimm_probe+0xd6/0x1c0 [libnvdimm] nvdimm_bus_probe+0x7a/0x1e0 [libnvdimm] really_probe+0xde/0x380 __driver_probe_device+0x78/0x170 driver_probe_device+0x1f/0x90 __device_attach_driver+0x85/0x110 bus_for_each_drv+0x7d/0xc0 __device_attach+0xb4/0x1e0 bus_probe_device+0x9f/0xc0 device_add+0x445/0x9c0 nd_async_device_register+0xe/0x40 [libnvdimm] async_run_entry_fn+0x30/0x130 ...namely that the bottom half of async nvdimm device registration runs after the CXL has already torn down the context that cxl_pmem_ctl() needs. Unlike the ACPI NFIT case that benefits from launching multiple nvdimm device registrations in parallel from those listed in the table, CXL is already marked PROBE_PREFER_ASYNCHRONOUS. So provide for a synchronous registration path to preclude this scenario. Fixes: 21083f51521f ("cxl/pmem: Register 'pmem' / cxl_nvdimm devices") Cc: Reported-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/cxl/pmem.c | 1 + drivers/nvdimm/bus.c | 19 ++++++++++++++++--- drivers/nvdimm/dimm_devs.c | 5 ++++- drivers/nvdimm/nd-core.h | 1 + include/linux/libnvdimm.h | 3 +++ 5 files changed, 25 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 08bbbac9a6d0..71cfa1fdf902 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -76,6 +76,7 @@ static int cxl_nvdimm_probe(struct device *dev) return rc; set_bit(NDD_LABELING, &flags); + set_bit(NDD_REGISTER_SYNC, &flags); set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask); set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask); set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask); diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index b38d0355b0ac..5ad49056921b 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -508,7 +508,7 @@ static void nd_async_device_unregister(void *d, async_cookie_t cookie) put_device(dev); } -void nd_device_register(struct device *dev) +static void __nd_device_register(struct device *dev, bool sync) { if (!dev) return; @@ -531,11 +531,24 @@ void nd_device_register(struct device *dev) } get_device(dev); - async_schedule_dev_domain(nd_async_device_register, dev, - &nd_async_domain); + if (sync) + nd_async_device_register(dev, 0); + else + async_schedule_dev_domain(nd_async_device_register, dev, + &nd_async_domain); +} + +void nd_device_register(struct device *dev) +{ + __nd_device_register(dev, false); } EXPORT_SYMBOL(nd_device_register); +void nd_device_register_sync(struct device *dev) +{ + __nd_device_register(dev, true); +} + void nd_device_unregister(struct device *dev, enum nd_async_mode mode) { bool killed; diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 1fc081dcf631..6d3b03a9fa02 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -624,7 +624,10 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER); device_initialize(dev); lockdep_set_class(&dev->mutex, &nvdimm_key); - nd_device_register(dev); + if (test_bit(NDD_REGISTER_SYNC, &flags)) + nd_device_register_sync(dev); + else + nd_device_register(dev); return nvdimm; } diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index cc86ee09d7c0..845408f10655 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -107,6 +107,7 @@ int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus); void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus); void nd_synchronize(void); void nd_device_register(struct device *dev); +void nd_device_register_sync(struct device *dev); struct nd_label_id; char *nd_label_gen_id(struct nd_label_id *label_id, const uuid_t *uuid, u32 flags); diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index af38252ad704..e772aae71843 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -41,6 +41,9 @@ enum { */ NDD_INCOHERENT = 7, + /* dimm provider wants synchronous registration by __nvdimm_create() */ + NDD_REGISTER_SYNC = 8, + /* need to set a limit somewhere, but yes, this is likely overkill */ ND_IOCTL_MAX_BUFLEN = SZ_4M, ND_CMD_MAX_ELEM = 5, -- cgit v1.2.3 From 248529edc86f8d7d390a15a86bd1904951311665 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 14 Feb 2023 10:00:24 -0700 Subject: cxl: add RAS status unmasking for CXL By default the CXL RAS mask registers bits are defaulted to 1's and suppress all error reporting. If the kernel has negotiated ownership of error handling for CXL then unmask the mask registers by writing 0s. PCI_EXP_DEVCTL capability is checked to see uncorrectable or correctable errors bits are set before unmasking the respective errors. Acked-by: Bjorn Helgaas # pci_regs.h Reviewed-by: Jonathan Cameron Signed-off-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/167639402301.778884.12556849214955646539.stgit@djiang5-mobl3.local Signed-off-by: Dan Williams --- drivers/cxl/cxl.h | 1 + drivers/cxl/pci.c | 65 +++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/pci_regs.h | 1 + 3 files changed, 67 insertions(+) (limited to 'include') diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index aa3af3bb73b2..197ecffce4d0 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -130,6 +130,7 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw) #define CXL_RAS_UNCORRECTABLE_STATUS_MASK (GENMASK(16, 14) | GENMASK(11, 0)) #define CXL_RAS_UNCORRECTABLE_MASK_OFFSET 0x4 #define CXL_RAS_UNCORRECTABLE_MASK_MASK (GENMASK(16, 14) | GENMASK(11, 0)) +#define CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK BIT(8) #define CXL_RAS_UNCORRECTABLE_SEVERITY_OFFSET 0x8 #define CXL_RAS_UNCORRECTABLE_SEVERITY_MASK (GENMASK(16, 14) | GENMASK(11, 0)) #define CXL_RAS_CORRECTABLE_STATUS_OFFSET 0xC diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index cc8b5a766a82..a509640994d7 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -412,6 +412,67 @@ static bool is_cxl_restricted(struct pci_dev *pdev) return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END; } +/* + * CXL v3.0 6.2.3 Table 6-4 + * The table indicates that if PCIe Flit Mode is set, then CXL is in 256B flits + * mode, otherwise it's 68B flits mode. + */ +static bool cxl_pci_flit_256(struct pci_dev *pdev) +{ + u16 lnksta2; + + pcie_capability_read_word(pdev, PCI_EXP_LNKSTA2, &lnksta2); + return lnksta2 & PCI_EXP_LNKSTA2_FLIT; +} + +static int cxl_pci_ras_unmask(struct pci_dev *pdev) +{ + struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + void __iomem *addr; + u32 orig_val, val, mask; + u16 cap; + int rc; + + if (!cxlds->regs.ras) { + dev_dbg(&pdev->dev, "No RAS registers.\n"); + return 0; + } + + /* BIOS has CXL error control */ + if (!host_bridge->native_cxl_error) + return -ENXIO; + + rc = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap); + if (rc) + return rc; + + if (cap & PCI_EXP_DEVCTL_URRE) { + addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET; + orig_val = readl(addr); + + mask = CXL_RAS_UNCORRECTABLE_MASK_MASK; + if (!cxl_pci_flit_256(pdev)) + mask &= ~CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK; + val = orig_val & ~mask; + writel(val, addr); + dev_dbg(&pdev->dev, + "Uncorrectable RAS Errors Mask: %#x -> %#x\n", + orig_val, val); + } + + if (cap & PCI_EXP_DEVCTL_CERE) { + addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_MASK_OFFSET; + orig_val = readl(addr); + val = orig_val & ~CXL_RAS_CORRECTABLE_MASK_MASK; + writel(val, addr); + dev_dbg(&pdev->dev, "Correctable RAS Errors Mask: %#x -> %#x\n", + orig_val, val); + } + + return 0; +} + static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct cxl_register_map map; @@ -489,6 +550,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); + rc = cxl_pci_ras_unmask(pdev); + if (rc) + dev_dbg(&pdev->dev, "No RAS reporting unmasked\n"); + pci_save_state(pdev); return rc; diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 85ab1278811e..dc2000e0fe3a 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -693,6 +693,7 @@ #define PCI_EXP_LNKCTL2_TX_MARGIN 0x0380 /* Transmit Margin */ #define PCI_EXP_LNKCTL2_HASD 0x0020 /* HW Autonomous Speed Disable */ #define PCI_EXP_LNKSTA2 0x32 /* Link Status 2 */ +#define PCI_EXP_LNKSTA2_FLIT 0x0400 /* Flit Mode Status */ #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32 /* end of v2 EPs w/ link */ #define PCI_EXP_SLTCAP2 0x34 /* Slot Capabilities 2 */ #define PCI_EXP_SLTCAP2_IBPD 0x00000001 /* In-band PD Disable Supported */ -- cgit v1.2.3