author		Linus Torvalds <torvalds@linux-foundation.org>	2022-12-13 00:55:31 +0300
committer	Linus Torvalds <torvalds@linux-foundation.org>	2022-12-13 00:55:31 +0300
commit		c1f0fcd85d3d66f002fc1a4986363840fcca766d (patch)
tree		414ad8ea3b38a33585de78686528d4c99e89598e /drivers
parent		691806e977a3a64895bd891878ed726cdbd282c0 (diff)
parent		f04facfb993de47e2133b2b842d72b97b1c50162 (diff)
Merge tag 'cxl-for-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
Pull cxl updates from Dan Williams:
 "Compute Express Link (CXL) updates for 6.2.

  While it may seem backwards, the CXL update this time around includes
  some focus on CXL 1.x enabling, where the work to date had been with
  CXL 2.0 (VH topologies) in mind.

  First generation CXL can mostly be supported via BIOS, similar to DDR.
  However, it became clear there are use cases for OS-native CXL error
  handling, and some CXL 3.0 endpoint features can be deployed on
  CXL 1.x hosts (Restricted CXL Host (RCH) topologies). So, this update
  brings RCH topologies into the Linux CXL device model.

  In support of the ongoing CXL 2.0+ enabling, two new core kernel
  facilities are added.

  One is the ability for the kernel to flag collisions between userspace
  access to PCI configuration registers and kernel accesses. This is
  brought on by the PCIe Data-Object-Exchange (DOE) facility, a hardware
  mailbox over config-cycles.

  The other is a cpu_cache_invalidate_memregion() API that maps to
  wbinvd_on_all_cpus() on x86. To prevent abuse it is disabled in guest
  VMs and on architectures that do not support it yet. The CXL paths
  that need it, dynamic memory region creation and security commands
  (erase / unlock), are disabled when it is not present.

  As for CXL 2.0+, this cycle the subsystem gains support for Persistent
  Memory Security commands, error handling in response to PCIe AER
  notifications, and support for the "XOR" host bridge interleave
  algorithm.

  Summary:

   - Add the cpu_cache_invalidate_memregion() API for cache flushing in
     response to physical memory reconfiguration, or memory-side data
     invalidation from operations like secure erase or memory-device
     unlock

   - Add a facility for the kernel to warn about collisions between
     kernel and userspace access to PCI configuration registers

   - Add support for Restricted CXL Host (RCH) topologies (formerly
     CXL 1.1)

   - Add handling and reporting of CXL errors reported via the PCIe AER
     mechanism

   - Add support for CXL Persistent Memory Security commands

   - Add support for the "XOR" algorithm for CXL host bridge interleave

   - Rework / simplify CXL to NVDIMM interactions

   - Miscellaneous cleanups and fixes"

* tag 'cxl-for-6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (71 commits)
  cxl/region: Fix memdev reuse check
  cxl/pci: Remove endian confusion
  cxl/pci: Add some type-safety to the AER trace points
  cxl/security: Drop security command ioctl uapi
  cxl/mbox: Add variable output size validation for internal commands
  cxl/mbox: Enable cxl_mbox_send_cmd() users to validate output size
  cxl/security: Fix Get Security State output payload endian handling
  cxl: update names for interleave ways conversion macros
  cxl: update names for interleave granularity conversion macros
  cxl/acpi: Warn about an invalid CHBCR in an existing CHBS entry
  tools/testing/cxl: Require cache invalidation bypass
  cxl/acpi: Fail decoder add if CXIMS for HBIG is missing
  cxl/region: Fix spelling mistake "memergion" -> "memregion"
  cxl/regs: Fix sparse warning
  cxl/acpi: Set ACPI's CXL _OSC to indicate RCD mode support
  tools/testing/cxl: Add an RCH topology
  cxl/port: Add RCD endpoint port enumeration
  cxl/mem: Move devm_cxl_add_endpoint() from cxl_core to cxl_mem
  tools/testing/cxl: Add XOR Math support to cxl_test
  cxl/acpi: Support CXL XOR Interleave Math (CXIMS)
  ...
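The first summary item is consumed at the bottom of this diff in drivers/cxl/core/region.c. A condensed rendering of cxl_region_invalidate_memregion() as added there (the CONFIG_CXL_REGION_INVALIDATION_TEST bypass branch and error logging are omitted here for brevity) looks like:

	static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
	{
		/* nothing pending: no attach has dirtied the HPA range */
		if (!test_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags))
			return 0;

		/* wbinvd-class flushing is unavailable in guests and on other archs */
		if (!cpu_cache_has_invalidate_memregion())
			return -ENXIO;

		/* write back and invalidate caches over CXL-described memory */
		cpu_cache_invalidate_memregion(IORES_DESC_CXL);
		clear_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
		return 0;
	}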
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/acpi/nfit/intel.c        30
-rw-r--r--  drivers/acpi/pci_root.c           1
-rw-r--r--  drivers/cxl/Kconfig              18
-rw-r--r--  drivers/cxl/Makefile              2
-rw-r--r--  drivers/cxl/acpi.c              275
-rw-r--r--  drivers/cxl/core/core.h           8
-rw-r--r--  drivers/cxl/core/hdm.c           45
-rw-r--r--  drivers/cxl/core/mbox.c         102
-rw-r--r--  drivers/cxl/core/memdev.c         1
-rw-r--r--  drivers/cxl/core/pci.c            5
-rw-r--r--  drivers/cxl/core/pmem.c         109
-rw-r--r--  drivers/cxl/core/port.c         216
-rw-r--r--  drivers/cxl/core/region.c       112
-rw-r--r--  drivers/cxl/core/regs.c         242
-rw-r--r--  drivers/cxl/cxl.h               140
-rw-r--r--  drivers/cxl/cxlmem.h             64
-rw-r--r--  drivers/cxl/cxlpci.h              9
-rw-r--r--  drivers/cxl/mem.c                74
-rw-r--r--  drivers/cxl/pci.c               228
-rw-r--r--  drivers/cxl/pmem.c              407
-rw-r--r--  drivers/cxl/security.c          202
-rw-r--r--  drivers/nvdimm/Kconfig           12
-rw-r--r--  drivers/nvdimm/dimm_devs.c        9
-rw-r--r--  drivers/nvdimm/region.c          11
-rw-r--r--  drivers/nvdimm/region_devs.c     50
-rw-r--r--  drivers/nvdimm/security.c        43
-rw-r--r--  drivers/pci/pci-sysfs.c           7
-rw-r--r--  drivers/pci/pcie/aer.c            8
-rw-r--r--  drivers/pci/probe.c               6
29 files changed, 1672 insertions, 764 deletions
diff --git a/drivers/acpi/nfit/intel.c b/drivers/acpi/nfit/intel.c
index 8dd792a55730..3902759abcba 100644
--- a/drivers/acpi/nfit/intel.c
+++ b/drivers/acpi/nfit/intel.c
@@ -3,6 +3,7 @@
#include <linux/libnvdimm.h>
#include <linux/ndctl.h>
#include <linux/acpi.h>
+#include <linux/memregion.h>
#include <asm/smp.h>
#include "intel.h"
#include "nfit.h"
@@ -190,8 +191,6 @@ static int intel_security_change_key(struct nvdimm *nvdimm,
}
}
-static void nvdimm_invalidate_cache(void);
-
static int __maybe_unused intel_security_unlock(struct nvdimm *nvdimm,
const struct nvdimm_key_data *key_data)
{
@@ -227,9 +226,6 @@ static int __maybe_unused intel_security_unlock(struct nvdimm *nvdimm,
return -EIO;
}
- /* DIMM unlocked, invalidate all CPU caches before we read it */
- nvdimm_invalidate_cache();
-
return 0;
}
@@ -297,8 +293,6 @@ static int __maybe_unused intel_security_erase(struct nvdimm *nvdimm,
if (!test_bit(cmd, &nfit_mem->dsm_mask))
return -ENOTTY;
- /* flush all cache before we erase DIMM */
- nvdimm_invalidate_cache();
memcpy(nd_cmd.cmd.passphrase, key->data,
sizeof(nd_cmd.cmd.passphrase));
rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
@@ -317,8 +311,6 @@ static int __maybe_unused intel_security_erase(struct nvdimm *nvdimm,
return -ENXIO;
}
- /* DIMM erased, invalidate all CPU caches before we read it */
- nvdimm_invalidate_cache();
return 0;
}
@@ -354,8 +346,6 @@ static int __maybe_unused intel_security_query_overwrite(struct nvdimm *nvdimm)
return -ENXIO;
}
- /* flush all cache before we make the nvdimms available */
- nvdimm_invalidate_cache();
return 0;
}
@@ -380,8 +370,6 @@ static int __maybe_unused intel_security_overwrite(struct nvdimm *nvdimm,
if (!test_bit(NVDIMM_INTEL_OVERWRITE, &nfit_mem->dsm_mask))
return -ENOTTY;
- /* flush all cache before we erase DIMM */
- nvdimm_invalidate_cache();
memcpy(nd_cmd.cmd.passphrase, nkey->data,
sizeof(nd_cmd.cmd.passphrase));
rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
@@ -401,22 +389,6 @@ static int __maybe_unused intel_security_overwrite(struct nvdimm *nvdimm,
}
}
-/*
- * TODO: define a cross arch wbinvd equivalent when/if
- * NVDIMM_FAMILY_INTEL command support arrives on another arch.
- */
-#ifdef CONFIG_X86
-static void nvdimm_invalidate_cache(void)
-{
- wbinvd_on_all_cpus();
-}
-#else
-static void nvdimm_invalidate_cache(void)
-{
- WARN_ON_ONCE("cache invalidation required after unlock\n");
-}
-#endif
-
static const struct nvdimm_security_ops __intel_security_ops = {
.get_flags = intel_security_flags,
.freeze = intel_security_freeze,
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 4e3db20e9cbb..b3c202d2a433 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -493,6 +493,7 @@ static u32 calculate_cxl_support(void)
u32 support;
support = OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT;
+ support |= OSC_CXL_1_1_PORT_REG_ACCESS_SUPPORT;
if (pci_aer_available())
support |= OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT;
if (IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
index 768ced3d6fe8..0ac53c422c31 100644
--- a/drivers/cxl/Kconfig
+++ b/drivers/cxl/Kconfig
@@ -111,4 +111,22 @@ config CXL_REGION
select MEMREGION
select GET_FREE_REGION
+config CXL_REGION_INVALIDATION_TEST
+ bool "CXL: Region Cache Management Bypass (TEST)"
+ depends on CXL_REGION
+ help
+ CXL Region management and security operations potentially invalidate
+ the content of CPU caches without notifying those caches to
+ invalidate the affected cachelines. The CXL Region driver attempts
+ to invalidate caches when those events occur. If that invalidation
+ fails the region will fail to enable. Reasons for cache
+ invalidation failure are due to the CPU not providing a cache
+ invalidation mechanism. For example usage of wbinvd is restricted to
+ bare metal x86. However, for testing purposes toggling this option
+ can disable that data integrity safety and proceed with enabling
+ regions when there might be conflicting contents in the CPU cache.
+
+ If unsure, or if this kernel is meant for production environments,
+ say N.
+
endif
diff --git a/drivers/cxl/Makefile b/drivers/cxl/Makefile
index a78270794150..db321f48ba52 100644
--- a/drivers/cxl/Makefile
+++ b/drivers/cxl/Makefile
@@ -9,5 +9,5 @@ obj-$(CONFIG_CXL_PORT) += cxl_port.o
cxl_mem-y := mem.o
cxl_pci-y := pci.o
cxl_acpi-y := acpi.o
-cxl_pmem-y := pmem.o
+cxl_pmem-y := pmem.o security.o
cxl_port-y := port.o
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index fb649683dd3a..ad0849af42d7 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -6,9 +6,120 @@
#include <linux/kernel.h>
#include <linux/acpi.h>
#include <linux/pci.h>
+#include <asm/div64.h>
#include "cxlpci.h"
#include "cxl.h"
+#define CXL_RCRB_SIZE SZ_8K
+
+struct cxl_cxims_data {
+ int nr_maps;
+ u64 xormaps[];
+};
+
+/*
+ * Find a targets entry (n) in the host bridge interleave list.
+ * CXL Specification 3.0 Table 9-22
+ */
+static int cxl_xor_calc_n(u64 hpa, struct cxl_cxims_data *cximsd, int iw,
+ int ig)
+{
+ int i = 0, n = 0;
+ u8 eiw;
+
+ /* IW: 2,4,6,8,12,16 begin building 'n' using xormaps */
+ if (iw != 3) {
+ for (i = 0; i < cximsd->nr_maps; i++)
+ n |= (hweight64(hpa & cximsd->xormaps[i]) & 1) << i;
+ }
+ /* IW: 3,6,12 add a modulo calculation to 'n' */
+ if (!is_power_of_2(iw)) {
+ if (ways_to_eiw(iw, &eiw))
+ return -1;
+ hpa &= GENMASK_ULL(51, eiw + ig);
+ n |= do_div(hpa, 3) << i;
+ }
+ return n;
+}
+
+static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos)
+{
+ struct cxl_cxims_data *cximsd = cxlrd->platform_data;
+ struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd;
+ struct cxl_decoder *cxld = &cxlsd->cxld;
+ int ig = cxld->interleave_granularity;
+ int iw = cxld->interleave_ways;
+ int n = 0;
+ u64 hpa;
+
+ if (dev_WARN_ONCE(&cxld->dev,
+ cxld->interleave_ways != cxlsd->nr_targets,
+ "misconfigured root decoder\n"))
+ return NULL;
+
+ hpa = cxlrd->res->start + pos * ig;
+
+ /* Entry (n) is 0 for no interleave (iw == 1) */
+ if (iw != 1)
+ n = cxl_xor_calc_n(hpa, cximsd, iw, ig);
+
+ if (n < 0)
+ return NULL;
+
+ return cxlrd->cxlsd.target[n];
+}
+
+struct cxl_cxims_context {
+ struct device *dev;
+ struct cxl_root_decoder *cxlrd;
+};
+
+static int cxl_parse_cxims(union acpi_subtable_headers *header, void *arg,
+ const unsigned long end)
+{
+ struct acpi_cedt_cxims *cxims = (struct acpi_cedt_cxims *)header;
+ struct cxl_cxims_context *ctx = arg;
+ struct cxl_root_decoder *cxlrd = ctx->cxlrd;
+ struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
+ struct device *dev = ctx->dev;
+ struct cxl_cxims_data *cximsd;
+ unsigned int hbig, nr_maps;
+ int rc;
+
+ rc = eig_to_granularity(cxims->hbig, &hbig);
+ if (rc)
+ return rc;
+
+ /* Does this CXIMS entry apply to the given CXL Window? */
+ if (hbig != cxld->interleave_granularity)
+ return 0;
+
+ /* IW 1,3 do not use xormaps and skip this parsing entirely */
+ if (is_power_of_2(cxld->interleave_ways))
+ /* 2, 4, 8, 16 way */
+ nr_maps = ilog2(cxld->interleave_ways);
+ else
+ /* 6, 12 way */
+ nr_maps = ilog2(cxld->interleave_ways / 3);
+
+ if (cxims->nr_xormaps < nr_maps) {
+ dev_dbg(dev, "CXIMS nr_xormaps[%d] expected[%d]\n",
+ cxims->nr_xormaps, nr_maps);
+ return -ENXIO;
+ }
+
+ cximsd = devm_kzalloc(dev, struct_size(cximsd, xormaps, nr_maps),
+ GFP_KERNEL);
+ if (!cximsd)
+ return -ENOMEM;
+ memcpy(cximsd->xormaps, cxims->xormap_list,
+ nr_maps * sizeof(*cximsd->xormaps));
+ cximsd->nr_maps = nr_maps;
+ cxlrd->platform_data = cximsd;
+
+ return 0;
+}
+
static unsigned long cfmws_to_decoder_flags(int restrictions)
{
unsigned long flags = CXL_DECODER_F_ENABLE;
@@ -33,8 +144,10 @@ static int cxl_acpi_cfmws_verify(struct device *dev,
int rc, expected_len;
unsigned int ways;
- if (cfmws->interleave_arithmetic != ACPI_CEDT_CFMWS_ARITHMETIC_MODULO) {
- dev_err(dev, "CFMWS Unsupported Interleave Arithmetic\n");
+ if (cfmws->interleave_arithmetic != ACPI_CEDT_CFMWS_ARITHMETIC_MODULO &&
+ cfmws->interleave_arithmetic != ACPI_CEDT_CFMWS_ARITHMETIC_XOR) {
+ dev_err(dev, "CFMWS Unknown Interleave Arithmetic: %d\n",
+ cfmws->interleave_arithmetic);
return -EINVAL;
}
@@ -48,7 +161,7 @@ static int cxl_acpi_cfmws_verify(struct device *dev,
return -EINVAL;
}
- rc = cxl_to_ways(cfmws->interleave_ways, &ways);
+ rc = eiw_to_ways(cfmws->interleave_ways, &ways);
if (rc) {
dev_err(dev, "CFMWS Interleave Ways (%d) invalid\n",
cfmws->interleave_ways);
@@ -70,6 +183,10 @@ static int cxl_acpi_cfmws_verify(struct device *dev,
return 0;
}
+/*
+ * Note, @dev must be the first member, see 'struct cxl_chbs_context'
+ * and mock_acpi_table_parse_cedt()
+ */
struct cxl_cfmws_context {
struct device *dev;
struct cxl_port *root_port;
@@ -84,9 +201,11 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
struct cxl_cfmws_context *ctx = arg;
struct cxl_port *root_port = ctx->root_port;
struct resource *cxl_res = ctx->cxl_res;
+ struct cxl_cxims_context cxims_ctx;
struct cxl_root_decoder *cxlrd;
struct device *dev = ctx->dev;
struct acpi_cedt_cfmws *cfmws;
+ cxl_calc_hb_fn cxl_calc_hb;
struct cxl_decoder *cxld;
unsigned int ways, i, ig;
struct resource *res;
@@ -102,10 +221,10 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
return 0;
}
- rc = cxl_to_ways(cfmws->interleave_ways, &ways);
+ rc = eiw_to_ways(cfmws->interleave_ways, &ways);
if (rc)
return rc;
- rc = cxl_to_granularity(cfmws->granularity, &ig);
+ rc = eig_to_granularity(cfmws->granularity, &ig);
if (rc)
return rc;
for (i = 0; i < ways; i++)
@@ -128,7 +247,12 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
if (rc)
goto err_insert;
- cxlrd = cxl_root_decoder_alloc(root_port, ways);
+ if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_MODULO)
+ cxl_calc_hb = cxl_hb_modulo;
+ else
+ cxl_calc_hb = cxl_hb_xor;
+
+ cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb);
if (IS_ERR(cxlrd))
return 0;
@@ -148,7 +272,25 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
ig = CXL_DECODER_MIN_GRANULARITY;
cxld->interleave_granularity = ig;
+ if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) {
+ if (ways != 1 && ways != 3) {
+ cxims_ctx = (struct cxl_cxims_context) {
+ .dev = dev,
+ .cxlrd = cxlrd,
+ };
+ rc = acpi_table_parse_cedt(ACPI_CEDT_TYPE_CXIMS,
+ cxl_parse_cxims, &cxims_ctx);
+ if (rc < 0)
+ goto err_xormap;
+ if (!cxlrd->platform_data) {
+ dev_err(dev, "No CXIMS for HBIG %u\n", ig);
+ rc = -EINVAL;
+ goto err_xormap;
+ }
+ }
+ }
rc = cxl_decoder_add(cxld, target_map);
+err_xormap:
if (rc)
put_device(&cxld->dev);
else
@@ -193,34 +335,39 @@ static int add_host_bridge_uport(struct device *match, void *arg)
{
struct cxl_port *root_port = arg;
struct device *host = root_port->dev.parent;
- struct acpi_device *bridge = to_cxl_host_bridge(host, match);
+ struct acpi_device *hb = to_cxl_host_bridge(host, match);
struct acpi_pci_root *pci_root;
struct cxl_dport *dport;
struct cxl_port *port;
+ struct device *bridge;
int rc;
- if (!bridge)
+ if (!hb)
return 0;
- dport = cxl_find_dport_by_dev(root_port, match);
+ pci_root = acpi_pci_find_root(hb->handle);
+ bridge = pci_root->bus->bridge;
+ dport = cxl_find_dport_by_dev(root_port, bridge);
if (!dport) {
dev_dbg(host, "host bridge expected and not found\n");
return 0;
}
- /*
- * Note that this lookup already succeeded in
- * to_cxl_host_bridge(), so no need to check for failure here
- */
- pci_root = acpi_pci_find_root(bridge->handle);
- rc = devm_cxl_register_pci_bus(host, match, pci_root->bus);
+ if (dport->rch) {
+ dev_info(bridge, "host supports CXL (restricted)\n");
+ return 0;
+ }
+
+ rc = devm_cxl_register_pci_bus(host, bridge, pci_root->bus);
if (rc)
return rc;
- port = devm_cxl_add_port(host, match, dport->component_reg_phys, dport);
+ port = devm_cxl_add_port(host, bridge, dport->component_reg_phys,
+ dport);
if (IS_ERR(port))
return PTR_ERR(port);
- dev_dbg(host, "%s: add: %s\n", dev_name(match), dev_name(&port->dev));
+
+ dev_info(bridge, "host supports CXL\n");
return 0;
}
@@ -228,7 +375,9 @@ static int add_host_bridge_uport(struct device *match, void *arg)
struct cxl_chbs_context {
struct device *dev;
unsigned long long uid;
+ resource_size_t rcrb;
resource_size_t chbcr;
+ u32 cxl_version;
};
static int cxl_get_chbcr(union acpi_subtable_headers *header, void *arg,
@@ -244,51 +393,86 @@ static int cxl_get_chbcr(union acpi_subtable_headers *header, void *arg,
if (ctx->uid != chbs->uid)
return 0;
- ctx->chbcr = chbs->base;
+
+ ctx->cxl_version = chbs->cxl_version;
+ ctx->rcrb = CXL_RESOURCE_NONE;
+ ctx->chbcr = CXL_RESOURCE_NONE;
+
+ if (!chbs->base)
+ return 0;
+
+ if (chbs->cxl_version != ACPI_CEDT_CHBS_VERSION_CXL11) {
+ ctx->chbcr = chbs->base;
+ return 0;
+ }
+
+ if (chbs->length != CXL_RCRB_SIZE)
+ return 0;
+
+ ctx->rcrb = chbs->base;
+ ctx->chbcr = cxl_rcrb_to_component(ctx->dev, chbs->base,
+ CXL_RCRB_DOWNSTREAM);
return 0;
}
static int add_host_bridge_dport(struct device *match, void *arg)
{
- acpi_status status;
+ acpi_status rc;
+ struct device *bridge;
unsigned long long uid;
struct cxl_dport *dport;
struct cxl_chbs_context ctx;
+ struct acpi_pci_root *pci_root;
struct cxl_port *root_port = arg;
struct device *host = root_port->dev.parent;
- struct acpi_device *bridge = to_cxl_host_bridge(host, match);
+ struct acpi_device *hb = to_cxl_host_bridge(host, match);
- if (!bridge)
+ if (!hb)
return 0;
- status = acpi_evaluate_integer(bridge->handle, METHOD_NAME__UID, NULL,
- &uid);
- if (status != AE_OK) {
- dev_err(host, "unable to retrieve _UID of %s\n",
- dev_name(match));
+ rc = acpi_evaluate_integer(hb->handle, METHOD_NAME__UID, NULL, &uid);
+ if (rc != AE_OK) {
+ dev_err(match, "unable to retrieve _UID\n");
return -ENODEV;
}
+ dev_dbg(match, "UID found: %lld\n", uid);
+
ctx = (struct cxl_chbs_context) {
- .dev = host,
+ .dev = match,
.uid = uid,
};
acpi_table_parse_cedt(ACPI_CEDT_TYPE_CHBS, cxl_get_chbcr, &ctx);
- if (ctx.chbcr == 0) {
- dev_warn(host, "No CHBS found for Host Bridge: %s\n",
- dev_name(match));
+ if (!ctx.chbcr) {
+ dev_warn(match, "No CHBS found for Host Bridge (UID %lld)\n",
+ uid);
return 0;
}
- dport = devm_cxl_add_dport(root_port, match, uid, ctx.chbcr);
- if (IS_ERR(dport)) {
- dev_err(host, "failed to add downstream port: %s\n",
- dev_name(match));
- return PTR_ERR(dport);
+ if (ctx.rcrb != CXL_RESOURCE_NONE)
+ dev_dbg(match, "RCRB found for UID %lld: %pa\n", uid, &ctx.rcrb);
+
+ if (ctx.chbcr == CXL_RESOURCE_NONE) {
+ dev_warn(match, "CHBCR invalid for Host Bridge (UID %lld)\n",
+ uid);
+ return 0;
}
- dev_dbg(host, "add dport%llu: %s\n", uid, dev_name(match));
+
+ dev_dbg(match, "CHBCR found: %pa\n", &ctx.chbcr);
+
+ pci_root = acpi_pci_find_root(hb->handle);
+ bridge = pci_root->bus->bridge;
+ if (ctx.cxl_version == ACPI_CEDT_CHBS_VERSION_CXL11)
+ dport = devm_cxl_add_rch_dport(root_port, bridge, uid,
+ ctx.chbcr, ctx.rcrb);
+ else
+ dport = devm_cxl_add_dport(root_port, bridge, uid,
+ ctx.chbcr);
+ if (IS_ERR(dport))
+ return PTR_ERR(dport);
+
return 0;
}
@@ -466,7 +650,6 @@ static int cxl_acpi_probe(struct platform_device *pdev)
root_port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL);
if (IS_ERR(root_port))
return PTR_ERR(root_port);
- dev_dbg(host, "add: %s\n", dev_name(&root_port->dev));
rc = bus_for_each_dev(adev->dev.bus, NULL, root_port,
add_host_bridge_dport);
@@ -512,7 +695,8 @@ static int cxl_acpi_probe(struct platform_device *pdev)
return rc;
/* In case PCI is scanned before ACPI re-trigger memdev attach */
- return cxl_bus_rescan();
+ cxl_bus_rescan();
+ return 0;
}
static const struct acpi_device_id cxl_acpi_ids[] = {
@@ -536,7 +720,20 @@ static struct platform_driver cxl_acpi_driver = {
.id_table = cxl_test_ids,
};
-module_platform_driver(cxl_acpi_driver);
+static int __init cxl_acpi_init(void)
+{
+ return platform_driver_register(&cxl_acpi_driver);
+}
+
+static void __exit cxl_acpi_exit(void)
+{
+ platform_driver_unregister(&cxl_acpi_driver);
+ cxl_bus_drain();
+}
+
+module_init(cxl_acpi_init);
+module_exit(cxl_acpi_exit);
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(CXL);
MODULE_IMPORT_NS(ACPI);
+MODULE_SOFTDEP("pre: cxl_pmem");
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 1d8f87be283f..8c04672dca56 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -58,14 +58,6 @@ extern struct rw_semaphore cxl_dpa_rwsem;
bool is_switch_decoder(struct device *dev);
struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
-static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
- struct cxl_memdev *cxlmd)
-{
- if (!port)
- return NULL;
-
- return xa_load(&port->endpoints, (unsigned long)&cxlmd->dev);
-}
int cxl_memdev_init(void);
void cxl_memdev_exit(void);
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index d1d2caea5c62..dcc16d7cb8f3 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -82,18 +82,23 @@ static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm)
cxlhdm->interleave_mask |= GENMASK(14, 12);
}
-static void __iomem *map_hdm_decoder_regs(struct cxl_port *port,
- void __iomem *crb)
+static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb,
+ struct cxl_component_regs *regs)
{
- struct cxl_component_reg_map map;
+ struct cxl_register_map map = {
+ .resource = port->component_reg_phys,
+ .base = crb,
+ .max_size = CXL_COMPONENT_REG_BLOCK_SIZE,
+ };
- cxl_probe_component_regs(&port->dev, crb, &map);
- if (!map.hdm_decoder.valid) {
+ cxl_probe_component_regs(&port->dev, crb, &map.component_map);
+ if (!map.component_map.hdm_decoder.valid) {
dev_err(&port->dev, "HDM decoder registers invalid\n");
- return IOMEM_ERR_PTR(-ENXIO);
+ return -ENXIO;
}
- return crb + map.hdm_decoder.offset;
+ return cxl_map_component_regs(&port->dev, regs, &map,
+ BIT(CXL_CM_CAP_CAP_ID_HDM));
}
/**
@@ -103,25 +108,25 @@ static void __iomem *map_hdm_decoder_regs(struct cxl_port *port,
struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port)
{
struct device *dev = &port->dev;
- void __iomem *crb, *hdm;
struct cxl_hdm *cxlhdm;
+ void __iomem *crb;
+ int rc;
cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);
if (!cxlhdm)
return ERR_PTR(-ENOMEM);
cxlhdm->port = port;
- crb = devm_cxl_iomap_block(dev, port->component_reg_phys,
- CXL_COMPONENT_REG_BLOCK_SIZE);
+ crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
if (!crb) {
dev_err(dev, "No component registers mapped\n");
return ERR_PTR(-ENXIO);
}
- hdm = map_hdm_decoder_regs(port, crb);
- if (IS_ERR(hdm))
- return ERR_CAST(hdm);
- cxlhdm->regs.hdm_decoder = hdm;
+ rc = map_hdm_decoder_regs(port, crb, &cxlhdm->regs);
+ iounmap(crb);
+ if (rc)
+ return ERR_PTR(rc);
parse_hdm_decoder_caps(cxlhdm);
if (cxlhdm->decoder_count == 0) {
@@ -489,10 +494,10 @@ static void cxld_set_interleave(struct cxl_decoder *cxld, u32 *ctrl)
* Input validation ensures these warns never fire, but otherwise
* suppress unititalized variable usage warnings.
*/
- if (WARN_ONCE(ways_to_cxl(cxld->interleave_ways, &eiw),
+ if (WARN_ONCE(ways_to_eiw(cxld->interleave_ways, &eiw),
"invalid interleave_ways: %d\n", cxld->interleave_ways))
return;
- if (WARN_ONCE(granularity_to_cxl(cxld->interleave_granularity, &eig),
+ if (WARN_ONCE(granularity_to_eig(cxld->interleave_granularity, &eig),
"invalid interleave_granularity: %d\n",
cxld->interleave_granularity))
return;
@@ -736,16 +741,16 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
}
cxld->target_type = CXL_DECODER_EXPANDER;
}
- rc = cxl_to_ways(FIELD_GET(CXL_HDM_DECODER0_CTRL_IW_MASK, ctrl),
- &cxld->interleave_ways);
+ rc = eiw_to_ways(FIELD_GET(CXL_HDM_DECODER0_CTRL_IW_MASK, ctrl),
+ &cxld->interleave_ways);
if (rc) {
dev_warn(&port->dev,
"decoder%d.%d: Invalid interleave ways (ctrl: %#x)\n",
port->id, cxld->id, ctrl);
return rc;
}
- rc = cxl_to_granularity(FIELD_GET(CXL_HDM_DECODER0_CTRL_IG_MASK, ctrl),
- &cxld->interleave_granularity);
+ rc = eig_to_granularity(FIELD_GET(CXL_HDM_DECODER0_CTRL_IG_MASK, ctrl),
+ &cxld->interleave_granularity);
if (rc)
return rc;
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 0c90f13870a4..b03fba212799 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -140,13 +140,9 @@ static const char *cxl_mem_opcode_to_name(u16 opcode)
}
/**
- * cxl_mbox_send_cmd() - Send a mailbox command to a device.
+ * cxl_internal_send_cmd() - Kernel internal interface to send a mailbox command
* @cxlds: The device data for the operation
- * @opcode: Opcode for the mailbox command.
- * @in: The input payload for the mailbox command.
- * @in_size: The length of the input payload
- * @out: Caller allocated buffer for the output.
- * @out_size: Expected size of output.
+ * @mbox_cmd: initialized command to execute
*
* Context: Any context.
* Return:
@@ -161,40 +157,40 @@ static const char *cxl_mem_opcode_to_name(u16 opcode)
* error. While this distinction can be useful for commands from userspace, the
* kernel will only be able to use results when both are successful.
*/
-int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in,
- size_t in_size, void *out, size_t out_size)
+int cxl_internal_send_cmd(struct cxl_dev_state *cxlds,
+ struct cxl_mbox_cmd *mbox_cmd)
{
- const struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
- struct cxl_mbox_cmd mbox_cmd = {
- .opcode = opcode,
- .payload_in = in,
- .size_in = in_size,
- .size_out = out_size,
- .payload_out = out,
- };
+ size_t out_size, min_out;
int rc;
- if (in_size > cxlds->payload_size || out_size > cxlds->payload_size)
+ if (mbox_cmd->size_in > cxlds->payload_size ||
+ mbox_cmd->size_out > cxlds->payload_size)
return -E2BIG;
- rc = cxlds->mbox_send(cxlds, &mbox_cmd);
+ out_size = mbox_cmd->size_out;
+ min_out = mbox_cmd->min_out;
+ rc = cxlds->mbox_send(cxlds, mbox_cmd);
if (rc)
return rc;
- if (mbox_cmd.return_code != CXL_MBOX_CMD_RC_SUCCESS)
- return cxl_mbox_cmd_rc2errno(&mbox_cmd);
+ if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS)
+ return cxl_mbox_cmd_rc2errno(mbox_cmd);
+
+ if (!out_size)
+ return 0;
/*
- * Variable sized commands can't be validated and so it's up to the
- * caller to do that if they wish.
+ * Variable sized output needs to at least satisfy the caller's
+ * minimum if not the fully requested size.
*/
- if (cmd->info.size_out != CXL_VARIABLE_PAYLOAD) {
- if (mbox_cmd.size_out != out_size)
- return -EIO;
- }
+ if (min_out == 0)
+ min_out = out_size;
+
+ if (mbox_cmd->size_out < min_out)
+ return -EIO;
return 0;
}
-EXPORT_SYMBOL_NS_GPL(cxl_mbox_send_cmd, CXL);
+EXPORT_SYMBOL_NS_GPL(cxl_internal_send_cmd, CXL);
static bool cxl_mem_raw_command_allowed(u16 opcode)
{
@@ -561,15 +557,25 @@ static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 size, u8
while (remaining) {
u32 xfer_size = min_t(u32, remaining, cxlds->payload_size);
- struct cxl_mbox_get_log log = {
+ struct cxl_mbox_cmd mbox_cmd;
+ struct cxl_mbox_get_log log;
+ int rc;
+
+ log = (struct cxl_mbox_get_log) {
.uuid = *uuid,
.offset = cpu_to_le32(offset),
- .length = cpu_to_le32(xfer_size)
+ .length = cpu_to_le32(xfer_size),
};
- int rc;
- rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_LOG, &log, sizeof(log),
- out, xfer_size);
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_GET_LOG,
+ .size_in = sizeof(log),
+ .payload_in = &log,
+ .size_out = xfer_size,
+ .payload_out = out,
+ };
+
+ rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
if (rc < 0)
return rc;
@@ -615,19 +621,27 @@ static void cxl_walk_cel(struct cxl_dev_state *cxlds, size_t size, u8 *cel)
static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_dev_state *cxlds)
{
struct cxl_mbox_get_supported_logs *ret;
+ struct cxl_mbox_cmd mbox_cmd;
int rc;
ret = kvmalloc(cxlds->payload_size, GFP_KERNEL);
if (!ret)
return ERR_PTR(-ENOMEM);
- rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_SUPPORTED_LOGS, NULL, 0, ret,
- cxlds->payload_size);
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_GET_SUPPORTED_LOGS,
+ .size_out = cxlds->payload_size,
+ .payload_out = ret,
+ /* At least the record number field must be valid */
+ .min_out = 2,
+ };
+ rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
if (rc < 0) {
kvfree(ret);
return ERR_PTR(rc);
}
+
return ret;
}
@@ -697,7 +711,6 @@ int cxl_enumerate_cmds(struct cxl_dev_state *cxlds)
/* Found the required CEL */
rc = 0;
}
-
out:
kvfree(gsl);
return rc;
@@ -719,11 +732,15 @@ EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
static int cxl_mem_get_partition_info(struct cxl_dev_state *cxlds)
{
struct cxl_mbox_get_partition_info pi;
+ struct cxl_mbox_cmd mbox_cmd;
int rc;
- rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_PARTITION_INFO, NULL, 0,
- &pi, sizeof(pi));
-
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_GET_PARTITION_INFO,
+ .size_out = sizeof(pi),
+ .payload_out = &pi,
+ };
+ rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
if (rc)
return rc;
@@ -752,10 +769,15 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
{
/* See CXL 2.0 Table 175 Identify Memory Device Output Payload */
struct cxl_mbox_identify id;
+ struct cxl_mbox_cmd mbox_cmd;
int rc;
- rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_IDENTIFY, NULL, 0, &id,
- sizeof(id));
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_IDENTIFY,
+ .size_out = sizeof(id),
+ .payload_out = &id,
+ };
+ rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
if (rc < 0)
return rc;
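All of the converted callers in this file follow the same shape. A condensed sketch of the new calling convention (struct fields as defined by this patch; the output buffer is assumed to have been allocated already, as in cxl_get_gsl() above):

	struct cxl_mbox_cmd mbox_cmd = {
		.opcode = CXL_MBOX_OP_GET_SUPPORTED_LOGS,
		.size_out = cxlds->payload_size,	/* full buffer offered */
		.payload_out = gsl,			/* caller-allocated output */
		.min_out = 2,				/* accept a short reply if the count field is present */
	};
	int rc;

	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
	if (rc < 0)
		return ERR_PTR(rc);
	/* on success, mbox_cmd.size_out holds the actual output length */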
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 20ce488a7754..a74a93310d26 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -344,6 +344,7 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
* needed as this is ordered with cdev_add() publishing the device.
*/
cxlmd->cxlds = cxlds;
+ cxlds->cxlmd = cxlmd;
cdev = &cxlmd->cdev;
rc = cdev_device_add(cdev, dev);
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 9240df53ed87..57764e9cd19d 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -54,16 +54,13 @@ static int match_add_dports(struct pci_dev *pdev, void *data)
dev_dbg(&port->dev, "failed to find component registers\n");
port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
- dport = devm_cxl_add_dport(port, &pdev->dev, port_num,
- cxl_regmap_to_base(pdev, &map));
+ dport = devm_cxl_add_dport(port, &pdev->dev, port_num, map.resource);
if (IS_ERR(dport)) {
ctx->error = PTR_ERR(dport);
return PTR_ERR(dport);
}
ctx->count++;
- dev_dbg(&port->dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev));
-
return 0;
}
diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
index 36aa5070d902..f3d2169b6731 100644
--- a/drivers/cxl/core/pmem.c
+++ b/drivers/cxl/core/pmem.c
@@ -99,7 +99,6 @@ static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port)
dev = &cxl_nvb->dev;
cxl_nvb->port = port;
- cxl_nvb->state = CXL_NVB_NEW;
device_initialize(dev);
lockdep_set_class(&dev->mutex, &cxl_nvdimm_bridge_key);
device_set_pm_not_required(dev);
@@ -117,28 +116,7 @@ err:
static void unregister_nvb(void *_cxl_nvb)
{
struct cxl_nvdimm_bridge *cxl_nvb = _cxl_nvb;
- bool flush;
- /*
- * If the bridge was ever activated then there might be in-flight state
- * work to flush. Once the state has been changed to 'dead' then no new
- * work can be queued by user-triggered bind.
- */
- device_lock(&cxl_nvb->dev);
- flush = cxl_nvb->state != CXL_NVB_NEW;
- cxl_nvb->state = CXL_NVB_DEAD;
- device_unlock(&cxl_nvb->dev);
-
- /*
- * Even though the device core will trigger device_release_driver()
- * before the unregister, it does not know about the fact that
- * cxl_nvdimm_bridge_driver defers ->remove() work. So, do the driver
- * release not and flush it before tearing down the nvdimm device
- * hierarchy.
- */
- device_release_driver(&cxl_nvb->dev);
- if (flush)
- flush_work(&cxl_nvb->state_work);
device_unregister(&cxl_nvb->dev);
}
@@ -188,7 +166,6 @@ static void cxl_nvdimm_release(struct device *dev)
{
struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
- xa_destroy(&cxl_nvd->pmem_regions);
kfree(cxl_nvd);
}
@@ -220,7 +197,8 @@ EXPORT_SYMBOL_NS_GPL(to_cxl_nvdimm, CXL);
static struct lock_class_key cxl_nvdimm_key;
-static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd)
+static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_nvdimm_bridge *cxl_nvb,
+ struct cxl_memdev *cxlmd)
{
struct cxl_nvdimm *cxl_nvd;
struct device *dev;
@@ -231,38 +209,78 @@ static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd)
dev = &cxl_nvd->dev;
cxl_nvd->cxlmd = cxlmd;
- xa_init(&cxl_nvd->pmem_regions);
+ cxlmd->cxl_nvd = cxl_nvd;
device_initialize(dev);
lockdep_set_class(&dev->mutex, &cxl_nvdimm_key);
device_set_pm_not_required(dev);
dev->parent = &cxlmd->dev;
dev->bus = &cxl_bus_type;
dev->type = &cxl_nvdimm_type;
+ /*
+ * A "%llx" string is 17-bytes vs dimm_id that is max
+ * NVDIMM_KEY_DESC_LEN
+ */
+ BUILD_BUG_ON(sizeof(cxl_nvd->dev_id) < 17 ||
+ sizeof(cxl_nvd->dev_id) > NVDIMM_KEY_DESC_LEN);
+ sprintf(cxl_nvd->dev_id, "%llx", cxlmd->cxlds->serial);
return cxl_nvd;
}
-static void cxl_nvd_unregister(void *dev)
+static void cxl_nvd_unregister(void *_cxl_nvd)
{
- device_unregister(dev);
+ struct cxl_nvdimm *cxl_nvd = _cxl_nvd;
+ struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
+ struct cxl_nvdimm_bridge *cxl_nvb = cxlmd->cxl_nvb;
+
+ /*
+ * Either the bridge is in ->remove() context under the device_lock(),
+ * or cxlmd_release_nvdimm() is cancelling the bridge's release action
+ * for @cxl_nvd and doing it itself (while manually holding the bridge
+ * lock).
+ */
+ device_lock_assert(&cxl_nvb->dev);
+ cxl_nvd->cxlmd = NULL;
+ cxlmd->cxl_nvd = NULL;
+ device_unregister(&cxl_nvd->dev);
+}
+
+static void cxlmd_release_nvdimm(void *_cxlmd)
+{
+ struct cxl_memdev *cxlmd = _cxlmd;
+ struct cxl_nvdimm_bridge *cxl_nvb = cxlmd->cxl_nvb;
+
+ device_lock(&cxl_nvb->dev);
+ if (cxlmd->cxl_nvd)
+ devm_release_action(&cxl_nvb->dev, cxl_nvd_unregister,
+ cxlmd->cxl_nvd);
+ device_unlock(&cxl_nvb->dev);
+ put_device(&cxl_nvb->dev);
}
/**
* devm_cxl_add_nvdimm() - add a bridge between a cxl_memdev and an nvdimm
- * @host: same host as @cxlmd
* @cxlmd: cxl_memdev instance that will perform LIBNVDIMM operations
*
* Return: 0 on success negative error code on failure.
*/
-int devm_cxl_add_nvdimm(struct device *host, struct cxl_memdev *cxlmd)
+int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd)
{
+ struct cxl_nvdimm_bridge *cxl_nvb;
struct cxl_nvdimm *cxl_nvd;
struct device *dev;
int rc;
- cxl_nvd = cxl_nvdimm_alloc(cxlmd);
- if (IS_ERR(cxl_nvd))
- return PTR_ERR(cxl_nvd);
+ cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev);
+ if (!cxl_nvb)
+ return -ENODEV;
+
+ cxl_nvd = cxl_nvdimm_alloc(cxl_nvb, cxlmd);
+ if (IS_ERR(cxl_nvd)) {
+ rc = PTR_ERR(cxl_nvd);
+ goto err_alloc;
+ }
+ cxlmd->cxl_nvb = cxl_nvb;
dev = &cxl_nvd->dev;
rc = dev_set_name(dev, "pmem%d", cxlmd->id);
@@ -273,13 +291,34 @@ int devm_cxl_add_nvdimm(struct device *host, struct cxl_memdev *cxlmd)
if (rc)
goto err;
- dev_dbg(host, "%s: register %s\n", dev_name(dev->parent),
- dev_name(dev));
+ dev_dbg(&cxlmd->dev, "register %s\n", dev_name(dev));
+
+ /*
+ * The two actions below arrange for @cxl_nvd to be deleted when either
+ * the top-level PMEM bridge goes down, or the endpoint device goes
+ * through ->remove().
+ */
+ device_lock(&cxl_nvb->dev);
+ if (cxl_nvb->dev.driver)
+ rc = devm_add_action_or_reset(&cxl_nvb->dev, cxl_nvd_unregister,
+ cxl_nvd);
+ else
+ rc = -ENXIO;
+ device_unlock(&cxl_nvb->dev);
- return devm_add_action_or_reset(host, cxl_nvd_unregister, dev);
+ if (rc)
+ goto err_alloc;
+
+ /* @cxlmd carries a reference on @cxl_nvb until cxlmd_release_nvdimm */
+ return devm_add_action_or_reset(&cxlmd->dev, cxlmd_release_nvdimm, cxlmd);
err:
put_device(dev);
+err_alloc:
+ cxlmd->cxl_nvb = NULL;
+ cxlmd->cxl_nvd = NULL;
+ put_device(&cxl_nvb->dev);
+
return rc;
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_nvdimm, CXL);
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index e7556864ea80..b631a0520456 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -628,6 +628,8 @@ static struct cxl_port *cxl_port_alloc(struct device *uport,
iter = to_cxl_port(iter->dev.parent);
if (iter->host_bridge)
port->host_bridge = iter->host_bridge;
+ else if (parent_dport->rch)
+ port->host_bridge = parent_dport->dport;
else
port->host_bridge = iter->uport;
dev_dbg(uport, "host-bridge: %s\n", dev_name(port->host_bridge));
@@ -655,16 +657,10 @@ err:
return ERR_PTR(rc);
}
-/**
- * devm_cxl_add_port - register a cxl_port in CXL memory decode hierarchy
- * @host: host device for devm operations
- * @uport: "physical" device implementing this upstream port
- * @component_reg_phys: (optional) for configurable cxl_port instances
- * @parent_dport: next hop up in the CXL memory decode hierarchy
- */
-struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
- resource_size_t component_reg_phys,
- struct cxl_dport *parent_dport)
+static struct cxl_port *__devm_cxl_add_port(struct device *host,
+ struct device *uport,
+ resource_size_t component_reg_phys,
+ struct cxl_dport *parent_dport)
{
struct cxl_port *port;
struct device *dev;
@@ -702,6 +698,41 @@ err:
put_device(dev);
return ERR_PTR(rc);
}
+
+/**
+ * devm_cxl_add_port - register a cxl_port in CXL memory decode hierarchy
+ * @host: host device for devm operations
+ * @uport: "physical" device implementing this upstream port
+ * @component_reg_phys: (optional) for configurable cxl_port instances
+ * @parent_dport: next hop up in the CXL memory decode hierarchy
+ */
+struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
+ resource_size_t component_reg_phys,
+ struct cxl_dport *parent_dport)
+{
+ struct cxl_port *port, *parent_port;
+
+ port = __devm_cxl_add_port(host, uport, component_reg_phys,
+ parent_dport);
+
+ parent_port = parent_dport ? parent_dport->port : NULL;
+ if (IS_ERR(port)) {
+ dev_dbg(uport, "Failed to add %s%s%s%s: %ld\n",
+ dev_name(&port->dev),
+ parent_port ? " to " : "",
+ parent_port ? dev_name(&parent_port->dev) : "",
+ parent_port ? "" : " (root port)",
+ PTR_ERR(port));
+ } else {
+ dev_dbg(uport, "%s added%s%s%s\n",
+ dev_name(&port->dev),
+ parent_port ? " to " : "",
+ parent_port ? dev_name(&parent_port->dev) : "",
+ parent_port ? "" : " (root port)");
+ }
+
+ return port;
+}
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_port, CXL);
struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port)
@@ -870,20 +901,10 @@ static void cxl_dport_unlink(void *data)
sysfs_remove_link(&port->dev.kobj, link_name);
}
-/**
- * devm_cxl_add_dport - append downstream port data to a cxl_port
- * @port: the cxl_port that references this dport
- * @dport_dev: firmware or PCI device representing the dport
- * @port_id: identifier for this dport in a decoder's target list
- * @component_reg_phys: optional location of CXL component registers
- *
- * Note that dports are appended to the devm release action's of the
- * either the port's host (for root ports), or the port itself (for
- * switch ports)
- */
-struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
- struct device *dport_dev, int port_id,
- resource_size_t component_reg_phys)
+static struct cxl_dport *
+__devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev,
+ int port_id, resource_size_t component_reg_phys,
+ resource_size_t rcrb)
{
char link_name[CXL_TARGET_STRLEN];
struct cxl_dport *dport;
@@ -913,6 +934,9 @@ struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
dport->port_id = port_id;
dport->component_reg_phys = component_reg_phys;
dport->port = port;
+ if (rcrb != CXL_RESOURCE_NONE)
+ dport->rch = true;
+ dport->rcrb = rcrb;
cond_cxl_root_lock(port);
rc = add_dport(port, dport);
@@ -935,8 +959,74 @@ struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
return dport;
}
+
+/**
+ * devm_cxl_add_dport - append VH downstream port data to a cxl_port
+ * @port: the cxl_port that references this dport
+ * @dport_dev: firmware or PCI device representing the dport
+ * @port_id: identifier for this dport in a decoder's target list
+ * @component_reg_phys: optional location of CXL component registers
+ *
+ * Note that dports are appended to the devm release action's of the
+ * either the port's host (for root ports), or the port itself (for
+ * switch ports)
+ */
+struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
+ struct device *dport_dev, int port_id,
+ resource_size_t component_reg_phys)
+{
+ struct cxl_dport *dport;
+
+ dport = __devm_cxl_add_dport(port, dport_dev, port_id,
+ component_reg_phys, CXL_RESOURCE_NONE);
+ if (IS_ERR(dport)) {
+ dev_dbg(dport_dev, "failed to add dport to %s: %ld\n",
+ dev_name(&port->dev), PTR_ERR(dport));
+ } else {
+ dev_dbg(dport_dev, "dport added to %s\n",
+ dev_name(&port->dev));
+ }
+
+ return dport;
+}
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport, CXL);
+/**
+ * devm_cxl_add_rch_dport - append RCH downstream port data to a cxl_port
+ * @port: the cxl_port that references this dport
+ * @dport_dev: firmware or PCI device representing the dport
+ * @port_id: identifier for this dport in a decoder's target list
+ * @component_reg_phys: optional location of CXL component registers
+ * @rcrb: mandatory location of a Root Complex Register Block
+ *
+ * See CXL 3.0 9.11.8 CXL Devices Attached to an RCH
+ */
+struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
+ struct device *dport_dev, int port_id,
+ resource_size_t component_reg_phys,
+ resource_size_t rcrb)
+{
+ struct cxl_dport *dport;
+
+ if (rcrb == CXL_RESOURCE_NONE) {
+ dev_dbg(&port->dev, "failed to add RCH dport, missing RCRB\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ dport = __devm_cxl_add_dport(port, dport_dev, port_id,
+ component_reg_phys, rcrb);
+ if (IS_ERR(dport)) {
+ dev_dbg(dport_dev, "failed to add RCH dport to %s: %ld\n",
+ dev_name(&port->dev), PTR_ERR(dport));
+ } else {
+ dev_dbg(dport_dev, "RCH dport added to %s\n",
+ dev_name(&port->dev));
+ }
+
+ return dport;
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_rch_dport, CXL);
+
static int add_ep(struct cxl_ep *new)
{
struct cxl_port *port = new->dport->port;
@@ -1122,47 +1212,6 @@ static void reap_dports(struct cxl_port *port)
}
}
-int devm_cxl_add_endpoint(struct cxl_memdev *cxlmd,
- struct cxl_dport *parent_dport)
-{
- struct cxl_port *parent_port = parent_dport->port;
- struct cxl_dev_state *cxlds = cxlmd->cxlds;
- struct cxl_port *endpoint, *iter, *down;
- int rc;
-
- /*
- * Now that the path to the root is established record all the
- * intervening ports in the chain.
- */
- for (iter = parent_port, down = NULL; !is_cxl_root(iter);
- down = iter, iter = to_cxl_port(iter->dev.parent)) {
- struct cxl_ep *ep;
-
- ep = cxl_ep_load(iter, cxlmd);
- ep->next = down;
- }
-
- endpoint = devm_cxl_add_port(&parent_port->dev, &cxlmd->dev,
- cxlds->component_reg_phys, parent_dport);
- if (IS_ERR(endpoint))
- return PTR_ERR(endpoint);
-
- dev_dbg(&cxlmd->dev, "add: %s\n", dev_name(&endpoint->dev));
-
- rc = cxl_endpoint_autoremove(cxlmd, endpoint);
- if (rc)
- return rc;
-
- if (!endpoint->dev.driver) {
- dev_err(&cxlmd->dev, "%s failed probe\n",
- dev_name(&endpoint->dev));
- return -ENXIO;
- }
-
- return 0;
-}
-EXPORT_SYMBOL_NS_GPL(devm_cxl_add_endpoint, CXL);
-
static void cxl_detach_ep(void *data)
{
struct cxl_memdev *cxlmd = data;
@@ -1243,7 +1292,7 @@ static resource_size_t find_component_registers(struct device *dev)
pdev = to_pci_dev(dev);
cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
- return cxl_regmap_to_base(pdev, &map);
+ return map.resource;
}
static int add_port_attach_ep(struct cxl_memdev *cxlmd,
@@ -1320,6 +1369,13 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd)
struct device *iter;
int rc;
+ /*
+ * Skip intermediate port enumeration in the RCH case, there
+ * are no ports in between a host bridge and an endpoint.
+ */
+ if (cxlmd->cxlds->rcd)
+ return 0;
+
rc = devm_add_action_or_reset(&cxlmd->dev, cxl_detach_ep, cxlmd);
if (rc)
return rc;
@@ -1428,7 +1484,7 @@ static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd,
return rc;
}
-static struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos)
+struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos)
{
struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd;
struct cxl_decoder *cxld = &cxlsd->cxld;
@@ -1441,6 +1497,7 @@ static struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos)
return cxlrd->cxlsd.target[pos % iw];
}
+EXPORT_SYMBOL_NS_GPL(cxl_hb_modulo, CXL);
static struct lock_class_key cxl_decoder_key;
@@ -1502,6 +1559,7 @@ static int cxl_switch_decoder_init(struct cxl_port *port,
* cxl_root_decoder_alloc - Allocate a root level decoder
* @port: owning CXL root of this decoder
* @nr_targets: static number of downstream targets
+ * @calc_hb: which host bridge covers the n'th position by granularity
*
* Return: A new cxl decoder to be registered by cxl_decoder_add(). A
* 'CXL root' decoder is one that decodes from a top-level / static platform
@@ -1509,7 +1567,8 @@ static int cxl_switch_decoder_init(struct cxl_port *port,
* topology.
*/
struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
- unsigned int nr_targets)
+ unsigned int nr_targets,
+ cxl_calc_hb_fn calc_hb)
{
struct cxl_root_decoder *cxlrd;
struct cxl_switch_decoder *cxlsd;
@@ -1531,7 +1590,7 @@ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
return ERR_PTR(rc);
}
- cxlrd->calc_hb = cxl_hb_modulo;
+ cxlrd->calc_hb = calc_hb;
cxld = &cxlsd->cxld;
cxld->dev.type = &cxl_decoder_root_type;
@@ -1797,12 +1856,27 @@ static void cxl_bus_remove(struct device *dev)
static struct workqueue_struct *cxl_bus_wq;
-int cxl_bus_rescan(void)
+static void cxl_bus_rescan_queue(struct work_struct *w)
{
- return bus_rescan_devices(&cxl_bus_type);
+ int rc = bus_rescan_devices(&cxl_bus_type);
+
+ pr_debug("CXL bus rescan result: %d\n", rc);
+}
+
+void cxl_bus_rescan(void)
+{
+ static DECLARE_WORK(rescan_work, cxl_bus_rescan_queue);
+
+ queue_work(cxl_bus_wq, &rescan_work);
}
EXPORT_SYMBOL_NS_GPL(cxl_bus_rescan, CXL);
+void cxl_bus_drain(void)
+{
+ drain_workqueue(cxl_bus_wq);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_bus_drain, CXL);
+
bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd)
{
return queue_work(cxl_bus_wq, &cxlmd->detach_work);
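The new exports in this file (cxl_hb_modulo(), devm_cxl_add_rch_dport(), and the calc_hb parameter to cxl_root_decoder_alloc()) are picked up by cxl_acpi earlier in this merge; the decision points, condensed from the acpi.c hunks above, are:

	/* root decoder allocation now carries the interleave-math callback */
	if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_MODULO)
		cxl_calc_hb = cxl_hb_modulo;
	else
		cxl_calc_hb = cxl_hb_xor;
	cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb);

	/* host bridge dports register as RCH or VH based on the CHBS version */
	if (ctx.cxl_version == ACPI_CEDT_CHBS_VERSION_CXL11)
		dport = devm_cxl_add_rch_dport(root_port, bridge, uid,
					       ctx.chbcr, ctx.rcrb);
	else
		dport = devm_cxl_add_dport(root_port, bridge, uid, ctx.chbcr);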
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index f9ae5ad284ff..02f28da519e3 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -324,7 +324,7 @@ static ssize_t interleave_ways_store(struct device *dev,
if (rc)
return rc;
- rc = ways_to_cxl(val, &iw);
+ rc = ways_to_eiw(val, &iw);
if (rc)
return rc;
@@ -391,7 +391,7 @@ static ssize_t interleave_granularity_store(struct device *dev,
if (rc)
return rc;
- rc = granularity_to_cxl(val, &ig);
+ rc = granularity_to_eig(val, &ig);
if (rc)
return rc;
@@ -1028,7 +1028,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
parent_iw = parent_cxld->interleave_ways;
}
- rc = granularity_to_cxl(parent_ig, &peig);
+ rc = granularity_to_eig(parent_ig, &peig);
if (rc) {
dev_dbg(&cxlr->dev, "%s:%s: invalid parent granularity: %d\n",
dev_name(parent_port->uport),
@@ -1036,7 +1036,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
return rc;
}
- rc = ways_to_cxl(parent_iw, &peiw);
+ rc = ways_to_eiw(parent_iw, &peiw);
if (rc) {
dev_dbg(&cxlr->dev, "%s:%s: invalid parent interleave: %d\n",
dev_name(parent_port->uport),
@@ -1045,7 +1045,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
}
iw = cxl_rr->nr_targets;
- rc = ways_to_cxl(iw, &eiw);
+ rc = ways_to_eiw(iw, &eiw);
if (rc) {
dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d\n",
dev_name(port->uport), dev_name(&port->dev), iw);
@@ -1065,7 +1065,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
eig = peig;
}
- rc = cxl_to_granularity(eig, &ig);
+ rc = eig_to_granularity(eig, &ig);
if (rc) {
dev_dbg(&cxlr->dev, "%s:%s: invalid interleave: %d\n",
dev_name(port->uport), dev_name(&port->dev),
@@ -1226,7 +1226,7 @@ static int cxl_region_attach(struct cxl_region *cxlr,
struct cxl_endpoint_decoder *cxled_target;
struct cxl_memdev *cxlmd_target;
- cxled_target = p->targets[pos];
+ cxled_target = p->targets[i];
if (!cxled_target)
continue;
@@ -1403,6 +1403,8 @@ static int attach_target(struct cxl_region *cxlr, const char *decoder, int pos)
goto out;
down_read(&cxl_dpa_rwsem);
rc = cxl_region_attach(cxlr, to_cxl_endpoint_decoder(dev), pos);
+ if (rc == 0)
+ set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
up_read(&cxl_dpa_rwsem);
up_write(&cxl_region_rwsem);
out:
@@ -1812,6 +1814,7 @@ static struct lock_class_key cxl_pmem_region_key;
static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
{
struct cxl_region_params *p = &cxlr->params;
+ struct cxl_nvdimm_bridge *cxl_nvb;
struct cxl_pmem_region *cxlr_pmem;
struct device *dev;
int i;
@@ -1839,6 +1842,18 @@ static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
+ /*
+ * Regions never span CXL root devices, so by definition the
+ * bridge for one device is the same for all.
+ */
+ if (i == 0) {
+ cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev);
+ if (!cxl_nvb) {
+ cxlr_pmem = ERR_PTR(-ENODEV);
+ goto out;
+ }
+ cxlr->cxl_nvb = cxl_nvb;
+ }
m->cxlmd = cxlmd;
get_device(&cxlmd->dev);
m->start = cxled->dpa_res->start;
@@ -1848,6 +1863,7 @@ static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
dev = &cxlr_pmem->dev;
cxlr_pmem->cxlr = cxlr;
+ cxlr->cxlr_pmem = cxlr_pmem;
device_initialize(dev);
lockdep_set_class(&dev->mutex, &cxl_pmem_region_key);
device_set_pm_not_required(dev);
@@ -1860,9 +1876,36 @@ out:
return cxlr_pmem;
}
-static void cxlr_pmem_unregister(void *dev)
+static void cxlr_pmem_unregister(void *_cxlr_pmem)
{
- device_unregister(dev);
+ struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem;
+ struct cxl_region *cxlr = cxlr_pmem->cxlr;
+ struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;
+
+ /*
+ * Either the bridge is in ->remove() context under the device_lock(),
+ * or cxlr_release_nvdimm() is cancelling the bridge's release action
+ * for @cxlr_pmem and doing it itself (while manually holding the bridge
+ * lock).
+ */
+ device_lock_assert(&cxl_nvb->dev);
+ cxlr->cxlr_pmem = NULL;
+ cxlr_pmem->cxlr = NULL;
+ device_unregister(&cxlr_pmem->dev);
+}
+
+static void cxlr_release_nvdimm(void *_cxlr)
+{
+ struct cxl_region *cxlr = _cxlr;
+ struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;
+
+ device_lock(&cxl_nvb->dev);
+ if (cxlr->cxlr_pmem)
+ devm_release_action(&cxl_nvb->dev, cxlr_pmem_unregister,
+ cxlr->cxlr_pmem);
+ device_unlock(&cxl_nvb->dev);
+ cxlr->cxl_nvb = NULL;
+ put_device(&cxl_nvb->dev);
}
/**
@@ -1874,12 +1917,14 @@ static void cxlr_pmem_unregister(void *dev)
static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
{
struct cxl_pmem_region *cxlr_pmem;
+ struct cxl_nvdimm_bridge *cxl_nvb;
struct device *dev;
int rc;
cxlr_pmem = cxl_pmem_region_alloc(cxlr);
if (IS_ERR(cxlr_pmem))
return PTR_ERR(cxlr_pmem);
+ cxl_nvb = cxlr->cxl_nvb;
dev = &cxlr_pmem->dev;
rc = dev_set_name(dev, "pmem_region%d", cxlr->id);
@@ -1893,13 +1938,52 @@ static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
dev_name(dev));
- return devm_add_action_or_reset(&cxlr->dev, cxlr_pmem_unregister, dev);
+ device_lock(&cxl_nvb->dev);
+ if (cxl_nvb->dev.driver)
+ rc = devm_add_action_or_reset(&cxl_nvb->dev,
+ cxlr_pmem_unregister, cxlr_pmem);
+ else
+ rc = -ENXIO;
+ device_unlock(&cxl_nvb->dev);
+
+ if (rc)
+ goto err_bridge;
+
+ /* @cxlr carries a reference on @cxl_nvb until cxlr_release_nvdimm */
+ return devm_add_action_or_reset(&cxlr->dev, cxlr_release_nvdimm, cxlr);
err:
put_device(dev);
+err_bridge:
+ put_device(&cxl_nvb->dev);
+ cxlr->cxl_nvb = NULL;
return rc;
}
+static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
+{
+ if (!test_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags))
+ return 0;
+
+ if (!cpu_cache_has_invalidate_memregion()) {
+ if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) {
+ dev_warn(
+ &cxlr->dev,
+ "Bypassing cpu_cache_invalidate_memregion() for testing!\n");
+ clear_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
+ return 0;
+ } else {
+ dev_err(&cxlr->dev,
+ "Failed to synchronize CPU cache state\n");
+ return -ENXIO;
+ }
+ }
+
+ cpu_cache_invalidate_memregion(IORES_DESC_CXL);
+ clear_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
+ return 0;
+}
+
static int cxl_region_probe(struct device *dev)
{
struct cxl_region *cxlr = to_cxl_region(dev);
@@ -1915,14 +1999,21 @@ static int cxl_region_probe(struct device *dev)
if (p->state < CXL_CONFIG_COMMIT) {
dev_dbg(&cxlr->dev, "config state: %d\n", p->state);
rc = -ENXIO;
+ goto out;
}
+ rc = cxl_region_invalidate_memregion(cxlr);
+
/*
* From this point on any path that changes the region's state away from
* CXL_CONFIG_COMMIT is also responsible for releasing the driver.
*/
+out:
up_read(&cxl_region_rwsem);
+ if (rc)
+ return rc;
+
switch (cxlr->mode) {
case CXL_DECODER_PMEM:
return devm_cxl_add_pmem_region(cxlr);
@@ -1950,4 +2041,5 @@ void cxl_region_exit(void)
}
MODULE_IMPORT_NS(CXL);
+MODULE_IMPORT_NS(DEVMEM);
MODULE_ALIAS_CXL(CXL_DEVICE_REGION);
diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
index 39a129c57d40..1476a0299c9b 100644
--- a/drivers/cxl/core/regs.c
+++ b/drivers/cxl/core/regs.c
@@ -7,6 +7,8 @@
#include <cxlmem.h>
#include <cxlpci.h>
+#include "core.h"
+
/**
* DOC: cxl registers
*
@@ -59,36 +61,48 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base,
for (cap = 1; cap <= cap_count; cap++) {
void __iomem *register_block;
- u32 hdr;
- int decoder_cnt;
+ struct cxl_reg_map *rmap;
u16 cap_id, offset;
- u32 length;
+ u32 length, hdr;
hdr = readl(base + cap * 0x4);
cap_id = FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, hdr);
offset = FIELD_GET(CXL_CM_CAP_PTR_MASK, hdr);
register_block = base + offset;
+ hdr = readl(register_block);
+ rmap = NULL;
switch (cap_id) {
- case CXL_CM_CAP_CAP_ID_HDM:
+ case CXL_CM_CAP_CAP_ID_HDM: {
+ int decoder_cnt;
+
dev_dbg(dev, "found HDM decoder capability (0x%x)\n",
offset);
- hdr = readl(register_block);
-
decoder_cnt = cxl_hdm_decoder_count(hdr);
length = 0x20 * decoder_cnt + 0x10;
-
- map->hdm_decoder.valid = true;
- map->hdm_decoder.offset = CXL_CM_OFFSET + offset;
- map->hdm_decoder.size = length;
+ rmap = &map->hdm_decoder;
+ break;
+ }
+ case CXL_CM_CAP_CAP_ID_RAS:
+ dev_dbg(dev, "found RAS capability (0x%x)\n",
+ offset);
+ length = CXL_RAS_CAPABILITY_LENGTH;
+ rmap = &map->ras;
break;
default:
dev_dbg(dev, "Unknown CM cap ID: %d (0x%x)\n", cap_id,
offset);
break;
}
+
+ if (!rmap)
+ continue;
+ rmap->valid = true;
+ rmap->id = cap_id;
+ rmap->offset = CXL_CM_OFFSET + offset;
+ rmap->size = length;
}
}
EXPORT_SYMBOL_NS_GPL(cxl_probe_component_regs, CXL);
@@ -117,6 +131,7 @@ void cxl_probe_device_regs(struct device *dev, void __iomem *base,
cap_count = FIELD_GET(CXLDEV_CAP_ARRAY_COUNT_MASK, cap_array);
for (cap = 1; cap <= cap_count; cap++) {
+ struct cxl_reg_map *rmap;
u32 offset, length;
u16 cap_id;
@@ -125,28 +140,22 @@ void cxl_probe_device_regs(struct device *dev, void __iomem *base,
offset = readl(base + cap * 0x10 + 0x4);
length = readl(base + cap * 0x10 + 0x8);
+ rmap = NULL;
switch (cap_id) {
case CXLDEV_CAP_CAP_ID_DEVICE_STATUS:
dev_dbg(dev, "found Status capability (0x%x)\n", offset);
-
- map->status.valid = true;
- map->status.offset = offset;
- map->status.size = length;
+ rmap = &map->status;
break;
case CXLDEV_CAP_CAP_ID_PRIMARY_MAILBOX:
dev_dbg(dev, "found Mailbox capability (0x%x)\n", offset);
- map->mbox.valid = true;
- map->mbox.offset = offset;
- map->mbox.size = length;
+ rmap = &map->mbox;
break;
case CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX:
dev_dbg(dev, "found Secondary Mailbox capability (0x%x)\n", offset);
break;
case CXLDEV_CAP_CAP_ID_MEMDEV:
dev_dbg(dev, "found Memory Device capability (0x%x)\n", offset);
- map->memdev.valid = true;
- map->memdev.offset = offset;
- map->memdev.size = length;
+ rmap = &map->memdev;
break;
default:
if (cap_id >= 0x8000)
@@ -155,6 +164,13 @@ void cxl_probe_device_regs(struct device *dev, void __iomem *base,
dev_dbg(dev, "Unknown cap ID: %#x offset: %#x\n", cap_id, offset);
break;
}
+
+ if (!rmap)
+ continue;
+ rmap->valid = true;
+ rmap->id = cap_id;
+ rmap->offset = offset;
+ rmap->size = length;
}
}
EXPORT_SYMBOL_NS_GPL(cxl_probe_device_regs, CXL);
@@ -165,6 +181,9 @@ void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
void __iomem *ret_val;
struct resource *res;
+ if (WARN_ON_ONCE(addr == CXL_RESOURCE_NONE))
+ return NULL;
+
res = devm_request_mem_region(dev, addr, length, dev_name(dev));
if (!res) {
resource_size_t end = addr + length - 1;
@@ -180,67 +199,65 @@ void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
return ret_val;
}
-int cxl_map_component_regs(struct pci_dev *pdev,
- struct cxl_component_regs *regs,
- struct cxl_register_map *map)
+int cxl_map_component_regs(struct device *dev, struct cxl_component_regs *regs,
+ struct cxl_register_map *map, unsigned long map_mask)
{
- struct device *dev = &pdev->dev;
- resource_size_t phys_addr;
- resource_size_t length;
-
- phys_addr = pci_resource_start(pdev, map->barno);
- phys_addr += map->block_offset;
+ struct mapinfo {
+ struct cxl_reg_map *rmap;
+ void __iomem **addr;
+ } mapinfo[] = {
+ { &map->component_map.hdm_decoder, &regs->hdm_decoder },
+ { &map->component_map.ras, &regs->ras },
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mapinfo); i++) {
+ struct mapinfo *mi = &mapinfo[i];
+ resource_size_t phys_addr;
+ resource_size_t length;
- phys_addr += map->component_map.hdm_decoder.offset;
- length = map->component_map.hdm_decoder.size;
- regs->hdm_decoder = devm_cxl_iomap_block(dev, phys_addr, length);
- if (!regs->hdm_decoder)
- return -ENOMEM;
+ if (!mi->rmap->valid)
+ continue;
+ if (!test_bit(mi->rmap->id, &map_mask))
+ continue;
+ phys_addr = map->resource + mi->rmap->offset;
+ length = mi->rmap->size;
+ *(mi->addr) = devm_cxl_iomap_block(dev, phys_addr, length);
+ if (!*(mi->addr))
+ return -ENOMEM;
+ }
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_map_component_regs, CXL);
-int cxl_map_device_regs(struct pci_dev *pdev,
+int cxl_map_device_regs(struct device *dev,
struct cxl_device_regs *regs,
struct cxl_register_map *map)
{
- struct device *dev = &pdev->dev;
- resource_size_t phys_addr;
-
- phys_addr = pci_resource_start(pdev, map->barno);
- phys_addr += map->block_offset;
-
- if (map->device_map.status.valid) {
- resource_size_t addr;
+ resource_size_t phys_addr = map->resource;
+ struct mapinfo {
+ struct cxl_reg_map *rmap;
+ void __iomem **addr;
+ } mapinfo[] = {
+ { &map->device_map.status, &regs->status, },
+ { &map->device_map.mbox, &regs->mbox, },
+ { &map->device_map.memdev, &regs->memdev, },
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mapinfo); i++) {
+ struct mapinfo *mi = &mapinfo[i];
resource_size_t length;
-
- addr = phys_addr + map->device_map.status.offset;
- length = map->device_map.status.size;
- regs->status = devm_cxl_iomap_block(dev, addr, length);
- if (!regs->status)
- return -ENOMEM;
- }
-
- if (map->device_map.mbox.valid) {
resource_size_t addr;
- resource_size_t length;
- addr = phys_addr + map->device_map.mbox.offset;
- length = map->device_map.mbox.size;
- regs->mbox = devm_cxl_iomap_block(dev, addr, length);
- if (!regs->mbox)
- return -ENOMEM;
- }
-
- if (map->device_map.memdev.valid) {
- resource_size_t addr;
- resource_size_t length;
+ if (!mi->rmap->valid)
+ continue;
- addr = phys_addr + map->device_map.memdev.offset;
- length = map->device_map.memdev.size;
- regs->memdev = devm_cxl_iomap_block(dev, addr, length);
- if (!regs->memdev)
+ addr = phys_addr + mi->rmap->offset;
+ length = mi->rmap->size;
+ *(mi->addr) = devm_cxl_iomap_block(dev, addr, length);
+ if (!*(mi->addr))
return -ENOMEM;
}
@@ -248,13 +265,24 @@ int cxl_map_device_regs(struct pci_dev *pdev,
}
EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, CXL);
-static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi,
+static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
struct cxl_register_map *map)
{
- map->block_offset = ((u64)reg_hi << 32) |
- (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK);
- map->barno = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo);
+ int bar = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo);
+ u64 offset = ((u64)reg_hi << 32) |
+ (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK);
+
+ if (offset > pci_resource_len(pdev, bar)) {
+ dev_warn(&pdev->dev,
+ "BAR%d: %pr: too small (offset: %pa, type: %d)\n", bar,
+ &pdev->resource[bar], &offset, map->reg_type);
+ return false;
+ }
+
map->reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
+ map->resource = pci_resource_start(pdev, bar) + offset;
+ map->max_size = pci_resource_len(pdev, bar) - offset;
+ return true;
}
/**
@@ -274,7 +302,7 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
u32 regloc_size, regblocks;
int regloc, i;
- map->block_offset = U64_MAX;
+ map->resource = CXL_RESOURCE_NONE;
regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL,
CXL_DVSEC_REG_LOCATOR);
if (!regloc)
@@ -292,13 +320,79 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
pci_read_config_dword(pdev, regloc, &reg_lo);
pci_read_config_dword(pdev, regloc + 4, &reg_hi);
- cxl_decode_regblock(reg_lo, reg_hi, map);
+ if (!cxl_decode_regblock(pdev, reg_lo, reg_hi, map))
+ continue;
if (map->reg_type == type)
return 0;
}
- map->block_offset = U64_MAX;
+ map->resource = CXL_RESOURCE_NONE;
return -ENODEV;
}
EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL);
+
+resource_size_t cxl_rcrb_to_component(struct device *dev,
+ resource_size_t rcrb,
+ enum cxl_rcrb which)
+{
+ resource_size_t component_reg_phys;
+ void __iomem *addr;
+ u32 bar0, bar1;
+ u16 cmd;
+ u32 id;
+
+ if (which == CXL_RCRB_UPSTREAM)
+ rcrb += SZ_4K;
+
+ /*
+ * RCRB's BAR[0..1] point to component block containing CXL
+ * subsystem component registers. MEMBAR extraction follows
+ * the PCI Base spec here, esp. 64 bit extraction and memory
+ * ranges alignment (6.0, 7.5.1.2.1).
+ */
+ if (!request_mem_region(rcrb, SZ_4K, "CXL RCRB"))
+ return CXL_RESOURCE_NONE;
+ addr = ioremap(rcrb, SZ_4K);
+ if (!addr) {
+ dev_err(dev, "Failed to map region %pr\n", addr);
+ release_mem_region(rcrb, SZ_4K);
+ return CXL_RESOURCE_NONE;
+ }
+
+ id = readl(addr + PCI_VENDOR_ID);
+ cmd = readw(addr + PCI_COMMAND);
+ bar0 = readl(addr + PCI_BASE_ADDRESS_0);
+ bar1 = readl(addr + PCI_BASE_ADDRESS_1);
+ iounmap(addr);
+ release_mem_region(rcrb, SZ_4K);
+
+ /*
+ * Sanity check, see CXL 3.0 Figure 9-8 CXL Device that Does Not
+ * Remap Upstream Port and Component Registers
+ */
+ if (id == U32_MAX) {
+ if (which == CXL_RCRB_DOWNSTREAM)
+ dev_err(dev, "Failed to access Downstream Port RCRB\n");
+ return CXL_RESOURCE_NONE;
+ }
+ if (!(cmd & PCI_COMMAND_MEMORY))
+ return CXL_RESOURCE_NONE;
+ /* The RCRB is a Memory Window, and the MEM_TYPE_1M bit is obsolete */
+ if (bar0 & (PCI_BASE_ADDRESS_MEM_TYPE_1M | PCI_BASE_ADDRESS_SPACE_IO))
+ return CXL_RESOURCE_NONE;
+
+ component_reg_phys = bar0 & PCI_BASE_ADDRESS_MEM_MASK;
+ if (bar0 & PCI_BASE_ADDRESS_MEM_TYPE_64)
+ component_reg_phys |= ((u64)bar1) << 32;
+
+ if (!component_reg_phys)
+ return CXL_RESOURCE_NONE;
+
+ /* MEMBAR is block size (64k) aligned. */
+ if (!IS_ALIGNED(component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE))
+ return CXL_RESOURCE_NONE;
+
+ return component_reg_phys;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_rcrb_to_component, CXL);
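
The BAR parsing in cxl_rcrb_to_component() is ordinary PCI base-address arithmetic: mask off the low flag bits of BAR0 and splice in BAR1 as the upper 32 bits when the 64-bit memory type is indicated. A standalone sketch of that extraction (hypothetical constants mirroring PCI_BASE_ADDRESS_*):

#include <stdint.h>
#include <stdio.h>

#define BAR_SPACE_IO      0x1u        /* PCI_BASE_ADDRESS_SPACE_IO */
#define BAR_MEM_TYPE_64   0x4u        /* PCI_BASE_ADDRESS_MEM_TYPE_64 */
#define BAR_MEM_MASK      (~0xfull)   /* PCI_BASE_ADDRESS_MEM_MASK */

/* Return 0 on failure, otherwise the 64-bit component register base. */
static uint64_t membar_to_phys(uint32_t bar0, uint32_t bar1)
{
	uint64_t phys;

	if (bar0 & BAR_SPACE_IO)          /* must be a memory BAR */
		return 0;

	phys = bar0 & BAR_MEM_MASK;       /* drop type/prefetch flag bits */
	if (bar0 & BAR_MEM_TYPE_64)       /* 64-bit BAR consumes BAR1 too */
		phys |= (uint64_t)bar1 << 32;

	return phys;
}

int main(void)
{
	/* Example: 64-bit memory BAR at 0x0000004000a00000 */
	uint64_t phys = membar_to_phys(0x00a00004u, 0x00000040u);

	printf("component regs @ %#llx\n", (unsigned long long)phys);
	return 0;
}
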
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index ac75554b5d76..1b1cf459ac77 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -33,6 +33,7 @@
#define CXL_CM_CAP_HDR_ARRAY_SIZE_MASK GENMASK(31, 24)
#define CXL_CM_CAP_PTR_MASK GENMASK(31, 20)
+#define CXL_CM_CAP_CAP_ID_RAS 0x2
#define CXL_CM_CAP_CAP_ID_HDM 0x5
#define CXL_CM_CAP_CAP_HDM_VERSION 1
@@ -61,6 +62,10 @@
#define CXL_HDM_DECODER0_SKIP_LOW(i) CXL_HDM_DECODER0_TL_LOW(i)
#define CXL_HDM_DECODER0_SKIP_HIGH(i) CXL_HDM_DECODER0_TL_HIGH(i)
+/* HDM decoder control register constants CXL 3.0 8.2.5.19.7 */
+#define CXL_DECODER_MIN_GRANULARITY 256
+#define CXL_DECODER_MAX_ENCODED_IG 6
+
static inline int cxl_hdm_decoder_count(u32 cap_hdr)
{
int val = FIELD_GET(CXL_HDM_DECODER_COUNT_MASK, cap_hdr);
@@ -69,23 +74,23 @@ static inline int cxl_hdm_decoder_count(u32 cap_hdr)
}
/* Encode defined in CXL 2.0 8.2.5.12.7 HDM Decoder Control Register */
-static inline int cxl_to_granularity(u16 ig, unsigned int *val)
+static inline int eig_to_granularity(u16 eig, unsigned int *granularity)
{
- if (ig > 6)
+ if (eig > CXL_DECODER_MAX_ENCODED_IG)
return -EINVAL;
- *val = 256 << ig;
+ *granularity = CXL_DECODER_MIN_GRANULARITY << eig;
return 0;
}
/* Encode defined in CXL ECN "3, 6, 12 and 16-way memory Interleaving" */
-static inline int cxl_to_ways(u8 eniw, unsigned int *val)
+static inline int eiw_to_ways(u8 eiw, unsigned int *ways)
{
- switch (eniw) {
+ switch (eiw) {
case 0 ... 4:
- *val = 1 << eniw;
+ *ways = 1 << eiw;
break;
case 8 ... 10:
- *val = 3 << (eniw - 8);
+ *ways = 3 << (eiw - 8);
break;
default:
return -EINVAL;
@@ -94,20 +99,21 @@ static inline int cxl_to_ways(u8 eniw, unsigned int *val)
return 0;
}
-static inline int granularity_to_cxl(int g, u16 *ig)
+static inline int granularity_to_eig(int granularity, u16 *eig)
{
- if (g > SZ_16K || g < 256 || !is_power_of_2(g))
+ if (granularity > SZ_16K || granularity < CXL_DECODER_MIN_GRANULARITY ||
+ !is_power_of_2(granularity))
return -EINVAL;
- *ig = ilog2(g) - 8;
+ *eig = ilog2(granularity) - 8;
return 0;
}
-static inline int ways_to_cxl(unsigned int ways, u8 *iw)
+static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
{
if (ways > 16)
return -EINVAL;
if (is_power_of_2(ways)) {
- *iw = ilog2(ways);
+ *eiw = ilog2(ways);
return 0;
}
if (ways % 3)
@@ -115,10 +121,26 @@ static inline int ways_to_cxl(unsigned int ways, u8 *iw)
ways /= 3;
if (!is_power_of_2(ways))
return -EINVAL;
- *iw = ilog2(ways) + 8;
+ *eiw = ilog2(ways) + 8;
return 0;
}
+/* RAS Registers CXL 2.0 8.2.5.9 CXL RAS Capability Structure */
+#define CXL_RAS_UNCORRECTABLE_STATUS_OFFSET 0x0
+#define CXL_RAS_UNCORRECTABLE_STATUS_MASK (GENMASK(16, 14) | GENMASK(11, 0))
+#define CXL_RAS_UNCORRECTABLE_MASK_OFFSET 0x4
+#define CXL_RAS_UNCORRECTABLE_MASK_MASK (GENMASK(16, 14) | GENMASK(11, 0))
+#define CXL_RAS_UNCORRECTABLE_SEVERITY_OFFSET 0x8
+#define CXL_RAS_UNCORRECTABLE_SEVERITY_MASK (GENMASK(16, 14) | GENMASK(11, 0))
+#define CXL_RAS_CORRECTABLE_STATUS_OFFSET 0xC
+#define CXL_RAS_CORRECTABLE_STATUS_MASK GENMASK(6, 0)
+#define CXL_RAS_CORRECTABLE_MASK_OFFSET 0x10
+#define CXL_RAS_CORRECTABLE_MASK_MASK GENMASK(6, 0)
+#define CXL_RAS_CAP_CONTROL_OFFSET 0x14
+#define CXL_RAS_CAP_CONTROL_FE_MASK GENMASK(5, 0)
+#define CXL_RAS_HEADER_LOG_OFFSET 0x18
+#define CXL_RAS_CAPABILITY_LENGTH 0x58
+
/* CXL 2.0 8.2.8.1 Device Capabilities Array Register */
#define CXLDEV_CAP_ARRAY_OFFSET 0x0
#define CXLDEV_CAP_ARRAY_CAP_ID 0
@@ -153,9 +175,11 @@ struct cxl_regs {
/*
* Common set of CXL Component register block base pointers
* @hdm_decoder: CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure
+ * @ras: CXL 2.0 8.2.5.9 CXL RAS Capability Structure
*/
struct_group_tagged(cxl_component_regs, component,
void __iomem *hdm_decoder;
+ void __iomem *ras;
);
/*
* Common set of CXL Device register block base pointers
@@ -170,12 +194,14 @@ struct cxl_regs {
struct cxl_reg_map {
bool valid;
+ int id;
unsigned long offset;
unsigned long size;
};
struct cxl_component_reg_map {
struct cxl_reg_map hdm_decoder;
+ struct cxl_reg_map ras;
};
struct cxl_device_reg_map {
@@ -187,17 +213,17 @@ struct cxl_device_reg_map {
/**
* struct cxl_register_map - DVSEC harvested register block mapping parameters
* @base: virtual base of the register-block-BAR + @block_offset
- * @block_offset: offset to start of register block in @barno
+ * @resource: physical resource base of the register block
+ * @max_size: maximum mapping size to perform register search
* @reg_type: see enum cxl_regloc_type
- * @barno: PCI BAR number containing the register block
* @component_map: cxl_reg_map for component registers
* @device_map: cxl_reg_maps for device registers
*/
struct cxl_register_map {
void __iomem *base;
- u64 block_offset;
+ resource_size_t resource;
+ resource_size_t max_size;
u8 reg_type;
- u8 barno;
union {
struct cxl_component_reg_map component_map;
struct cxl_device_reg_map device_map;
@@ -208,18 +234,23 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base,
struct cxl_component_reg_map *map);
void cxl_probe_device_regs(struct device *dev, void __iomem *base,
struct cxl_device_reg_map *map);
-int cxl_map_component_regs(struct pci_dev *pdev,
- struct cxl_component_regs *regs,
- struct cxl_register_map *map);
-int cxl_map_device_regs(struct pci_dev *pdev,
- struct cxl_device_regs *regs,
+int cxl_map_component_regs(struct device *dev, struct cxl_component_regs *regs,
+ struct cxl_register_map *map,
+ unsigned long map_mask);
+int cxl_map_device_regs(struct device *dev, struct cxl_device_regs *regs,
struct cxl_register_map *map);
enum cxl_regloc_type;
int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
struct cxl_register_map *map);
-void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
- resource_size_t length);
+
+enum cxl_rcrb {
+ CXL_RCRB_DOWNSTREAM,
+ CXL_RCRB_UPSTREAM,
+};
+resource_size_t cxl_rcrb_to_component(struct device *dev,
+ resource_size_t rcrb,
+ enum cxl_rcrb which);
#define CXL_RESOURCE_NONE ((resource_size_t) -1)
#define CXL_TARGET_STRLEN 20
@@ -248,7 +279,6 @@ enum cxl_decoder_type {
*/
#define CXL_DECODER_MAX_INTERLEAVE 16
-#define CXL_DECODER_MIN_GRANULARITY 256
/**
* struct cxl_decoder - Common CXL HDM Decoder Attributes
@@ -324,18 +354,23 @@ struct cxl_switch_decoder {
struct cxl_dport *target[];
};
+struct cxl_root_decoder;
+typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd,
+ int pos);
/**
* struct cxl_root_decoder - Static platform CXL address decoder
* @res: host / parent resource for region allocations
* @region_id: region id for next region provisioning event
* @calc_hb: which host bridge covers the n'th position by granularity
+ * @platform_data: platform specific configuration data
* @cxlsd: base cxl switch decoder
*/
struct cxl_root_decoder {
struct resource *res;
atomic_t region_id;
- struct cxl_dport *(*calc_hb)(struct cxl_root_decoder *cxlrd, int pos);
+ cxl_calc_hb_fn calc_hb;
+ void *platform_data;
struct cxl_switch_decoder cxlsd;
};
@@ -379,12 +414,21 @@ struct cxl_region_params {
int nr_targets;
};
+/*
+ * Flag whether this region needs to have its HPA span synchronized with
+ * CPU cache state at region activation time.
+ */
+#define CXL_REGION_F_INCOHERENT 0
+
/**
* struct cxl_region - CXL region
* @dev: This region's device
* @id: This region's id. Id is globally unique across all regions
* @mode: Endpoint decoder allocation / access mode
* @type: Endpoint decoder target type
+ * @cxl_nvb: nvdimm bridge for coordinating @cxlr_pmem setup / shutdown
+ * @cxlr_pmem: (for pmem regions) cached pointer to the region's cxl_pmem_region
+ * @flags: Region state flags
* @params: active + config params for the region
*/
struct cxl_region {
@@ -392,38 +436,26 @@ struct cxl_region {
int id;
enum cxl_decoder_mode mode;
enum cxl_decoder_type type;
+ struct cxl_nvdimm_bridge *cxl_nvb;
+ struct cxl_pmem_region *cxlr_pmem;
+ unsigned long flags;
struct cxl_region_params params;
};
-/**
- * enum cxl_nvdimm_brige_state - state machine for managing bus rescans
- * @CXL_NVB_NEW: Set at bridge create and after cxl_pmem_wq is destroyed
- * @CXL_NVB_DEAD: Set at brige unregistration to preclude async probing
- * @CXL_NVB_ONLINE: Target state after successful ->probe()
- * @CXL_NVB_OFFLINE: Target state after ->remove() or failed ->probe()
- */
-enum cxl_nvdimm_brige_state {
- CXL_NVB_NEW,
- CXL_NVB_DEAD,
- CXL_NVB_ONLINE,
- CXL_NVB_OFFLINE,
-};
-
struct cxl_nvdimm_bridge {
int id;
struct device dev;
struct cxl_port *port;
struct nvdimm_bus *nvdimm_bus;
struct nvdimm_bus_descriptor nd_desc;
- struct work_struct state_work;
- enum cxl_nvdimm_brige_state state;
};
+#define CXL_DEV_ID_LEN 19
+
struct cxl_nvdimm {
struct device dev;
struct cxl_memdev *cxlmd;
- struct cxl_nvdimm_bridge *bridge;
- struct xarray pmem_regions;
+ u8 dev_id[CXL_DEV_ID_LEN]; /* for nvdimm, string of 'serial' */
};
struct cxl_pmem_region_mapping {
@@ -438,7 +470,6 @@ struct cxl_pmem_region {
struct device dev;
struct cxl_region *cxlr;
struct nd_region *nd_region;
- struct cxl_nvdimm_bridge *bridge;
struct range hpa_range;
int nr_mappings;
struct cxl_pmem_region_mapping mapping[];
@@ -500,12 +531,16 @@ cxl_find_dport_by_dev(struct cxl_port *port, const struct device *dport_dev)
* @dport: PCI bridge or firmware device representing the downstream link
* @port_id: unique hardware identifier for dport in decoder target list
* @component_reg_phys: downstream port component registers
+ * @rcrb: base address for the Root Complex Register Block
+ * @rch: Indicate whether this dport was enumerated in RCH or VH mode
* @port: reference to cxl_port that contains this downstream port
*/
struct cxl_dport {
struct device *dport;
int port_id;
resource_size_t component_reg_phys;
+ resource_size_t rcrb;
+ bool rch;
struct cxl_port *port;
};
@@ -562,11 +597,10 @@ struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port);
struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
resource_size_t component_reg_phys,
struct cxl_dport *parent_dport);
-int devm_cxl_add_endpoint(struct cxl_memdev *cxlmd,
- struct cxl_dport *parent_dport);
struct cxl_port *find_cxl_root(struct device *dev);
int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd);
-int cxl_bus_rescan(void);
+void cxl_bus_rescan(void);
+void cxl_bus_drain(void);
struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd,
struct cxl_dport **dport);
bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd);
@@ -574,6 +608,10 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd);
struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
struct device *dport, int port_id,
resource_size_t component_reg_phys);
+struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
+ struct device *dport_dev, int port_id,
+ resource_size_t component_reg_phys,
+ resource_size_t rcrb);
struct cxl_decoder *to_cxl_decoder(struct device *dev);
struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev);
@@ -581,7 +619,9 @@ struct cxl_endpoint_decoder *to_cxl_endpoint_decoder(struct device *dev);
bool is_root_decoder(struct device *dev);
bool is_endpoint_decoder(struct device *dev);
struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
- unsigned int nr_targets);
+ unsigned int nr_targets,
+ cxl_calc_hb_fn calc_hb);
+struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos);
struct cxl_switch_decoder *cxl_switch_decoder_alloc(struct cxl_port *port,
unsigned int nr_targets);
int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map);
@@ -637,7 +677,7 @@ struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host,
struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev);
bool is_cxl_nvdimm(struct device *dev);
bool is_cxl_nvdimm_bridge(struct device *dev);
-int devm_cxl_add_nvdimm(struct device *host, struct cxl_memdev *cxlmd);
+int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd);
struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *dev);
#ifdef CONFIG_CXL_REGION
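
The renamed interleave conversion helpers above are pure arithmetic, so they are easy to exercise outside the kernel. A userspace round-trip of the granularity encoding plus a decode of the 3/6/12-way encoding (assuming the same CXL encodings: granularity = 256 << eig; eiw 0..4 are power-of-2 ways, eiw 8..10 are 3/6/12 ways):

#include <stdio.h>
#include <stdint.h>

/* granularity = 256 << eig, eig in [0, 6] */
static int eig_to_granularity(uint16_t eig, unsigned int *granularity)
{
	if (eig > 6)
		return -1;
	*granularity = 256u << eig;
	return 0;
}

static int granularity_to_eig(unsigned int granularity, uint16_t *eig)
{
	unsigned int g = granularity;
	uint16_t shift = 0;

	if (g < 256 || g > 16384 || (g & (g - 1)))   /* power of 2 in range */
		return -1;
	while (g > 256) {
		g >>= 1;
		shift++;
	}
	*eig = shift;
	return 0;
}

/* eiw 0..4 -> 1/2/4/8/16 ways, eiw 8..10 -> 3/6/12 ways */
static int eiw_to_ways(uint8_t eiw, unsigned int *ways)
{
	if (eiw <= 4)
		*ways = 1u << eiw;
	else if (eiw >= 8 && eiw <= 10)
		*ways = 3u << (eiw - 8);
	else
		return -1;
	return 0;
}

int main(void)
{
	unsigned int granularity, ways;
	uint16_t eig;

	granularity_to_eig(1024, &eig);
	eig_to_granularity(eig, &granularity);
	eiw_to_ways(9, &ways);   /* encoded 6-way interleave */
	printf("eig=%u granularity=%u ways=%u\n", eig, granularity, ways);
	return 0;
}
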
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 88e3a8e54b6a..ab138004f644 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -35,6 +35,8 @@
* @cdev: char dev core object for ioctl operations
* @cxlds: The device state backing this device
* @detach_work: active memdev lost a port in its ancestry
+ * @cxl_nvb: coordinate removal of @cxl_nvd if present
+ * @cxl_nvd: optional bridge to an nvdimm if the device supports pmem
* @id: id number of this memdev instance.
*/
struct cxl_memdev {
@@ -42,6 +44,8 @@ struct cxl_memdev {
struct cdev cdev;
struct cxl_dev_state *cxlds;
struct work_struct detach_work;
+ struct cxl_nvdimm_bridge *cxl_nvb;
+ struct cxl_nvdimm *cxl_nvd;
int id;
};
@@ -76,6 +80,15 @@ static inline bool is_cxl_endpoint(struct cxl_port *port)
struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds);
+static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
+ struct cxl_memdev *cxlmd)
+{
+ if (!port)
+ return NULL;
+
+ return xa_load(&port->endpoints, (unsigned long)&cxlmd->dev);
+}
+
/**
* struct cxl_mbox_cmd - A command to be submitted to hardware.
* @opcode: (input) The command set and command submitted to hardware.
@@ -88,6 +101,7 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds);
* outputs commands this is always expected to be deterministic. For
* variable sized output commands, it tells the exact number of bytes
* written.
+ * @min_out: (input) minimum expected output payload size, used to validate internal command responses
* @return_code: (output) Error code returned from hardware.
*
* This is the primary mechanism used to send commands to the hardware.
@@ -102,6 +116,7 @@ struct cxl_mbox_cmd {
void *payload_out;
size_t size_in;
size_t size_out;
+ size_t min_out;
u16 return_code;
};
@@ -186,8 +201,10 @@ struct cxl_endpoint_dvsec_info {
* Currently only memory devices are represented.
*
* @dev: The device associated with this CXL state
+ * @cxlmd: The device representing the CXL.mem capabilities of @dev
* @regs: Parsed register blocks
* @cxl_dvsec: Offset to the PCIe device DVSEC
+ * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH)
* @payload_size: Size of space for payload
* (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register)
* @lsa_size: Size of Label Storage Area
@@ -218,10 +235,12 @@ struct cxl_endpoint_dvsec_info {
*/
struct cxl_dev_state {
struct device *dev;
+ struct cxl_memdev *cxlmd;
struct cxl_regs regs;
int cxl_dvsec;
+ bool rcd;
size_t payload_size;
size_t lsa_size;
struct mutex mbox_mutex; /* Protects device mailbox and firmware */
@@ -273,6 +292,12 @@ enum cxl_opcode {
CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS = 0x4303,
CXL_MBOX_OP_SCAN_MEDIA = 0x4304,
CXL_MBOX_OP_GET_SCAN_MEDIA = 0x4305,
+ CXL_MBOX_OP_GET_SECURITY_STATE = 0x4500,
+ CXL_MBOX_OP_SET_PASSPHRASE = 0x4501,
+ CXL_MBOX_OP_DISABLE_PASSPHRASE = 0x4502,
+ CXL_MBOX_OP_UNLOCK = 0x4503,
+ CXL_MBOX_OP_FREEZE_SECURITY = 0x4504,
+ CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE = 0x4505,
CXL_MBOX_OP_MAX = 0x10000
};
@@ -372,8 +397,43 @@ struct cxl_mem_command {
#define CXL_CMD_FLAG_FORCE_ENABLE BIT(0)
};
-int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in,
- size_t in_size, void *out, size_t out_size);
+#define CXL_PMEM_SEC_STATE_USER_PASS_SET 0x01
+#define CXL_PMEM_SEC_STATE_MASTER_PASS_SET 0x02
+#define CXL_PMEM_SEC_STATE_LOCKED 0x04
+#define CXL_PMEM_SEC_STATE_FROZEN 0x08
+#define CXL_PMEM_SEC_STATE_USER_PLIMIT 0x10
+#define CXL_PMEM_SEC_STATE_MASTER_PLIMIT 0x20
+
+/* set passphrase input payload */
+struct cxl_set_pass {
+ u8 type;
+ u8 reserved[31];
+ /* CXL field using NVDIMM define, same length */
+ u8 old_pass[NVDIMM_PASSPHRASE_LEN];
+ u8 new_pass[NVDIMM_PASSPHRASE_LEN];
+} __packed;
+
+/* disable passphrase input payload */
+struct cxl_disable_pass {
+ u8 type;
+ u8 reserved[31];
+ u8 pass[NVDIMM_PASSPHRASE_LEN];
+} __packed;
+
+/* passphrase secure erase payload */
+struct cxl_pass_erase {
+ u8 type;
+ u8 reserved[31];
+ u8 pass[NVDIMM_PASSPHRASE_LEN];
+} __packed;
+
+enum {
+ CXL_PMEM_SEC_PASS_MASTER = 0,
+ CXL_PMEM_SEC_PASS_USER,
+};
+
+int cxl_internal_send_cmd(struct cxl_dev_state *cxlds,
+ struct cxl_mbox_cmd *cmd);
int cxl_dev_state_identify(struct cxl_dev_state *cxlds);
int cxl_await_media_ready(struct cxl_dev_state *cxlds);
int cxl_enumerate_cmds(struct cxl_dev_state *cxlds);
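
The new @min_out field lets internal callers accept variable-sized responses while still rejecting obviously short ones. A hedged sketch of how such a check might look (hypothetical helper, not the subsystem's actual validation code):

#include <stdio.h>
#include <errno.h>
#include <stddef.h>

/*
 * Accept a variable-length response as long as the device returned at
 * least the caller's stated minimum; commands that set min_out to zero
 * are treated as fixed-size and must match size_out exactly.
 */
static int validate_output_size(size_t size_out, size_t min_out, size_t returned)
{
	if (min_out)
		return returned >= min_out ? 0 : -EIO;
	return returned == size_out ? 0 : -EIO;
}

int main(void)
{
	printf("%d %d\n", validate_output_size(64, 16, 32),
	       validate_output_size(64, 0, 32));
	return 0;
}
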
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index eec597dbe763..920909791bb9 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -62,15 +62,6 @@ enum cxl_regloc_type {
CXL_REGLOC_RBI_TYPES
};
-static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev,
- struct cxl_register_map *map)
-{
- if (map->block_offset == U64_MAX)
- return CXL_RESOURCE_NONE;
-
- return pci_resource_start(pdev, map->barno) + map->block_offset;
-}
-
int devm_cxl_port_enumerate_dports(struct cxl_port *port);
struct cxl_dev_state;
int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm);
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 64ccf053d32c..39c4b54f0715 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -45,9 +45,60 @@ static int cxl_mem_dpa_show(struct seq_file *file, void *data)
return 0;
}
+static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd,
+ struct cxl_dport *parent_dport)
+{
+ struct cxl_port *parent_port = parent_dport->port;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_port *endpoint, *iter, *down;
+ resource_size_t component_reg_phys;
+ int rc;
+
+ /*
+ * Now that the path to the root is established record all the
+ * intervening ports in the chain.
+ */
+ for (iter = parent_port, down = NULL; !is_cxl_root(iter);
+ down = iter, iter = to_cxl_port(iter->dev.parent)) {
+ struct cxl_ep *ep;
+
+ ep = cxl_ep_load(iter, cxlmd);
+ ep->next = down;
+ }
+
+ /*
+ * The component registers for an RCD might come from the
+ * host-bridge RCRB if they are not already mapped via the
+ * typical register locator mechanism.
+ */
+ if (parent_dport->rch && cxlds->component_reg_phys == CXL_RESOURCE_NONE)
+ component_reg_phys = cxl_rcrb_to_component(
+ &cxlmd->dev, parent_dport->rcrb, CXL_RCRB_UPSTREAM);
+ else
+ component_reg_phys = cxlds->component_reg_phys;
+ endpoint = devm_cxl_add_port(host, &cxlmd->dev, component_reg_phys,
+ parent_dport);
+ if (IS_ERR(endpoint))
+ return PTR_ERR(endpoint);
+
+ rc = cxl_endpoint_autoremove(cxlmd, endpoint);
+ if (rc)
+ return rc;
+
+ if (!endpoint->dev.driver) {
+ dev_err(&cxlmd->dev, "%s failed probe\n",
+ dev_name(&endpoint->dev));
+ return -ENXIO;
+ }
+
+ return 0;
+}
+
static int cxl_mem_probe(struct device *dev)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct device *endpoint_parent;
struct cxl_port *parent_port;
struct cxl_dport *dport;
struct dentry *dentry;
@@ -80,21 +131,34 @@ static int cxl_mem_probe(struct device *dev)
return -ENXIO;
}
- device_lock(&parent_port->dev);
- if (!parent_port->dev.driver) {
+ if (dport->rch)
+ endpoint_parent = parent_port->uport;
+ else
+ endpoint_parent = &parent_port->dev;
+
+ device_lock(endpoint_parent);
+ if (!endpoint_parent->driver) {
dev_err(dev, "CXL port topology %s not enabled\n",
- dev_name(&parent_port->dev));
+ dev_name(endpoint_parent));
rc = -ENXIO;
goto unlock;
}
- rc = devm_cxl_add_endpoint(cxlmd, dport);
+ rc = devm_cxl_add_endpoint(endpoint_parent, cxlmd, dport);
unlock:
- device_unlock(&parent_port->dev);
+ device_unlock(endpoint_parent);
put_device(&parent_port->dev);
if (rc)
return rc;
+ if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) {
+ rc = devm_cxl_add_nvdimm(cxlmd);
+ if (rc == -ENODEV)
+ dev_info(dev, "PMEM disabled by platform\n");
+ else
+ return rc;
+ }
+
/*
* The kernel may be operating out of CXL memory on this device,
* there is no spec defined way to determine whether this device
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index faeb5d9d7a7a..33083a522fd1 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -9,10 +9,13 @@
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/pci-doe.h>
+#include <linux/aer.h>
#include <linux/io.h>
#include "cxlmem.h"
#include "cxlpci.h"
#include "cxl.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/cxl.h>
/**
* DOC: cxl pci
@@ -276,35 +279,22 @@ static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
static int cxl_map_regblock(struct pci_dev *pdev, struct cxl_register_map *map)
{
- void __iomem *addr;
- int bar = map->barno;
struct device *dev = &pdev->dev;
- resource_size_t offset = map->block_offset;
-
- /* Basic sanity check that BAR is big enough */
- if (pci_resource_len(pdev, bar) < offset) {
- dev_err(dev, "BAR%d: %pr: too small (offset: %pa)\n", bar,
- &pdev->resource[bar], &offset);
- return -ENXIO;
- }
- addr = pci_iomap(pdev, bar, 0);
- if (!addr) {
+ map->base = ioremap(map->resource, map->max_size);
+ if (!map->base) {
dev_err(dev, "failed to map registers\n");
return -ENOMEM;
}
- dev_dbg(dev, "Mapped CXL Memory Device resource bar %u @ %pa\n",
- bar, &offset);
-
- map->base = addr + map->block_offset;
+ dev_dbg(dev, "Mapped CXL Memory Device resource %pa\n", &map->resource);
return 0;
}
static void cxl_unmap_regblock(struct pci_dev *pdev,
struct cxl_register_map *map)
{
- pci_iounmap(pdev, map->base - map->block_offset);
+ iounmap(map->base);
map->base = NULL;
}
@@ -324,6 +314,9 @@ static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map)
return -ENXIO;
}
+ if (!comp_map->ras.valid)
+ dev_dbg(dev, "RAS registers not found\n");
+
dev_dbg(dev, "Set up component registers\n");
break;
case CXL_REGLOC_RBI_MEMDEV:
@@ -347,27 +340,6 @@ static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map)
return 0;
}
-static int cxl_map_regs(struct cxl_dev_state *cxlds, struct cxl_register_map *map)
-{
- struct device *dev = cxlds->dev;
- struct pci_dev *pdev = to_pci_dev(dev);
-
- switch (map->reg_type) {
- case CXL_REGLOC_RBI_COMPONENT:
- cxl_map_component_regs(pdev, &cxlds->regs.component, map);
- dev_dbg(dev, "Mapping component registers...\n");
- break;
- case CXL_REGLOC_RBI_MEMDEV:
- cxl_map_device_regs(pdev, &cxlds->regs.device_regs, map);
- dev_dbg(dev, "Probing device registers...\n");
- break;
- default:
- break;
- }
-
- return 0;
-}
-
static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
struct cxl_register_map *map)
{
@@ -418,6 +390,11 @@ static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds)
continue;
}
+ if (!pci_request_config_region_exclusive(pdev, off,
+ PCI_DOE_CAP_SIZEOF,
+ dev_name(dev)))
+ pci_err(pdev, "Failed to exclude DOE registers\n");
+
if (xa_insert(&cxlds->doe_mbs, off, doe_mb, GFP_KERNEL)) {
dev_err(dev, "xa_insert failed to insert MB @ %x\n",
off);
@@ -428,6 +405,20 @@ static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds)
}
}
+/*
+ * Assume that any RCIEP that emits the CXL memory expander class code
+ * is an RCD
+ */
+static bool is_cxl_restricted(struct pci_dev *pdev)
+{
+ return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END;
+}
+
+static void disable_aer(void *pdev)
+{
+ pci_disable_pcie_error_reporting(pdev);
+}
+
static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct cxl_register_map map;
@@ -449,7 +440,9 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
cxlds = cxl_dev_state_create(&pdev->dev);
if (IS_ERR(cxlds))
return PTR_ERR(cxlds);
+ pci_set_drvdata(pdev, cxlds);
+ cxlds->rcd = is_cxl_restricted(pdev);
cxlds->serial = pci_get_dsn(pdev);
cxlds->cxl_dvsec = pci_find_dvsec_capability(
pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE);
@@ -461,7 +454,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
- rc = cxl_map_regs(cxlds, &map);
+ rc = cxl_map_device_regs(&pdev->dev, &cxlds->regs.device_regs, &map);
if (rc)
return rc;
@@ -474,10 +467,15 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
dev_warn(&pdev->dev, "No component registers (%d)\n", rc);
- cxlds->component_reg_phys = cxl_regmap_to_base(pdev, &map);
+ cxlds->component_reg_phys = map.resource;
devm_cxl_pci_create_doe(cxlds);
+ rc = cxl_map_component_regs(&pdev->dev, &cxlds->regs.component,
+ &map, BIT(CXL_CM_CAP_CAP_ID_RAS));
+ if (rc)
+ dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
+
rc = cxl_pci_setup_mailbox(cxlds);
if (rc)
return rc;
@@ -498,8 +496,13 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
- if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM))
- rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);
+ if (cxlds->regs.ras) {
+ pci_enable_pcie_error_reporting(pdev);
+ rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev);
+ if (rc)
+ return rc;
+ }
+ pci_save_state(pdev);
return rc;
}
@@ -511,10 +514,151 @@ static const struct pci_device_id cxl_mem_pci_tbl[] = {
};
MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
+/* CXL spec rev3.0 8.2.4.16.1 */
+static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
+{
+ void __iomem *addr;
+ u32 *log_addr;
+ int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
+
+ addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
+ log_addr = log;
+
+ for (i = 0; i < log_u32_size; i++) {
+ *log_addr = readl(addr);
+ log_addr++;
+ addr += sizeof(u32);
+ }
+}
+
+/*
+ * Log the state of the RAS status registers and prepare them to log the
+ * next error status. Return true if a reset is needed.
+ */
+static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
+{
+ struct cxl_memdev *cxlmd = cxlds->cxlmd;
+ struct device *dev = &cxlmd->dev;
+ u32 hl[CXL_HEADERLOG_SIZE_U32];
+ void __iomem *addr;
+ u32 status;
+ u32 fe;
+
+ if (!cxlds->regs.ras)
+ return false;
+
+ addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
+ status = readl(addr);
+ if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
+ return false;
+
+ /* If multiple errors, log header points to first error from ctrl reg */
+ if (hweight32(status) > 1) {
+ addr = cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
+ fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(addr)));
+ } else {
+ fe = status;
+ }
+
+ header_log_copy(cxlds, hl);
+ trace_cxl_aer_uncorrectable_error(dev, status, fe, hl);
+ writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
+
+ return true;
+}
+
+static pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+ struct cxl_memdev *cxlmd = cxlds->cxlmd;
+ struct device *dev = &cxlmd->dev;
+ bool ue;
+
+ /*
+ * A frozen channel indicates an impending reset which is fatal to
+ * CXL.mem operation, and will likely crash the system. On the off
+ * chance the situation is recoverable dump the status of the RAS
+ * capability registers and bounce the active state of the memdev.
+ */
+ ue = cxl_report_and_clear(cxlds);
+
+ switch (state) {
+ case pci_channel_io_normal:
+ if (ue) {
+ device_release_driver(dev);
+ return PCI_ERS_RESULT_NEED_RESET;
+ }
+ return PCI_ERS_RESULT_CAN_RECOVER;
+ case pci_channel_io_frozen:
+ dev_warn(&pdev->dev,
+ "%s: frozen state error detected, disable CXL.mem\n",
+ dev_name(dev));
+ device_release_driver(dev);
+ return PCI_ERS_RESULT_NEED_RESET;
+ case pci_channel_io_perm_failure:
+ dev_warn(&pdev->dev,
+ "failure state error detected, request disconnect\n");
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev)
+{
+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+ struct cxl_memdev *cxlmd = cxlds->cxlmd;
+ struct device *dev = &cxlmd->dev;
+
+ dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n",
+ dev_name(dev));
+ pci_restore_state(pdev);
+ if (device_attach(dev) <= 0)
+ return PCI_ERS_RESULT_DISCONNECT;
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
+static void cxl_error_resume(struct pci_dev *pdev)
+{
+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+ struct cxl_memdev *cxlmd = cxlds->cxlmd;
+ struct device *dev = &cxlmd->dev;
+
+ dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev),
+ dev->driver ? "successful" : "failed");
+}
+
+static void cxl_cor_error_detected(struct pci_dev *pdev)
+{
+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+ struct cxl_memdev *cxlmd = cxlds->cxlmd;
+ struct device *dev = &cxlmd->dev;
+ void __iomem *addr;
+ u32 status;
+
+ if (!cxlds->regs.ras)
+ return;
+
+ addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
+ status = readl(addr);
+ if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
+ writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
+ trace_cxl_aer_correctable_error(dev, status);
+ }
+}
+
+static const struct pci_error_handlers cxl_error_handlers = {
+ .error_detected = cxl_error_detected,
+ .slot_reset = cxl_slot_reset,
+ .resume = cxl_error_resume,
+ .cor_error_detected = cxl_cor_error_detected,
+};
+
static struct pci_driver cxl_pci_driver = {
.name = KBUILD_MODNAME,
.id_table = cxl_mem_pci_tbl,
.probe = cxl_pci_probe,
+ .err_handler = &cxl_error_handlers,
.driver = {
.probe_type = PROBE_PREFER_ASYNCHRONOUS,
},
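
The uncorrectable-error path above picks which status bit the header log describes: with a single bit set the status itself identifies it, otherwise the First Error Pointer field of the control register does. A standalone sketch of that selection (hypothetical register values; GCC/Clang __builtin_popcount stands in for the kernel's hweight32()):

#include <stdint.h>
#include <stdio.h>

#define RAS_CAP_CONTROL_FE_MASK 0x3fu   /* mirrors CXL_RAS_CAP_CONTROL_FE_MASK */

/* Return a single-bit mask naming the error the header log belongs to. */
static uint32_t first_error_bit(uint32_t status, uint32_t cap_control)
{
	if (__builtin_popcount(status) > 1)
		return 1u << (cap_control & RAS_CAP_CONTROL_FE_MASK);
	return status;
}

int main(void)
{
	/* Two errors pending (bits 0 and 3); control says bit 3 came first. */
	uint32_t fe = first_error_bit(0x9u, 0x3u);

	printf("header log describes error bit %#x\n", fe);
	return 0;
}
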
diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
index 4c627d67281a..eedefebc4283 100644
--- a/drivers/cxl/pmem.c
+++ b/drivers/cxl/pmem.c
@@ -11,12 +11,7 @@
#include "cxlmem.h"
#include "cxl.h"
-/*
- * Ordered workqueue for cxl nvdimm device arrival and departure
- * to coordinate bus rescans when a bridge arrives and trigger remove
- * operations when the bridge is removed.
- */
-static struct workqueue_struct *cxl_pmem_wq;
+extern const struct nvdimm_security_ops *cxl_security_ops;
static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
@@ -27,78 +22,81 @@ static void clear_exclusive(void *cxlds)
static void unregister_nvdimm(void *nvdimm)
{
- struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
- struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge;
- struct cxl_pmem_region *cxlr_pmem;
- unsigned long index;
+ nvdimm_delete(nvdimm);
+}
- device_lock(&cxl_nvb->dev);
- dev_set_drvdata(&cxl_nvd->dev, NULL);
- xa_for_each(&cxl_nvd->pmem_regions, index, cxlr_pmem) {
- get_device(&cxlr_pmem->dev);
- device_unlock(&cxl_nvb->dev);
+static ssize_t provider_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
- device_release_driver(&cxlr_pmem->dev);
- put_device(&cxlr_pmem->dev);
+ return sysfs_emit(buf, "%s\n", dev_name(&cxl_nvd->dev));
+}
+static DEVICE_ATTR_RO(provider);
- device_lock(&cxl_nvb->dev);
- }
- device_unlock(&cxl_nvb->dev);
+static ssize_t id_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
+ struct cxl_dev_state *cxlds = cxl_nvd->cxlmd->cxlds;
- nvdimm_delete(nvdimm);
- cxl_nvd->bridge = NULL;
+ return sysfs_emit(buf, "%lld\n", cxlds->serial);
}
+static DEVICE_ATTR_RO(id);
+
+static struct attribute *cxl_dimm_attributes[] = {
+ &dev_attr_id.attr,
+ &dev_attr_provider.attr,
+ NULL
+};
+
+static const struct attribute_group cxl_dimm_attribute_group = {
+ .name = "cxl",
+ .attrs = cxl_dimm_attributes,
+};
+
+static const struct attribute_group *cxl_dimm_attribute_groups[] = {
+ &cxl_dimm_attribute_group,
+ NULL
+};
static int cxl_nvdimm_probe(struct device *dev)
{
struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
+ struct cxl_nvdimm_bridge *cxl_nvb = cxlmd->cxl_nvb;
unsigned long flags = 0, cmd_mask = 0;
struct cxl_dev_state *cxlds = cxlmd->cxlds;
- struct cxl_nvdimm_bridge *cxl_nvb;
struct nvdimm *nvdimm;
int rc;
- cxl_nvb = cxl_find_nvdimm_bridge(dev);
- if (!cxl_nvb)
- return -ENXIO;
-
- device_lock(&cxl_nvb->dev);
- if (!cxl_nvb->nvdimm_bus) {
- rc = -ENXIO;
- goto out;
- }
-
set_exclusive_cxl_commands(cxlds, exclusive_cmds);
rc = devm_add_action_or_reset(dev, clear_exclusive, cxlds);
if (rc)
- goto out;
+ return rc;
set_bit(NDD_LABELING, &flags);
set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
- nvdimm = nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd, NULL, flags,
- cmd_mask, 0, NULL);
- if (!nvdimm) {
- rc = -ENOMEM;
- goto out;
- }
+ nvdimm = __nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd,
+ cxl_dimm_attribute_groups, flags,
+ cmd_mask, 0, NULL, cxl_nvd->dev_id,
+ cxl_security_ops, NULL);
+ if (!nvdimm)
+ return -ENOMEM;
dev_set_drvdata(dev, nvdimm);
- cxl_nvd->bridge = cxl_nvb;
- rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm);
-out:
- device_unlock(&cxl_nvb->dev);
- put_device(&cxl_nvb->dev);
-
- return rc;
+ return devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm);
}
static struct cxl_driver cxl_nvdimm_driver = {
.name = "cxl_nvdimm",
.probe = cxl_nvdimm_probe,
.id = CXL_DEVICE_NVDIMM,
+ .drv = {
+ .suppress_bind_attrs = true,
+ },
};
static int cxl_pmem_get_config_size(struct cxl_dev_state *cxlds,
@@ -121,6 +119,7 @@ static int cxl_pmem_get_config_data(struct cxl_dev_state *cxlds,
unsigned int buf_len)
{
struct cxl_mbox_get_lsa get_lsa;
+ struct cxl_mbox_cmd mbox_cmd;
int rc;
if (sizeof(*cmd) > buf_len)
@@ -132,9 +131,15 @@ static int cxl_pmem_get_config_data(struct cxl_dev_state *cxlds,
.offset = cpu_to_le32(cmd->in_offset),
.length = cpu_to_le32(cmd->in_length),
};
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_GET_LSA,
+ .payload_in = &get_lsa,
+ .size_in = sizeof(get_lsa),
+ .size_out = cmd->in_length,
+ .payload_out = cmd->out_buf,
+ };
- rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_LSA, &get_lsa,
- sizeof(get_lsa), cmd->out_buf, cmd->in_length);
+ rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
cmd->status = 0;
return rc;
@@ -145,6 +150,7 @@ static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds,
unsigned int buf_len)
{
struct cxl_mbox_set_lsa *set_lsa;
+ struct cxl_mbox_cmd mbox_cmd;
int rc;
if (sizeof(*cmd) > buf_len)
@@ -163,10 +169,13 @@ static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds,
.offset = cpu_to_le32(cmd->in_offset),
};
memcpy(set_lsa->data, cmd->in_buf, cmd->in_length);
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_SET_LSA,
+ .payload_in = set_lsa,
+ .size_in = struct_size(set_lsa, data, cmd->in_length),
+ };
- rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_SET_LSA, set_lsa,
- struct_size(set_lsa, data, cmd->in_length),
- NULL, 0);
+ rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
/*
* Set "firmware" status (4-packed bytes at the end of the input
@@ -216,204 +225,48 @@ static int cxl_pmem_ctl(struct nvdimm_bus_descriptor *nd_desc,
return cxl_pmem_nvdimm_ctl(nvdimm, cmd, buf, buf_len);
}
-static bool online_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb)
-{
- if (cxl_nvb->nvdimm_bus)
- return true;
- cxl_nvb->nvdimm_bus =
- nvdimm_bus_register(&cxl_nvb->dev, &cxl_nvb->nd_desc);
- return cxl_nvb->nvdimm_bus != NULL;
-}
-
-static int cxl_nvdimm_release_driver(struct device *dev, void *cxl_nvb)
-{
- struct cxl_nvdimm *cxl_nvd;
-
- if (!is_cxl_nvdimm(dev))
- return 0;
-
- cxl_nvd = to_cxl_nvdimm(dev);
- if (cxl_nvd->bridge != cxl_nvb)
- return 0;
-
- device_release_driver(dev);
- return 0;
-}
-
-static int cxl_pmem_region_release_driver(struct device *dev, void *cxl_nvb)
-{
- struct cxl_pmem_region *cxlr_pmem;
-
- if (!is_cxl_pmem_region(dev))
- return 0;
-
- cxlr_pmem = to_cxl_pmem_region(dev);
- if (cxlr_pmem->bridge != cxl_nvb)
- return 0;
-
- device_release_driver(dev);
- return 0;
-}
-
-static void offline_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb,
- struct nvdimm_bus *nvdimm_bus)
+static void unregister_nvdimm_bus(void *_cxl_nvb)
{
- if (!nvdimm_bus)
- return;
+ struct cxl_nvdimm_bridge *cxl_nvb = _cxl_nvb;
+ struct nvdimm_bus *nvdimm_bus = cxl_nvb->nvdimm_bus;
- /*
- * Set the state of cxl_nvdimm devices to unbound / idle before
- * nvdimm_bus_unregister() rips the nvdimm objects out from
- * underneath them.
- */
- bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb,
- cxl_pmem_region_release_driver);
- bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb,
- cxl_nvdimm_release_driver);
+ cxl_nvb->nvdimm_bus = NULL;
nvdimm_bus_unregister(nvdimm_bus);
}
-static void cxl_nvb_update_state(struct work_struct *work)
-{
- struct cxl_nvdimm_bridge *cxl_nvb =
- container_of(work, typeof(*cxl_nvb), state_work);
- struct nvdimm_bus *victim_bus = NULL;
- bool release = false, rescan = false;
-
- device_lock(&cxl_nvb->dev);
- switch (cxl_nvb->state) {
- case CXL_NVB_ONLINE:
- if (!online_nvdimm_bus(cxl_nvb)) {
- dev_err(&cxl_nvb->dev,
- "failed to establish nvdimm bus\n");
- release = true;
- } else
- rescan = true;
- break;
- case CXL_NVB_OFFLINE:
- case CXL_NVB_DEAD:
- victim_bus = cxl_nvb->nvdimm_bus;
- cxl_nvb->nvdimm_bus = NULL;
- break;
- default:
- break;
- }
- device_unlock(&cxl_nvb->dev);
-
- if (release)
- device_release_driver(&cxl_nvb->dev);
- if (rescan) {
- int rc = bus_rescan_devices(&cxl_bus_type);
-
- dev_dbg(&cxl_nvb->dev, "rescan: %d\n", rc);
- }
- offline_nvdimm_bus(cxl_nvb, victim_bus);
-
- put_device(&cxl_nvb->dev);
-}
-
-static void cxl_nvdimm_bridge_state_work(struct cxl_nvdimm_bridge *cxl_nvb)
-{
- /*
- * Take a reference that the workqueue will drop if new work
- * gets queued.
- */
- get_device(&cxl_nvb->dev);
- if (!queue_work(cxl_pmem_wq, &cxl_nvb->state_work))
- put_device(&cxl_nvb->dev);
-}
-
-static void cxl_nvdimm_bridge_remove(struct device *dev)
-{
- struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);
-
- if (cxl_nvb->state == CXL_NVB_ONLINE)
- cxl_nvb->state = CXL_NVB_OFFLINE;
- cxl_nvdimm_bridge_state_work(cxl_nvb);
-}
-
static int cxl_nvdimm_bridge_probe(struct device *dev)
{
struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);
- if (cxl_nvb->state == CXL_NVB_DEAD)
- return -ENXIO;
-
- if (cxl_nvb->state == CXL_NVB_NEW) {
- cxl_nvb->nd_desc = (struct nvdimm_bus_descriptor) {
- .provider_name = "CXL",
- .module = THIS_MODULE,
- .ndctl = cxl_pmem_ctl,
- };
+ cxl_nvb->nd_desc = (struct nvdimm_bus_descriptor) {
+ .provider_name = "CXL",
+ .module = THIS_MODULE,
+ .ndctl = cxl_pmem_ctl,
+ };
- INIT_WORK(&cxl_nvb->state_work, cxl_nvb_update_state);
- }
+ cxl_nvb->nvdimm_bus =
+ nvdimm_bus_register(&cxl_nvb->dev, &cxl_nvb->nd_desc);
- cxl_nvb->state = CXL_NVB_ONLINE;
- cxl_nvdimm_bridge_state_work(cxl_nvb);
+ if (!cxl_nvb->nvdimm_bus)
+ return -ENOMEM;
- return 0;
+ return devm_add_action_or_reset(dev, unregister_nvdimm_bus, cxl_nvb);
}
static struct cxl_driver cxl_nvdimm_bridge_driver = {
.name = "cxl_nvdimm_bridge",
.probe = cxl_nvdimm_bridge_probe,
- .remove = cxl_nvdimm_bridge_remove,
.id = CXL_DEVICE_NVDIMM_BRIDGE,
+ .drv = {
+ .suppress_bind_attrs = true,
+ },
};
-static int match_cxl_nvdimm(struct device *dev, void *data)
-{
- return is_cxl_nvdimm(dev);
-}
-
static void unregister_nvdimm_region(void *nd_region)
{
nvdimm_region_delete(nd_region);
}
-static int cxl_nvdimm_add_region(struct cxl_nvdimm *cxl_nvd,
- struct cxl_pmem_region *cxlr_pmem)
-{
- int rc;
-
- rc = xa_insert(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem,
- cxlr_pmem, GFP_KERNEL);
- if (rc)
- return rc;
-
- get_device(&cxlr_pmem->dev);
- return 0;
-}
-
-static void cxl_nvdimm_del_region(struct cxl_nvdimm *cxl_nvd,
- struct cxl_pmem_region *cxlr_pmem)
-{
- /*
- * It is possible this is called without a corresponding
- * cxl_nvdimm_add_region for @cxlr_pmem
- */
- cxlr_pmem = xa_erase(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem);
- if (cxlr_pmem)
- put_device(&cxlr_pmem->dev);
-}
-
-static void release_mappings(void *data)
-{
- int i;
- struct cxl_pmem_region *cxlr_pmem = data;
- struct cxl_nvdimm_bridge *cxl_nvb = cxlr_pmem->bridge;
-
- device_lock(&cxl_nvb->dev);
- for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
- struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
- struct cxl_nvdimm *cxl_nvd = m->cxl_nvd;
-
- cxl_nvdimm_del_region(cxl_nvd, cxlr_pmem);
- }
- device_unlock(&cxl_nvb->dev);
-}
-
static void cxlr_pmem_remove_resource(void *res)
{
remove_resource(res);
@@ -429,8 +282,8 @@ static int cxl_pmem_region_probe(struct device *dev)
struct nd_mapping_desc mappings[CXL_DECODER_MAX_INTERLEAVE];
struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
struct cxl_region *cxlr = cxlr_pmem->cxlr;
+ struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;
struct cxl_pmem_region_info *info = NULL;
- struct cxl_nvdimm_bridge *cxl_nvb;
struct nd_interleave_set *nd_set;
struct nd_region_desc ndr_desc;
struct cxl_nvdimm *cxl_nvd;
@@ -438,28 +291,12 @@ static int cxl_pmem_region_probe(struct device *dev)
struct resource *res;
int rc, i = 0;
- cxl_nvb = cxl_find_nvdimm_bridge(&cxlr_pmem->mapping[0].cxlmd->dev);
- if (!cxl_nvb) {
- dev_dbg(dev, "bridge not found\n");
- return -ENXIO;
- }
- cxlr_pmem->bridge = cxl_nvb;
-
- device_lock(&cxl_nvb->dev);
- if (!cxl_nvb->nvdimm_bus) {
- dev_dbg(dev, "nvdimm bus not found\n");
- rc = -ENXIO;
- goto out_nvb;
- }
-
memset(&mappings, 0, sizeof(mappings));
memset(&ndr_desc, 0, sizeof(ndr_desc));
res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL);
- if (!res) {
- rc = -ENOMEM;
- goto out_nvb;
- }
+ if (!res)
+ return -ENOMEM;
res->name = "Persistent Memory";
res->start = cxlr_pmem->hpa_range.start;
@@ -469,11 +306,11 @@ static int cxl_pmem_region_probe(struct device *dev)
rc = insert_resource(&iomem_resource, res);
if (rc)
- goto out_nvb;
+ return rc;
rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res);
if (rc)
- goto out_nvb;
+ return rc;
ndr_desc.res = res;
ndr_desc.provider_data = cxlr_pmem;
@@ -487,43 +324,23 @@ static int cxl_pmem_region_probe(struct device *dev)
}
nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
- if (!nd_set) {
- rc = -ENOMEM;
- goto out_nvb;
- }
+ if (!nd_set)
+ return -ENOMEM;
ndr_desc.memregion = cxlr->id;
set_bit(ND_REGION_CXL, &ndr_desc.flags);
set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL);
- if (!info) {
- rc = -ENOMEM;
- goto out_nvb;
- }
-
- rc = devm_add_action_or_reset(dev, release_mappings, cxlr_pmem);
- if (rc)
- goto out_nvd;
+ if (!info)
+ return -ENOMEM;
for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
struct cxl_memdev *cxlmd = m->cxlmd;
struct cxl_dev_state *cxlds = cxlmd->cxlds;
- struct device *d;
-
- d = device_find_child(&cxlmd->dev, NULL, match_cxl_nvdimm);
- if (!d) {
- dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i,
- dev_name(&cxlmd->dev));
- rc = -ENODEV;
- goto out_nvd;
- }
- /* safe to drop ref now with bridge lock held */
- put_device(d);
-
- cxl_nvd = to_cxl_nvdimm(d);
+ cxl_nvd = cxlmd->cxl_nvd;
nvdimm = dev_get_drvdata(&cxl_nvd->dev);
if (!nvdimm) {
dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i,
@@ -532,14 +349,6 @@ static int cxl_pmem_region_probe(struct device *dev)
goto out_nvd;
}
- /*
- * Pin the region per nvdimm device as those may be released
- * out-of-order with respect to the region, and a single nvdimm
- * maybe associated with multiple regions
- */
- rc = cxl_nvdimm_add_region(cxl_nvd, cxlr_pmem);
- if (rc)
- goto out_nvd;
m->cxl_nvd = cxl_nvd;
mappings[i] = (struct nd_mapping_desc) {
.nvdimm = nvdimm,
@@ -572,9 +381,6 @@ static int cxl_pmem_region_probe(struct device *dev)
cxlr_pmem->nd_region);
out_nvd:
kfree(info);
-out_nvb:
- device_unlock(&cxl_nvb->dev);
- put_device(&cxl_nvb->dev);
return rc;
}
@@ -583,33 +389,11 @@ static struct cxl_driver cxl_pmem_region_driver = {
.name = "cxl_pmem_region",
.probe = cxl_pmem_region_probe,
.id = CXL_DEVICE_PMEM_REGION,
+ .drv = {
+ .suppress_bind_attrs = true,
+ },
};
-/*
- * Return all bridges to the CXL_NVB_NEW state to invalidate any
- * ->state_work referring to the now destroyed cxl_pmem_wq.
- */
-static int cxl_nvdimm_bridge_reset(struct device *dev, void *data)
-{
- struct cxl_nvdimm_bridge *cxl_nvb;
-
- if (!is_cxl_nvdimm_bridge(dev))
- return 0;
-
- cxl_nvb = to_cxl_nvdimm_bridge(dev);
- device_lock(dev);
- cxl_nvb->state = CXL_NVB_NEW;
- device_unlock(dev);
-
- return 0;
-}
-
-static void destroy_cxl_pmem_wq(void)
-{
- destroy_workqueue(cxl_pmem_wq);
- bus_for_each_dev(&cxl_bus_type, NULL, NULL, cxl_nvdimm_bridge_reset);
-}
-
static __init int cxl_pmem_init(void)
{
int rc;
@@ -617,13 +401,9 @@ static __init int cxl_pmem_init(void)
set_bit(CXL_MEM_COMMAND_ID_SET_SHUTDOWN_STATE, exclusive_cmds);
set_bit(CXL_MEM_COMMAND_ID_SET_LSA, exclusive_cmds);
- cxl_pmem_wq = alloc_ordered_workqueue("cxl_pmem", 0);
- if (!cxl_pmem_wq)
- return -ENXIO;
-
rc = cxl_driver_register(&cxl_nvdimm_bridge_driver);
if (rc)
- goto err_bridge;
+ return rc;
rc = cxl_driver_register(&cxl_nvdimm_driver);
if (rc)
@@ -639,8 +419,6 @@ err_region:
cxl_driver_unregister(&cxl_nvdimm_driver);
err_nvdimm:
cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
-err_bridge:
- destroy_cxl_pmem_wq();
return rc;
}
@@ -649,7 +427,6 @@ static __exit void cxl_pmem_exit(void)
cxl_driver_unregister(&cxl_pmem_region_driver);
cxl_driver_unregister(&cxl_nvdimm_driver);
cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
- destroy_cxl_pmem_wq();
}
MODULE_LICENSE("GPL v2");
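
The rewritten bridge probe leans on devm-style cleanup instead of the old ordered workqueue: register the nvdimm bus, then arm an unregister action that devres runs at unbind. A tiny userspace analogue of that register-then-arm-cleanup shape (hypothetical names, not the kernel's devres implementation):

#include <stdio.h>
#include <stdlib.h>

typedef void (*cleanup_fn)(void *data);

struct cleanup {
	cleanup_fn fn;
	void *data;
	struct cleanup *next;
};

static struct cleanup *actions;

/* Analogue of devm_add_action_or_reset(): arm cleanup, or undo immediately. */
static int add_action_or_reset(cleanup_fn fn, void *data)
{
	struct cleanup *c = malloc(sizeof(*c));

	if (!c) {
		fn(data);
		return -1;
	}
	c->fn = fn;
	c->data = data;
	c->next = actions;
	actions = c;
	return 0;
}

/* Analogue of driver unbind: run cleanups in reverse registration order. */
static void release_all(void)
{
	while (actions) {
		struct cleanup *c = actions;

		actions = c->next;
		c->fn(c->data);
		free(c);
	}
}

static void unregister_bus(void *bus)
{
	printf("unregister %s\n", (const char *)bus);
}

int main(void)
{
	if (add_action_or_reset(unregister_bus, "nvdimm_bus0"))
		return 1;
	printf("probe complete\n");
	release_all();
	return 0;
}
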
diff --git a/drivers/cxl/security.c b/drivers/cxl/security.c
new file mode 100644
index 000000000000..4ad4bda2d18e
--- /dev/null
+++ b/drivers/cxl/security.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+#include <linux/libnvdimm.h>
+#include <asm/unaligned.h>
+#include <linux/module.h>
+#include <linux/async.h>
+#include <linux/slab.h>
+#include <linux/memregion.h>
+#include "cxlmem.h"
+#include "cxl.h"
+
+static unsigned long cxl_pmem_get_security_flags(struct nvdimm *nvdimm,
+ enum nvdimm_passphrase_type ptype)
+{
+ struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
+ struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ unsigned long security_flags = 0;
+ struct cxl_get_security_output {
+ __le32 flags;
+ } out;
+ struct cxl_mbox_cmd mbox_cmd;
+ u32 sec_out;
+ int rc;
+
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_GET_SECURITY_STATE,
+ .size_out = sizeof(out),
+ .payload_out = &out,
+ };
+
+ rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+ if (rc < 0)
+ return 0;
+
+ sec_out = le32_to_cpu(out.flags);
+ if (ptype == NVDIMM_MASTER) {
+ if (sec_out & CXL_PMEM_SEC_STATE_MASTER_PASS_SET)
+ set_bit(NVDIMM_SECURITY_UNLOCKED, &security_flags);
+ else
+ set_bit(NVDIMM_SECURITY_DISABLED, &security_flags);
+ if (sec_out & CXL_PMEM_SEC_STATE_MASTER_PLIMIT)
+ set_bit(NVDIMM_SECURITY_FROZEN, &security_flags);
+ return security_flags;
+ }
+
+ if (sec_out & CXL_PMEM_SEC_STATE_USER_PASS_SET) {
+ if (sec_out & CXL_PMEM_SEC_STATE_FROZEN ||
+ sec_out & CXL_PMEM_SEC_STATE_USER_PLIMIT)
+ set_bit(NVDIMM_SECURITY_FROZEN, &security_flags);
+
+ if (sec_out & CXL_PMEM_SEC_STATE_LOCKED)
+ set_bit(NVDIMM_SECURITY_LOCKED, &security_flags);
+ else
+ set_bit(NVDIMM_SECURITY_UNLOCKED, &security_flags);
+ } else {
+ set_bit(NVDIMM_SECURITY_DISABLED, &security_flags);
+ }
+
+ return security_flags;
+}
+
+static int cxl_pmem_security_change_key(struct nvdimm *nvdimm,
+ const struct nvdimm_key_data *old_data,
+ const struct nvdimm_key_data *new_data,
+ enum nvdimm_passphrase_type ptype)
+{
+ struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
+ struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_mbox_cmd mbox_cmd;
+ struct cxl_set_pass set_pass;
+
+ set_pass = (struct cxl_set_pass) {
+ .type = ptype == NVDIMM_MASTER ? CXL_PMEM_SEC_PASS_MASTER :
+ CXL_PMEM_SEC_PASS_USER,
+ };
+ memcpy(set_pass.old_pass, old_data->data, NVDIMM_PASSPHRASE_LEN);
+ memcpy(set_pass.new_pass, new_data->data, NVDIMM_PASSPHRASE_LEN);
+
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_SET_PASSPHRASE,
+ .size_in = sizeof(set_pass),
+ .payload_in = &set_pass,
+ };
+
+ return cxl_internal_send_cmd(cxlds, &mbox_cmd);
+}
+
+static int __cxl_pmem_security_disable(struct nvdimm *nvdimm,
+ const struct nvdimm_key_data *key_data,
+ enum nvdimm_passphrase_type ptype)
+{
+ struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
+ struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_disable_pass dis_pass;
+ struct cxl_mbox_cmd mbox_cmd;
+
+ dis_pass = (struct cxl_disable_pass) {
+ .type = ptype == NVDIMM_MASTER ? CXL_PMEM_SEC_PASS_MASTER :
+ CXL_PMEM_SEC_PASS_USER,
+ };
+ memcpy(dis_pass.pass, key_data->data, NVDIMM_PASSPHRASE_LEN);
+
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_DISABLE_PASSPHRASE,
+ .size_in = sizeof(dis_pass),
+ .payload_in = &dis_pass,
+ };
+
+ return cxl_internal_send_cmd(cxlds, &mbox_cmd);
+}
+
+static int cxl_pmem_security_disable(struct nvdimm *nvdimm,
+ const struct nvdimm_key_data *key_data)
+{
+ return __cxl_pmem_security_disable(nvdimm, key_data, NVDIMM_USER);
+}
+
+static int cxl_pmem_security_disable_master(struct nvdimm *nvdimm,
+ const struct nvdimm_key_data *key_data)
+{
+ return __cxl_pmem_security_disable(nvdimm, key_data, NVDIMM_MASTER);
+}
+
+static int cxl_pmem_security_freeze(struct nvdimm *nvdimm)
+{
+ struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
+ struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_mbox_cmd mbox_cmd = {
+ .opcode = CXL_MBOX_OP_FREEZE_SECURITY,
+ };
+
+ return cxl_internal_send_cmd(cxlds, &mbox_cmd);
+}
+
+static int cxl_pmem_security_unlock(struct nvdimm *nvdimm,
+ const struct nvdimm_key_data *key_data)
+{
+ struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
+ struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ u8 pass[NVDIMM_PASSPHRASE_LEN];
+ struct cxl_mbox_cmd mbox_cmd;
+ int rc;
+
+ memcpy(pass, key_data->data, NVDIMM_PASSPHRASE_LEN);
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_UNLOCK,
+ .size_in = NVDIMM_PASSPHRASE_LEN,
+ .payload_in = pass,
+ };
+
+ rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+ if (rc < 0)
+ return rc;
+
+ return 0;
+}
+
+static int cxl_pmem_security_passphrase_erase(struct nvdimm *nvdimm,
+ const struct nvdimm_key_data *key,
+ enum nvdimm_passphrase_type ptype)
+{
+ struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
+ struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_mbox_cmd mbox_cmd;
+ struct cxl_pass_erase erase;
+ int rc;
+
+ erase = (struct cxl_pass_erase) {
+ .type = ptype == NVDIMM_MASTER ? CXL_PMEM_SEC_PASS_MASTER :
+ CXL_PMEM_SEC_PASS_USER,
+ };
+ memcpy(erase.pass, key->data, NVDIMM_PASSPHRASE_LEN);
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE,
+ .size_in = sizeof(erase),
+ .payload_in = &erase,
+ };
+
+ rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+ if (rc < 0)
+ return rc;
+
+ return 0;
+}
+
+static const struct nvdimm_security_ops __cxl_security_ops = {
+ .get_flags = cxl_pmem_get_security_flags,
+ .change_key = cxl_pmem_security_change_key,
+ .disable = cxl_pmem_security_disable,
+ .freeze = cxl_pmem_security_freeze,
+ .unlock = cxl_pmem_security_unlock,
+ .erase = cxl_pmem_security_passphrase_erase,
+ .disable_master = cxl_pmem_security_disable_master,
+};
+
+const struct nvdimm_security_ops *cxl_security_ops = &__cxl_security_ops;
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 5a29046e3319..79d93126453d 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -114,4 +114,16 @@ config NVDIMM_TEST_BUILD
core devm_memremap_pages() implementation and other
infrastructure.
+config NVDIMM_SECURITY_TEST
+ bool "Enable NVDIMM security unit tests"
+ depends on NVDIMM_KEYS
+ help
+ The NVDIMM and CXL subsystems support unit testing of their device
+ security state machines. The NVDIMM_SECURITY_TEST option disables CPU
+ cache maintenance operations around events like secure erase and
+ overwrite. When enabled, the NVDIMM subsystem core also helps the
+ unit tests implement a mock security state machine.
+
+ Select N if unsure.
+
endif
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index c7c980577491..1fc081dcf631 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -349,11 +349,18 @@ static ssize_t available_slots_show(struct device *dev,
}
static DEVICE_ATTR_RO(available_slots);
-__weak ssize_t security_show(struct device *dev,
+ssize_t security_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct nvdimm *nvdimm = to_nvdimm(dev);
+ /*
+ * For the test version we need to poll the "hardware" in order
+ * to get the updated status for unlock testing.
+ */
+ if (IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST))
+ nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
+
if (test_bit(NVDIMM_SECURITY_OVERWRITE, &nvdimm->sec.flags))
return sprintf(buf, "overwrite\n");
if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags))
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index 390123d293ea..88dc062af5f8 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -2,6 +2,7 @@
/*
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
*/
+#include <linux/memregion.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/device.h>
@@ -100,6 +101,16 @@ static void nd_region_remove(struct device *dev)
*/
sysfs_put(nd_region->bb_state);
nd_region->bb_state = NULL;
+
+ /*
+ * Try to flush caches here since a disabled region may be subject to
+ * secure erase while disabled, and previous dirty data should not be
+ * written back to a new instance of the region. This only matters on
+ * bare metal where security commands are available, so silent failure
+ * here is ok.
+ */
+ if (cpu_cache_has_invalidate_memregion())
+ cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY);
}
static int child_notify(struct device *dev, void *data)
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index e0875d369762..83dbf398ea84 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -59,9 +59,51 @@ static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm,
return 0;
}
+static int nd_region_invalidate_memregion(struct nd_region *nd_region)
+{
+ int i, incoherent = 0;
+
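+ /*
+ * If any DIMM in the region was unlocked or erased (NDD_INCOHERENT),
+ * CPU caches may hold stale data for the region and must be
+ * invalidated before the region is activated.
+ */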
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+ struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+ if (test_bit(NDD_INCOHERENT, &nvdimm->flags)) {
+ incoherent++;
+ break;
+ }
+ }
+
+ if (!incoherent)
+ return 0;
+
+ if (!cpu_cache_has_invalidate_memregion()) {
+ if (IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST)) {
+ dev_warn(
+ &nd_region->dev,
+ "Bypassing cpu_cache_invalidate_memergion() for testing!\n");
+ goto out;
+ } else {
+ dev_err(&nd_region->dev,
+ "Failed to synchronize CPU cache state\n");
+ return -ENXIO;
+ }
+ }
+
+ cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY);
+out:
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+ struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+ clear_bit(NDD_INCOHERENT, &nvdimm->flags);
+ }
+
+ return 0;
+}
+
int nd_region_activate(struct nd_region *nd_region)
{
- int i, j, num_flush = 0;
+ int i, j, rc, num_flush = 0;
struct nd_region_data *ndrd;
struct device *dev = &nd_region->dev;
size_t flush_data_size = sizeof(void *);
@@ -85,6 +127,10 @@ int nd_region_activate(struct nd_region *nd_region)
}
nvdimm_bus_unlock(&nd_region->dev);
+ rc = nd_region_invalidate_memregion(nd_region);
+ if (rc)
+ return rc;
+
ndrd = devm_kzalloc(dev, sizeof(*ndrd) + flush_data_size, GFP_KERNEL);
if (!ndrd)
return -ENOMEM;
@@ -1222,3 +1268,5 @@ int nd_region_conflict(struct nd_region *nd_region, resource_size_t start,
return device_for_each_child(&nvdimm_bus->dev, &ctx, region_conflict);
}
+
+MODULE_IMPORT_NS(DEVMEM);
diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c
index 8aefb60c42ff..a03e3c45f297 100644
--- a/drivers/nvdimm/security.c
+++ b/drivers/nvdimm/security.c
@@ -177,6 +177,10 @@ static int __nvdimm_security_unlock(struct nvdimm *nvdimm)
|| !nvdimm->sec.flags)
return -EIO;
+ /* cxl_test needs this to pre-populate the security state */
+ if (IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST))
+ nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
+
/* No need to go further if security is disabled */
if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags))
return 0;
@@ -204,6 +208,8 @@ static int __nvdimm_security_unlock(struct nvdimm *nvdimm)
rc = nvdimm->sec.ops->unlock(nvdimm, data);
dev_dbg(dev, "key: %d unlock: %s\n", key_serial(key),
rc == 0 ? "success" : "fail");
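+	/*
+	 * A successful unlock changes what the CPU sees at these
+	 * addresses; mark the DIMM cache-incoherent so the next
+	 * region activation invalidates CPU caches.
+	 */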
+ if (rc == 0)
+ set_bit(NDD_INCOHERENT, &nvdimm->flags);
nvdimm_put_key(key);
nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
@@ -239,7 +245,8 @@ static int check_security_state(struct nvdimm *nvdimm)
return 0;
}
-static int security_disable(struct nvdimm *nvdimm, unsigned int keyid)
+static int security_disable(struct nvdimm *nvdimm, unsigned int keyid,
+ enum nvdimm_passphrase_type pass_type)
{
struct device *dev = &nvdimm->dev;
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
@@ -250,8 +257,13 @@ static int security_disable(struct nvdimm *nvdimm, unsigned int keyid)
/* The bus lock should be held at the top level of the call stack */
lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
- if (!nvdimm->sec.ops || !nvdimm->sec.ops->disable
- || !nvdimm->sec.flags)
+ if (!nvdimm->sec.ops || !nvdimm->sec.flags)
+ return -EOPNOTSUPP;
+
+ if (pass_type == NVDIMM_USER && !nvdimm->sec.ops->disable)
+ return -EOPNOTSUPP;
+
+ if (pass_type == NVDIMM_MASTER && !nvdimm->sec.ops->disable_master)
return -EOPNOTSUPP;
rc = check_security_state(nvdimm);
@@ -263,12 +275,21 @@ static int security_disable(struct nvdimm *nvdimm, unsigned int keyid)
if (!data)
return -ENOKEY;
- rc = nvdimm->sec.ops->disable(nvdimm, data);
- dev_dbg(dev, "key: %d disable: %s\n", key_serial(key),
+ if (pass_type == NVDIMM_MASTER) {
+ rc = nvdimm->sec.ops->disable_master(nvdimm, data);
+ dev_dbg(dev, "key: %d disable_master: %s\n", key_serial(key),
rc == 0 ? "success" : "fail");
+ } else {
+ rc = nvdimm->sec.ops->disable(nvdimm, data);
+ dev_dbg(dev, "key: %d disable: %s\n", key_serial(key),
+ rc == 0 ? "success" : "fail");
+ }
nvdimm_put_key(key);
- nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
+ if (pass_type == NVDIMM_MASTER)
+ nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER);
+ else
+ nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
return rc;
}
@@ -355,6 +376,8 @@ static int security_erase(struct nvdimm *nvdimm, unsigned int keyid,
return -ENOKEY;
rc = nvdimm->sec.ops->erase(nvdimm, data, pass_type);
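+	/*
+	 * Erase destroys the media contents; flag the DIMM so stale
+	 * CPU cachelines are invalidated at the next region activation.
+	 */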
+ if (rc == 0)
+ set_bit(NDD_INCOHERENT, &nvdimm->flags);
dev_dbg(dev, "key: %d erase%s: %s\n", key_serial(key),
pass_type == NVDIMM_MASTER ? "(master)" : "(user)",
rc == 0 ? "success" : "fail");
@@ -389,6 +412,8 @@ static int security_overwrite(struct nvdimm *nvdimm, unsigned int keyid)
return -ENOKEY;
rc = nvdimm->sec.ops->overwrite(nvdimm, data);
+ if (rc == 0)
+ set_bit(NDD_INCOHERENT, &nvdimm->flags);
dev_dbg(dev, "key: %d overwrite submission: %s\n", key_serial(key),
rc == 0 ? "success" : "fail");
@@ -473,6 +498,7 @@ void nvdimm_security_overwrite_query(struct work_struct *work)
#define OPS \
C( OP_FREEZE, "freeze", 1), \
C( OP_DISABLE, "disable", 2), \
+ C( OP_DISABLE_MASTER, "disable_master", 2), \
C( OP_UPDATE, "update", 3), \
C( OP_ERASE, "erase", 2), \
C( OP_OVERWRITE, "overwrite", 2), \
@@ -524,7 +550,10 @@ ssize_t nvdimm_security_store(struct device *dev, const char *buf, size_t len)
rc = nvdimm_security_freeze(nvdimm);
} else if (i == OP_DISABLE) {
dev_dbg(dev, "disable %u\n", key);
- rc = security_disable(nvdimm, key);
+ rc = security_disable(nvdimm, key, NVDIMM_USER);
+ } else if (i == OP_DISABLE_MASTER) {
+ dev_dbg(dev, "disable_master %u\n", key);
+ rc = security_disable(nvdimm, key, NVDIMM_MASTER);
} else if (i == OP_UPDATE || i == OP_MASTER_UPDATE) {
dev_dbg(dev, "%s %u %u\n", ops[i].name, key, newkey);
rc = security_update(nvdimm, key, newkey, i == OP_UPDATE
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 0a2eeb82cebd..6c250eb214e8 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -756,6 +756,13 @@ static ssize_t pci_write_config(struct file *filp, struct kobject *kobj,
if (ret)
return ret;
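+	/*
+	 * Warn (once) and taint if userspace writes into a config range
+	 * that a driver has marked kernel-exclusive (e.g. a DOE mailbox),
+	 * since such writes can race with in-flight kernel transactions.
+	 */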
+ if (resource_is_exclusive(&dev->driver_exclusive_resource, off,
+ count)) {
+ pci_warn_once(dev, "%s: Unexpected write to kernel-exclusive config offset %llx",
+ current->comm, off);
+ add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+ }
+
if (off > dev->cfg_size)
return 0;
if (off + count > dev->cfg_size) {
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index e2d8a74f83c3..625f7b2cafe4 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -961,8 +961,14 @@ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
if (aer)
pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS,
info->status);
- if (pcie_aer_is_native(dev))
+ if (pcie_aer_is_native(dev)) {
+ struct pci_driver *pdrv = dev->driver;
+
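+		/*
+		 * For natively handled correctable errors, give the driver
+		 * a chance to log the event (e.g. CXL error reporting)
+		 * before the device status is cleared.
+		 */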
+ if (pdrv && pdrv->err_handler &&
+ pdrv->err_handler->cor_error_detected)
+ pdrv->err_handler->cor_error_detected(dev);
pcie_clear_device_status(dev);
+ }
} else if (info->severity == AER_NONFATAL)
pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset);
else if (info->severity == AER_FATAL)
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index fdd7e56ddf40..334fd91f01e1 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2305,6 +2305,12 @@ struct pci_dev *pci_alloc_dev(struct pci_bus *bus)
INIT_LIST_HEAD(&dev->bus_list);
dev->dev.type = &pci_dev_type;
dev->bus = pci_bus_get(bus);
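+	/*
+	 * Root resource spanning all of config space; drivers claim child
+	 * ranges under it to mark them kernel-exclusive (checked by
+	 * resource_is_exclusive() on userspace config writes).
+	 */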
+ dev->driver_exclusive_resource = (struct resource) {
+ .name = "PCI Exclusive",
+ .start = 0,
+ .end = -1,
+ };
+
#ifdef CONFIG_PCI_MSI
raw_spin_lock_init(&dev->msi_lock);
#endif