From 962f1e79e7acfb30207a378894b1bbf6742e6212 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 2 May 2024 09:57:30 -0700 Subject: PCI/CXL: Move CXL Vendor ID to pci_ids.h Move PCI_DVSEC_VENDOR_ID_CXL in CXL private code to PCI_VENDOR_ID_CXL in pci_ids.h in order to be utilized in PCI subsystem. While the CXL Vendor ID (0x1e98) is not listed in the PCI SIG "Member Companies" database at https://pcisig.com/membership/member-companies, the SIG has confirmed that it is reserved by CXL. Link: https://lore.kernel.org/r/20240502165851.1948523-2-dave.jiang@intel.com Suggested-by: Bjorn Helgaas Link: https://lore.kernel.org/linux-cxl/20240402172323.GA1818777@bhelgaas/ Signed-off-by: Dave Jiang [bhelgaas: update commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Dan Williams --- drivers/cxl/core/pci.c | 6 +++--- drivers/cxl/core/regs.c | 2 +- drivers/cxl/cxlpci.h | 1 - drivers/cxl/pci.c | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 0df09bd79408..c496a9710d62 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -525,7 +525,7 @@ static int cxl_cdat_get_length(struct device *dev, __le32 response[2]; int rc; - rc = pci_doe(doe_mb, PCI_DVSEC_VENDOR_ID_CXL, + rc = pci_doe(doe_mb, PCI_VENDOR_ID_CXL, CXL_DOE_PROTOCOL_TABLE_ACCESS, &request, sizeof(request), &response, sizeof(response)); @@ -555,7 +555,7 @@ static int cxl_cdat_read_table(struct device *dev, __le32 request = CDAT_DOE_REQ(entry_handle); int rc; - rc = pci_doe(doe_mb, PCI_DVSEC_VENDOR_ID_CXL, + rc = pci_doe(doe_mb, PCI_VENDOR_ID_CXL, CXL_DOE_PROTOCOL_TABLE_ACCESS, &request, sizeof(request), rsp, sizeof(*rsp) + remaining); @@ -640,7 +640,7 @@ void read_cdat_data(struct cxl_port *port) if (!pdev) return; - doe_mb = pci_find_doe_mailbox(pdev, PCI_DVSEC_VENDOR_ID_CXL, + doe_mb = pci_find_doe_mailbox(pdev, PCI_VENDOR_ID_CXL, CXL_DOE_PROTOCOL_TABLE_ACCESS); if (!doe_mb) { dev_dbg(dev, "No CDAT mailbox\n"); diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 372786f80955..da52fc9e234b 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -313,7 +313,7 @@ int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, .resource = CXL_RESOURCE_NONE, }; - regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL, + regloc = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, CXL_DVSEC_REG_LOCATOR); if (!regloc) return -ENXIO; diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 93992a1c8eec..4da07727ab9c 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -13,7 +13,6 @@ * "DVSEC" redundancies removed. When obvious, abbreviations may be used. */ #define PCI_DVSEC_HEADER1_LENGTH_MASK GENMASK(31, 20) -#define PCI_DVSEC_VENDOR_ID_CXL 0x1E98 /* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */ #define CXL_DVSEC_PCIE_DEVICE 0 diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 2ff361e756d6..110478573296 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -817,7 +817,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) cxlds->rcd = is_cxl_restricted(pdev); cxlds->serial = pci_get_dsn(pdev); cxlds->cxl_dvsec = pci_find_dvsec_capability( - pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE); + pdev, PCI_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE); if (!cxlds->cxl_dvsec) dev_warn(&pdev->dev, "Device DVSEC not present, skip CXL.mem init\n"); -- cgit v1.2.3 From 934edcd436dca0447e0d3691a908394ba16d06c3 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 2 May 2024 09:57:34 -0700 Subject: cxl: Add post-reset warning if reset results in loss of previously committed HDM decoders Secondary Bus Reset (SBR) is equivalent to a device being hot removed and inserted again. Doing a SBR on a CXL type 3 device is problematic if the exported device memory is part of system memory that cannot be offlined. The event is equivalent to violently ripping out that range of memory from the kernel. While the hardware requires the "Unmask SBR" bit set in the Port Control Extensions register and the kernel currently does not unmask it, user can unmask this bit via setpci or similar tool. The driver does not have a way to detect whether a reset coming from the PCI subsystem is a Function Level Reset (FLR) or SBR. The only way to detect is to note if a decoder is marked as enabled in software but the decoder control register indicates it's not committed. Add a helper function to find discrepancy between the decoder software state versus the hardware register state. Suggested-by: Dan Williams Link: https://lore.kernel.org/r/20240502165851.1948523-6-dave.jiang@intel.com Signed-off-by: Dave Jiang Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams --- drivers/cxl/core/pci.c | 29 +++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 2 ++ drivers/cxl/pci.c | 22 ++++++++++++++++++++++ 3 files changed, 53 insertions(+) (limited to 'drivers/cxl') diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index c496a9710d62..8567dd11eaac 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1045,3 +1045,32 @@ long cxl_pci_get_latency(struct pci_dev *pdev) return cxl_flit_size(pdev) * MEGA / bw; } + +static int __cxl_endpoint_decoder_reset_detected(struct device *dev, void *data) +{ + struct cxl_port *port = data; + struct cxl_decoder *cxld; + struct cxl_hdm *cxlhdm; + void __iomem *hdm; + u32 ctrl; + + if (!is_endpoint_decoder(dev)) + return 0; + + cxld = to_cxl_decoder(dev); + if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0) + return 0; + + cxlhdm = dev_get_drvdata(&port->dev); + hdm = cxlhdm->regs.hdm_decoder; + ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(cxld->id)); + + return !FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl); +} + +bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port) +{ + return device_for_each_child(&port->dev, port, + __cxl_endpoint_decoder_reset_detected); +} +EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, CXL); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 534e25e2f0a4..e3c237c50b59 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -895,6 +895,8 @@ void cxl_coordinates_combine(struct access_coordinate *out, struct access_coordinate *c1, struct access_coordinate *c2); +bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port); + /* * Unit test builds overrides this to __weak, find the 'strong' version * of these symbols in tools/testing/cxl/. diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 110478573296..f9d65f72601b 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -957,11 +957,33 @@ static void cxl_error_resume(struct pci_dev *pdev) dev->driver ? "successful" : "failed"); } +static void cxl_reset_done(struct pci_dev *pdev) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *dev = &pdev->dev; + + /* + * FLR does not expect to touch the HDM decoders and related + * registers. SBR, however, will wipe all device configurations. + * Issue a warning if there was an active decoder before the reset + * that no longer exists. + */ + guard(device)(&cxlmd->dev); + if (cxlmd->endpoint && + cxl_endpoint_decoder_reset_detected(cxlmd->endpoint)) { + dev_crit(dev, "SBR happened without memory regions removal.\n"); + dev_crit(dev, "System may be unstable if regions hosted system memory.\n"); + add_taint(TAINT_USER, LOCKDEP_STILL_OK); + } +} + static const struct pci_error_handlers cxl_error_handlers = { .error_detected = cxl_error_detected, .slot_reset = cxl_slot_reset, .resume = cxl_error_resume, .cor_error_detected = cxl_cor_error_detected, + .reset_done = cxl_reset_done, }; static struct pci_driver cxl_pci_driver = { -- cgit v1.2.3