summary | refs | log | tree | commit | diff
path: root/drivers/cxl/core
diff options
context:
space:
mode:
author: Dan Williams <dan.j.williams@intel.com>, 2024-01-02 22:03:04 +0300
committer: Dan Williams <dan.j.williams@intel.com>, 2024-01-02 22:03:04 +0300
commit: 11c83932028714014e4259072bd230473d6db730 (patch)
tree: ba5de3af7a65a6a60ebb1f0a36328fd853130ecd /drivers/cxl/core
parent: 58f1e9d3a30438042fc9ed65b3dc56b2e5f7886a (diff)
parent: 185c1a489f873cb71520fc089401e02dbf302dcd (diff)
download: linux-11c83932028714014e4259072bd230473d6db730.tar.xz
Merge branch 'for-6.8/cxl-cdat' into for-6.8/cxl
Pick up the CDAT parsing and QOS class infrastructure for v6.8.
Diffstat (limited to 'drivers/cxl/core')
-rw-r--r-- drivers/cxl/core/Makefile 1
-rw-r--r-- drivers/cxl/core/cdat.c 517
-rw-r--r-- drivers/cxl/core/core.h 2
-rw-r--r-- drivers/cxl/core/hdm.c 5
-rw-r--r-- drivers/cxl/core/mbox.c 2
-rw-r--r-- drivers/cxl/core/memdev.c 27
-rw-r--r-- drivers/cxl/core/pci.c 49
-rw-r--r-- drivers/cxl/core/pmu.c 2
-rw-r--r-- drivers/cxl/core/port.c 134
-rw-r--r-- drivers/cxl/core/region.c 5
10 files changed, 714 insertions, 30 deletions
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index 1f66b5d4d935..9259bcc6773c 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -13,5 +13,6 @@ cxl_core-y += mbox.o
cxl_core-y += pci.o
cxl_core-y += hdm.o
cxl_core-y += pmu.o
+cxl_core-y += cdat.o
cxl_core-$(CONFIG_TRACING) += trace.o
cxl_core-$(CONFIG_CXL_REGION) += region.o
diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
new file mode 100644
index 000000000000..cd84d87f597a
--- /dev/null
+++ b/drivers/cxl/core/cdat.c
@@ -0,0 +1,517 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2023 Intel Corporation. All rights reserved. */
+#include <linux/acpi.h>
+#include <linux/xarray.h>
+#include <linux/fw_table.h>
+#include <linux/node.h>
+#include <linux/overflow.h>
+#include "cxlpci.h"
+#include "cxlmem.h"
+#include "cxl.h"
+
+/*
+ * Bookkeeping for one CDAT DSMAS subtable while an endpoint's CDAT is parsed.
+ * Entries live in an xarray indexed by @handle.
+ */
+struct dsmas_entry {
+	/* DPA span built from the DSMAS base address and length */
+	struct range dpa_range;
+	/* DSMAD handle; DSLBIS subtables are matched to the entry through it */
+	u8 handle;
+	/* latency/bandwidth values, filled in by the DSLBIS handler */
+	struct access_coordinate coord;
+
+	/* set to 1 right before the root's qos_class() op is invoked */
+	int entries;
+	/* QoS class reported by the root's qos_class() op */
+	int qos_class;
+};
+
+/*
+ * cdat_table_parse() callback for DSMAS subtables.
+ *
+ * @header: subtable being visited
+ * @arg: xarray that collects the resulting dsmas_entry objects
+ * @end: address one past the end of the CDAT buffer
+ *
+ * Allocates a dsmas_entry describing the DPA range and stores it in the
+ * xarray under the DSMAD handle.
+ *
+ * Returns 0 on success, -EINVAL for a malformed subtable length, -ENOMEM on
+ * allocation failure, or the error reported by xa_insert().
+ */
+static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg,
+			      const unsigned long end)
+{
+	struct acpi_cdat_header *hdr = &header->cdat;
+	int size = sizeof(*hdr) + sizeof(struct acpi_cdat_dsmas);
+	struct xarray *dsmas_xa = arg;
+	struct acpi_cdat_dsmas *dsmas;
+	struct dsmas_entry *dent;
+	u64 base, length;
+	u16 len;
+	int rc;
+
+	len = le16_to_cpu((__force __le16)hdr->length);
+	if (len != size || (unsigned long)hdr + len > end) {
+		pr_warn("Malformed DSMAS table length: (%u:%u)\n", size, len);
+		return -EINVAL;
+	}
+
+	/* The DSMAS payload sits directly behind the common header */
+	dsmas = (struct acpi_cdat_dsmas *)(hdr + 1);
+
+	dent = kzalloc(sizeof(*dent), GFP_KERNEL);
+	if (!dent)
+		return -ENOMEM;
+
+	base = le64_to_cpu((__force __le64)dsmas->dpa_base_address);
+	length = le64_to_cpu((__force __le64)dsmas->dpa_length);
+
+	dent->handle = dsmas->dsmad_handle;
+	dent->dpa_range.start = base;
+	dent->dpa_range.end = base + length - 1;
+
+	rc = xa_insert(dsmas_xa, dent->handle, dent, GFP_KERNEL);
+	if (rc)
+		kfree(dent);
+
+	return rc;
+}
+
+/*
+ * Store @val in the field(s) of @coord selected by the HMAT data type
+ * @access. The "ACCESS" types update both the read and the write field;
+ * any other value of @access leaves @coord untouched.
+ */
+static void cxl_access_coordinate_set(struct access_coordinate *coord,
+				      int access, unsigned int val)
+{
+	switch (access) {
+	case ACPI_HMAT_ACCESS_LATENCY:
+		coord->write_latency = val;
+		fallthrough;
+	case ACPI_HMAT_READ_LATENCY:
+		coord->read_latency = val;
+		break;
+	case ACPI_HMAT_WRITE_LATENCY:
+		coord->write_latency = val;
+		break;
+	case ACPI_HMAT_ACCESS_BANDWIDTH:
+		coord->write_bandwidth = val;
+		fallthrough;
+	case ACPI_HMAT_READ_BANDWIDTH:
+		coord->read_bandwidth = val;
+		break;
+	case ACPI_HMAT_WRITE_BANDWIDTH:
+		coord->write_bandwidth = val;
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * cdat_table_parse() callback for DSLBIS subtables.
+ *
+ * @header: subtable being visited
+ * @arg: xarray of dsmas_entry objects filled by the DSMAS pass
+ * @end: address one past the end of the CDAT buffer
+ *
+ * Multiplies the entry base unit by the first entry value and records the
+ * product in the DSMAS entry that has the same handle. Subtables with an
+ * unknown data type, a non-memory hierarchy, or no matching DSMAS entry are
+ * skipped.
+ *
+ * Returns 0, or -EINVAL for a malformed subtable length.
+ */
+static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
+			       const unsigned long end)
+{
+	struct acpi_cdat_header *hdr = &header->cdat;
+	struct acpi_cdat_dslbis *dslbis;
+	int size = sizeof(*hdr) + sizeof(*dslbis);
+	struct xarray *dsmas_xa = arg;
+	struct dsmas_entry *dent;
+	u64 base_unit, val;
+	u16 len, raw;
+
+	len = le16_to_cpu((__force __le16)hdr->length);
+	if (len != size || (unsigned long)hdr + len > end) {
+		pr_warn("Malformed DSLBIS table length: (%u:%u)\n", size, len);
+		return -EINVAL;
+	}
+
+	/* The DSLBIS payload sits directly behind the common header */
+	dslbis = (struct acpi_cdat_dslbis *)(hdr + 1);
+
+	/* Ignore data types this code does not know and non-memory entries */
+	if (dslbis->data_type > ACPI_HMAT_WRITE_BANDWIDTH ||
+	    (dslbis->flags & ACPI_HMAT_MEMORY_HIERARCHY) != ACPI_HMAT_MEMORY)
+		return 0;
+
+	dent = xa_load(dsmas_xa, dslbis->handle);
+	if (!dent) {
+		pr_warn("No matching DSMAS entry for DSLBIS entry.\n");
+		return 0;
+	}
+
+	base_unit = le64_to_cpu((__force __le64)dslbis->entry_base_unit);
+	raw = le16_to_cpu((__force __le16)dslbis->entry[0]);
+
+	/* An overflow is only reported; the wrapped product is still stored */
+	if (check_mul_overflow(base_unit, raw, &val))
+		pr_warn("DSLBIS value overflowed.\n");
+
+	cxl_access_coordinate_set(&dent->coord, dslbis->data_type, val);
+
+	return 0;
+}
+
+/*
+ * Turn a cdat_table_parse() result into an errno: a negative value is passed
+ * through, zero becomes -ENOENT and any positive value becomes 0.
+ */
+static int cdat_table_parse_output(int rc)
+{
+	if (rc > 0)
+		return 0;
+
+	return rc ? rc : -ENOENT;
+}
+
+/*
+ * Run the DSMAS pass and then the DSLBIS pass over the endpoint's CDAT,
+ * collecting the results in @dsmas_xa. The DSLBIS pass is skipped when the
+ * DSMAS pass fails or finds nothing.
+ *
+ * Returns 0 on success or a negative errno from either pass.
+ */
+static int cxl_cdat_endpoint_process(struct cxl_port *port,
+				     struct xarray *dsmas_xa)
+{
+	int nr;
+	int rc;
+
+	nr = cdat_table_parse(ACPI_CDAT_TYPE_DSMAS, cdat_dsmas_handler,
+			      dsmas_xa, port->cdat.table);
+	rc = cdat_table_parse_output(nr);
+	if (rc)
+		return rc;
+
+	nr = cdat_table_parse(ACPI_CDAT_TYPE_DSLBIS, cdat_dslbis_handler,
+			      dsmas_xa, port->cdat.table);
+
+	return cdat_table_parse_output(nr);
+}
+
+/*
+ * Fold the performance data of the path above the endpoint into every DSMAS
+ * entry and ask the CXL root for a QoS class for each of them.
+ *
+ * @port: endpoint port
+ * @dsmas_xa: xarray of dsmas_entry objects to update
+ *
+ * Returns 0 when at least one entry obtained a QoS class, -ENODEV when no CXL
+ * root is found, -EOPNOTSUPP when the root provides no qos_class() op,
+ * -ENOENT when no entry obtained a class, or the error returned by
+ * cxl_endpoint_get_perf_coordinates().
+ */
+static int cxl_port_perf_data_calculate(struct cxl_port *port,
+					struct xarray *dsmas_xa)
+{
+	struct access_coordinate c;
+	struct cxl_port *root_port;
+	struct cxl_root *cxl_root;
+	struct dsmas_entry *dent;
+	int valid_entries = 0;
+	unsigned long index;
+	int rc;
+
+	rc = cxl_endpoint_get_perf_coordinates(port, &c);
+	if (rc) {
+		dev_dbg(&port->dev, "Failed to retrieve perf coordinates.\n");
+		return rc;
+	}
+
+	/*
+	 * find_cxl_root() may return NULL and otherwise hands back a referenced
+	 * port, so the reference has to be dropped on every exit path below.
+	 */
+	root_port = find_cxl_root(port);
+	if (!root_port)
+		return -ENODEV;
+
+	cxl_root = to_cxl_root(root_port);
+	if (!cxl_root->ops || !cxl_root->ops->qos_class) {
+		rc = -EOPNOTSUPP;
+		goto out;
+	}
+
+	xa_for_each(dsmas_xa, index, dent) {
+		int qos_class;
+		int nr;
+
+		/* Latencies add up along the path, bandwidth is the bottleneck */
+		dent->coord.read_latency = dent->coord.read_latency +
+					   c.read_latency;
+		dent->coord.write_latency = dent->coord.write_latency +
+					    c.write_latency;
+		dent->coord.read_bandwidth = min_t(int, c.read_bandwidth,
+						   dent->coord.read_bandwidth);
+		dent->coord.write_bandwidth = min_t(int, c.write_bandwidth,
+						    dent->coord.write_bandwidth);
+
+		dent->entries = 1;
+		nr = cxl_root->ops->qos_class(root_port, &dent->coord, 1,
+					      &qos_class);
+		if (nr != 1)
+			continue;
+
+		valid_entries++;
+		dent->qos_class = qos_class;
+	}
+
+	rc = valid_entries ? 0 : -ENOENT;
+out:
+	put_device(&root_port->dev);
+	return rc;
+}
+
+/*
+ * Copy the range, coordinates and QoS class of @dent into a newly allocated
+ * cxl_dpa_perf and append it to @list. Nothing is added when the allocation
+ * fails.
+ */
+static void add_perf_entry(struct device *dev, struct dsmas_entry *dent,
+			   struct list_head *list)
+{
+	struct cxl_dpa_perf *perf = kzalloc(sizeof(*perf), GFP_KERNEL);
+
+	if (!perf)
+		return;
+
+	perf->dpa_range = dent->dpa_range;
+	perf->coord = dent->coord;
+	perf->qos_class = dent->qos_class;
+	list_add_tail(&perf->list, list);
+
+	dev_dbg(dev,
+		"DSMAS: dpa: %#llx qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n",
+		dent->dpa_range.start, perf->qos_class,
+		dent->coord.read_bandwidth, dent->coord.write_bandwidth,
+		dent->coord.read_latency, dent->coord.write_latency);
+}
+
+/*
+ * devm action: free every cxl_dpa_perf on the ram and pmem perf lists of the
+ * memdev state passed in @data, leaving both lists empty.
+ */
+static void free_perf_ents(void *data)
+{
+	struct cxl_memdev_state *mds = data;
+	struct list_head *heads[] = {
+		&mds->ram_perf_list,
+		&mds->pmem_perf_list,
+	};
+	struct cxl_dpa_perf *pos, *tmp;
+
+	for (int i = 0; i < ARRAY_SIZE(heads); i++) {
+		list_for_each_entry_safe(pos, tmp, heads[i], list) {
+			list_del(&pos->list);
+			kfree(pos);
+		}
+	}
+}
+
+/*
+ * Sort the parsed DSMAS entries onto the memdev's ram or pmem perf list,
+ * depending on which partition fully contains the entry's DPA range, and
+ * register free_perf_ents() as a devm action on the memdev device.
+ * Entries that fit neither partition are only reported via dev_dbg().
+ */
+static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
+				     struct xarray *dsmas_xa)
+{
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+	struct device *dev = cxlds->dev;
+	struct range ram_range = {
+		.start = cxlds->ram_res.start,
+		.end = cxlds->ram_res.end,
+	};
+	struct range pmem_range = {
+		.start = cxlds->pmem_res.start,
+		.end = cxlds->pmem_res.end,
+	};
+	struct dsmas_entry *dent;
+	unsigned long index;
+
+	xa_for_each(dsmas_xa, index, dent) {
+		struct list_head *perf_list = NULL;
+
+		/* The ram partition is tried first, then pmem */
+		if (resource_size(&cxlds->ram_res) &&
+		    range_contains(&ram_range, &dent->dpa_range))
+			perf_list = &mds->ram_perf_list;
+		else if (resource_size(&cxlds->pmem_res) &&
+			 range_contains(&pmem_range, &dent->dpa_range))
+			perf_list = &mds->pmem_perf_list;
+
+		if (perf_list)
+			add_perf_entry(dev, dent, perf_list);
+		else
+			dev_dbg(dev, "no partition for dsmas dpa: %#llx\n",
+				dent->dpa_range.start);
+	}
+
+	devm_add_action_or_reset(&cxlds->cxlmd->dev, free_perf_ents, mds);
+}
+
+/*
+ * device_for_each_child() callback: returns 1 when @dev is a root decoder
+ * whose QoS class is valid and equal to the int that @data points to,
+ * 0 otherwise.
+ */
+static int match_cxlrd_qos_class(struct device *dev, void *data)
+{
+	int wanted = *(int *)data;
+	struct cxl_root_decoder *cxlrd;
+
+	if (!is_root_decoder(dev))
+		return 0;
+
+	cxlrd = to_cxl_root_decoder(dev);
+
+	return cxlrd->qos_class != CXL_QOS_CLASS_INVALID &&
+	       cxlrd->qos_class == wanted;
+}
+
+/*
+ * Move every entry of @work_list whose QoS class is not offered by any root
+ * decoder below @root_port onto @discard_list.
+ *
+ * The walk stops at the first entry that carries CXL_QOS_CLASS_INVALID; that
+ * entry and everything after it stay on @work_list.
+ */
+static void cxl_qos_match(struct cxl_port *root_port,
+			  struct list_head *work_list,
+			  struct list_head *discard_list)
+{
+	struct cxl_dpa_perf *cur, *next;
+
+	list_for_each_entry_safe(cur, next, work_list, list) {
+		if (cur->qos_class == CXL_QOS_CLASS_INVALID)
+			return;
+
+		/* A non-zero result means some root decoder matched */
+		if (device_for_each_child(&root_port->dev,
+					  (void *)&cur->qos_class,
+					  match_cxlrd_qos_class))
+			continue;
+
+		list_move_tail(&cur->list, discard_list);
+	}
+}
+
+/*
+ * device_for_each_child() callback: returns 1 when @dev is a root decoder
+ * that lists the host bridge device passed in @data among its targets,
+ * 0 otherwise. The target list is read under the decoder's target_lock
+ * seqlock.
+ */
+static int match_cxlrd_hb(struct device *dev, void *data)
+{
+	struct device *host_bridge = data;
+	struct cxl_switch_decoder *cxlsd;
+	unsigned int seq;
+
+	if (!is_root_decoder(dev))
+		return 0;
+
+	cxlsd = &to_cxl_root_decoder(dev)->cxlsd;
+
+	do {
+		int i = 0;
+
+		seq = read_seqbegin(&cxlsd->target_lock);
+		while (i < cxlsd->nr_targets) {
+			if (cxlsd->target[i]->dport_dev == host_bridge)
+				return 1;
+			i++;
+		}
+	} while (read_seqretry(&cxlsd->target_lock, seq));
+
+	return 0;
+}
+
+/* Unlink and free every cxl_dpa_perf on @list. */
+static void discard_dpa_perf(struct list_head *list)
+{
+	while (!list_empty(list)) {
+		struct cxl_dpa_perf *victim =
+			list_first_entry(list, struct cxl_dpa_perf, list);
+
+		list_del(&victim->list);
+		kfree(victim);
+	}
+}
+DEFINE_FREE(dpa_perf, struct list_head *, if (!list_empty(_T)) discard_dpa_perf(_T))
+
+/*
+ * Drop perf entries of @cxlmd that cannot be backed by the platform: entries
+ * whose QoS class matches no root decoder, and all entries when no root
+ * decoder targets the memdev's host bridge. Dropped entries are collected on
+ * a local list that is freed when the function returns.
+ *
+ * Returns -ENODEV when no CXL root is found, otherwise the result of the
+ * host bridge lookup (non-zero when a root decoder targets it).
+ */
+static int cxl_qos_class_verify(struct cxl_memdev *cxlmd)
+{
+	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+	struct cxl_port *root_port __free(put_device) = NULL;
+	LIST_HEAD(__discard);
+	struct list_head *discard __free(dpa_perf) = &__discard;
+	int rc;
+
+	root_port = find_cxl_root(cxlmd->endpoint);
+	if (!root_port)
+		return -ENODEV;
+
+	/* QTG IDs of the device must agree with those of the root decoders */
+	cxl_qos_match(root_port, &mds->ram_perf_list, discard);
+	cxl_qos_match(root_port, &mds->pmem_perf_list, discard);
+
+	/* The device's host bridge has to sit below some root decoder */
+	rc = device_for_each_child(&root_port->dev,
+				   (void *)cxlmd->endpoint->host_bridge,
+				   match_cxlrd_hb);
+	if (rc)
+		return rc;
+
+	list_splice_tail_init(&mds->ram_perf_list, discard);
+	list_splice_tail_init(&mds->pmem_perf_list, discard);
+
+	return rc;
+}
+
+/* Erase and free every entry stored in @xa, then destroy the xarray. */
+static void discard_dsmas(struct xarray *xa)
+{
+	unsigned long idx;
+	void *entry;
+
+	xa_for_each(xa, idx, entry)
+		kfree(xa_erase(xa, idx));
+
+	xa_destroy(xa);
+}
+DEFINE_FREE(dsmas, struct xarray *, if (_T) discard_dsmas(_T))
+
+/*
+ * Parse the CDAT of endpoint @port: collect DSMAS/DSLBIS data, combine it
+ * with the path performance, populate the memdev perf lists and verify them
+ * against the root decoders. Does nothing when the port has no CDAT table;
+ * failures of the first two steps are only reported via dev_dbg().
+ * The temporary DSMAS xarray is released on every return path.
+ */
+void cxl_endpoint_parse_cdat(struct cxl_port *port)
+{
+	struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
+	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct xarray dsmas_store;
+	struct xarray *dsmas_xa __free(dsmas) = &dsmas_store;
+	int rc;
+
+	xa_init(&dsmas_store);
+
+	if (!port->cdat.table)
+		return;
+
+	rc = cxl_cdat_endpoint_process(port, dsmas_xa);
+	if (rc < 0) {
+		dev_dbg(&port->dev, "Failed to parse CDAT: %d\n", rc);
+		return;
+	}
+
+	if (cxl_port_perf_data_calculate(port, dsmas_xa)) {
+		dev_dbg(&port->dev, "Failed to do perf coord calculations.\n");
+		return;
+	}
+
+	cxl_memdev_set_qos_class(cxlds, dsmas_xa);
+	cxl_qos_class_verify(cxlmd);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, CXL);
+
+/*
+ * cdat_table_parse() callback for SSLBIS subtables of a switch.
+ *
+ * @header: subtable being visited
+ * @arg: the switch's cxl_port
+ * @end: address one past the end of the CDAT buffer
+ *
+ * A subtable consists of the common CDAT header, the SSLBIS header and a
+ * run of SSLBE entries. For every entry the value (base unit times the
+ * entry's latency/bandwidth) is stored in the sw_coord of each downstream
+ * port that the entry's port IDs select.
+ *
+ * Returns 0, or -EINVAL when the subtable length is malformed.
+ */
+static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg,
+			       const unsigned long end)
+{
+	struct acpi_cdat_sslbis *sslbis;
+	int size = sizeof(header->cdat) + sizeof(*sslbis);
+	struct cxl_port *port = arg;
+	struct device *dev = &port->dev;
+	struct acpi_cdat_sslbe *entry;
+	int remain, entries, i;
+	u16 len;
+
+	len = le16_to_cpu((__force __le16)header->cdat.length);
+	remain = len - size;
+	/*
+	 * Reject lengths shorter than the fixed part as well: a negative
+	 * remainder would otherwise be converted to a huge unsigned value
+	 * by the modulo below.
+	 */
+	if (remain <= 0 || remain % sizeof(*entry) ||
+	    (unsigned long)header + len > end) {
+		dev_warn(dev, "Malformed SSLBIS table length: (%u)\n", len);
+		return -EINVAL;
+	}
+
+	/* Skip common header */
+	sslbis = (struct acpi_cdat_sslbis *)((unsigned long)header +
+					     sizeof(header->cdat));
+
+	/* Unrecognized data type, we can skip */
+	if (sslbis->data_type > ACPI_HMAT_WRITE_BANDWIDTH)
+		return 0;
+
+	entries = remain / sizeof(*entry);
+	/*
+	 * The entries follow the SSLBIS header, which itself follows the
+	 * common header; both have to be skipped to reach the first entry.
+	 */
+	entry = (struct acpi_cdat_sslbe *)(sslbis + 1);
+
+	for (i = 0; i < entries; i++) {
+		u16 x = le16_to_cpu((__force __le16)entry->portx_id);
+		u16 y = le16_to_cpu((__force __le16)entry->porty_id);
+		__le64 le_base;
+		__le16 le_val;
+		struct cxl_dport *dport;
+		unsigned long index;
+		u16 dsp_id;
+		u64 val;
+
+		/* Work out which downstream port the entry refers to */
+		switch (x) {
+		case ACPI_CDAT_SSLBIS_US_PORT:
+			dsp_id = y;
+			break;
+		case ACPI_CDAT_SSLBIS_ANY_PORT:
+			switch (y) {
+			case ACPI_CDAT_SSLBIS_US_PORT:
+				dsp_id = x;
+				break;
+			case ACPI_CDAT_SSLBIS_ANY_PORT:
+				dsp_id = ACPI_CDAT_SSLBIS_ANY_PORT;
+				break;
+			default:
+				dsp_id = y;
+				break;
+			}
+			break;
+		default:
+			dsp_id = x;
+			break;
+		}
+
+		le_base = (__force __le64)sslbis->entry_base_unit;
+		le_val = (__force __le16)entry->latency_or_bandwidth;
+
+		/* An overflow is only reported; the wrapped product is used */
+		if (check_mul_overflow(le64_to_cpu(le_base),
+				       le16_to_cpu(le_val), &val))
+			dev_warn(dev, "SSLBIS value overflowed!\n");
+
+		xa_for_each(&port->dports, index, dport) {
+			if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT ||
+			    dsp_id == dport->port_id)
+				cxl_access_coordinate_set(&dport->sw_coord,
+							  sslbis->data_type,
+							  val);
+		}
+
+		entry++;
+	}
+
+	return 0;
+}
+
+/*
+ * Parse the SSLBIS subtables of switch @port's CDAT. Does nothing when the
+ * port has no CDAT table; a parse failure is only reported via dev_dbg().
+ */
+void cxl_switch_parse_cdat(struct cxl_port *port)
+{
+	int nr;
+	int rc;
+
+	if (!port->cdat.table)
+		return;
+
+	nr = cdat_table_parse(ACPI_CDAT_TYPE_SSLBIS, cdat_sslbis_handler,
+			      port, port->cdat.table);
+	rc = cdat_table_parse_output(nr);
+	if (!rc)
+		return;
+
+	dev_dbg(&port->dev, "Failed to parse SSLBIS: %d\n", rc);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_switch_parse_cdat, CXL);
+
+MODULE_IMPORT_NS(CXL);
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 86d7ba23235e..3b64fb1b9ed0 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -88,4 +88,6 @@ enum cxl_poison_trace_type {
CXL_POISON_TRACE_CLEAR,
};
+long cxl_pci_get_latency(struct pci_dev *pdev);
+
#endif /* __CXL_CORE_H__ */
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 1cc9be85ba4c..7d97790b893d 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -363,10 +363,9 @@ resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled)
{
resource_size_t base = -1;
- down_read(&cxl_dpa_rwsem);
+ lockdep_assert_held(&cxl_dpa_rwsem);
if (cxled->dpa_res)
base = cxled->dpa_res->start;
- up_read(&cxl_dpa_rwsem);
return base;
}
@@ -839,6 +838,8 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
cxld->target_type = CXL_DECODER_HOSTONLYMEM;
else
cxld->target_type = CXL_DECODER_DEVMEM;
+
+ guard(rwsem_write)(&cxl_region_rwsem);
if (cxld->id != cxl_num_decoders_committed(port)) {
dev_warn(&port->dev,
"decoder%d.%d: Committed out of order\n",
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index b86dbd25740c..d51a1f250c8c 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -1405,6 +1405,8 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
mds->cxlds.reg_map.host = dev;
mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE;
mds->cxlds.type = CXL_DEVTYPE_CLASSMEM;
+ INIT_LIST_HEAD(&mds->ram_perf_list);
+ INIT_LIST_HEAD(&mds->pmem_perf_list);
return mds;
}
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index fc5c2b414793..2f43d368ba07 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -227,10 +227,16 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
if (!port || !is_cxl_endpoint(port))
return -EINVAL;
- rc = down_read_interruptible(&cxl_dpa_rwsem);
+ rc = down_read_interruptible(&cxl_region_rwsem);
if (rc)
return rc;
+ rc = down_read_interruptible(&cxl_dpa_rwsem);
+ if (rc) {
+ up_read(&cxl_region_rwsem);
+ return rc;
+ }
+
if (cxl_num_decoders_committed(port) == 0) {
/* No regions mapped to this memdev */
rc = cxl_get_poison_by_memdev(cxlmd);
@@ -239,6 +245,7 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
rc = cxl_get_poison_by_endpoint(port);
}
up_read(&cxl_dpa_rwsem);
+ up_read(&cxl_region_rwsem);
return rc;
}
@@ -324,10 +331,16 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
if (!IS_ENABLED(CONFIG_DEBUG_FS))
return 0;
- rc = down_read_interruptible(&cxl_dpa_rwsem);
+ rc = down_read_interruptible(&cxl_region_rwsem);
if (rc)
return rc;
+ rc = down_read_interruptible(&cxl_dpa_rwsem);
+ if (rc) {
+ up_read(&cxl_region_rwsem);
+ return rc;
+ }
+
rc = cxl_validate_poison_dpa(cxlmd, dpa);
if (rc)
goto out;
@@ -355,6 +368,7 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT);
out:
up_read(&cxl_dpa_rwsem);
+ up_read(&cxl_region_rwsem);
return rc;
}
@@ -372,10 +386,16 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
if (!IS_ENABLED(CONFIG_DEBUG_FS))
return 0;
- rc = down_read_interruptible(&cxl_dpa_rwsem);
+ rc = down_read_interruptible(&cxl_region_rwsem);
if (rc)
return rc;
+ rc = down_read_interruptible(&cxl_dpa_rwsem);
+ if (rc) {
+ up_read(&cxl_region_rwsem);
+ return rc;
+ }
+
rc = cxl_validate_poison_dpa(cxlmd, dpa);
if (rc)
goto out;
@@ -412,6 +432,7 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR);
out:
up_read(&cxl_dpa_rwsem);
+ up_read(&cxl_region_rwsem);
return rc;
}
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index eff20e83d0a6..6c9c8d92f8f7 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2021 Intel Corporation. All rights reserved. */
+#include <linux/units.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/device.h>
#include <linux/delay.h>
@@ -620,7 +621,7 @@ void read_cdat_data(struct cxl_port *port)
struct pci_dev *pdev = NULL;
struct cxl_memdev *cxlmd;
size_t cdat_length;
- void *cdat_table;
+ void *cdat_table, *cdat_buf;
int rc;
if (is_cxl_memdev(uport)) {
@@ -651,16 +652,15 @@ void read_cdat_data(struct cxl_port *port)
return;
}
- cdat_table = devm_kzalloc(dev, cdat_length + sizeof(__le32),
- GFP_KERNEL);
- if (!cdat_table)
+ cdat_buf = devm_kzalloc(dev, cdat_length + sizeof(__le32), GFP_KERNEL);
+ if (!cdat_buf)
return;
- rc = cxl_cdat_read_table(dev, cdat_doe, cdat_table, &cdat_length);
+ rc = cxl_cdat_read_table(dev, cdat_doe, cdat_buf, &cdat_length);
if (rc)
goto err;
- cdat_table = cdat_table + sizeof(__le32);
+ cdat_table = cdat_buf + sizeof(__le32);
if (cdat_checksum(cdat_table, cdat_length))
goto err;
@@ -670,7 +670,7 @@ void read_cdat_data(struct cxl_port *port)
err:
/* Don't leave table data allocated on error */
- devm_kfree(dev, cdat_table);
+ devm_kfree(dev, cdat_buf);
dev_err(dev, "Failed to read/validate CDAT.\n");
}
EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
@@ -980,3 +980,38 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
return PCI_ERS_RESULT_NEED_RESET;
}
EXPORT_SYMBOL_NS_GPL(cxl_error_detected, CXL);
+
+/* Flit size in bytes: 256 when cxl_pci_flit_256() says so, otherwise 68. */
+static int cxl_flit_size(struct pci_dev *pdev)
+{
+	return cxl_pci_flit_256(pdev) ? 256 : 68;
+}
+
+/**
+ * cxl_pci_get_latency - calculate the link latency for the PCIe link
+ * @pdev: PCI device
+ *
+ * return: calculated latency or 0 for no latency
+ *
+ * CXL Memory Device SW Guide v1.0 2.11.4 Link latency calculation
+ * Link latency = LinkPropagationLatency + FlitLatency + RetimerLatency
+ * LinkPropagationLatency is negligible, so 0 will be used
+ * RetimerLatency is assumed to be negligible and 0 will be used
+ * FlitLatency = FlitSize / LinkBandwidth
+ * FlitSize is defined by spec. CXL rev3.0 4.2.1.
+ * 68B flit is used up to 32GT/s. >32GT/s, 256B flit size is used.
+ * The FlitLatency is converted to picoseconds.
+ */
+long cxl_pci_get_latency(struct pci_dev *pdev)
+{
+	long bw = pcie_link_speed_mbps(pdev);
+
+	/* A negative value is an error from the speed lookup */
+	if (bw < 0)
+		return 0;
+
+	/* Mbit/s -> Mbyte/s */
+	bw = bw / BITS_PER_BYTE;
+
+	return cxl_flit_size(pdev) * MEGA / bw;
+}
diff --git a/drivers/cxl/core/pmu.c b/drivers/cxl/core/pmu.c
index 7684c843e5a5..5d8e06b0ba6e 100644
--- a/drivers/cxl/core/pmu.c
+++ b/drivers/cxl/core/pmu.c
@@ -23,7 +23,7 @@ const struct device_type cxl_pmu_type = {
static void remove_dev(void *dev)
{
- device_del(dev);
+ device_unregister(dev);
}
int devm_cxl_pmu_add(struct device *parent, struct cxl_pmu_regs *regs,
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 38441634e4c6..8c00fd6be730 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -9,6 +9,7 @@
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/idr.h>
+#include <linux/node.h>
#include <cxlmem.h>
#include <cxlpci.h>
#include <cxl.h>
@@ -226,9 +227,9 @@ static ssize_t dpa_resource_show(struct device *dev, struct device_attribute *at
char *buf)
{
struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
- u64 base = cxl_dpa_resource_start(cxled);
- return sysfs_emit(buf, "%#llx\n", base);
+ guard(rwsem_read)(&cxl_dpa_rwsem);
+ return sysfs_emit(buf, "%#llx\n", (u64)cxl_dpa_resource_start(cxled));
}
static DEVICE_ATTR_RO(dpa_resource);
@@ -541,7 +542,10 @@ static void cxl_port_release(struct device *dev)
xa_destroy(&port->dports);
xa_destroy(&port->regions);
ida_free(&cxl_port_ida, port->id);
- kfree(port);
+ if (is_cxl_root(port))
+ kfree(to_cxl_root(port));
+ else
+ kfree(port);
}
static ssize_t decoders_committed_show(struct device *dev,
@@ -669,17 +673,31 @@ static struct lock_class_key cxl_port_key;
static struct cxl_port *cxl_port_alloc(struct device *uport_dev,
struct cxl_dport *parent_dport)
{
- struct cxl_port *port;
+ struct cxl_root *cxl_root __free(kfree) = NULL;
+ struct cxl_port *port, *_port __free(kfree) = NULL;
struct device *dev;
int rc;
- port = kzalloc(sizeof(*port), GFP_KERNEL);
- if (!port)
- return ERR_PTR(-ENOMEM);
+ /* No parent_dport, root cxl_port */
+ if (!parent_dport) {
+ cxl_root = kzalloc(sizeof(*cxl_root), GFP_KERNEL);
+ if (!cxl_root)
+ return ERR_PTR(-ENOMEM);
+ } else {
+ _port = kzalloc(sizeof(*port), GFP_KERNEL);
+ if (!_port)
+ return ERR_PTR(-ENOMEM);
+ }
rc = ida_alloc(&cxl_port_ida, GFP_KERNEL);
if (rc < 0)
- goto err;
+ return ERR_PTR(rc);
+
+ if (cxl_root)
+ port = &no_free_ptr(cxl_root)->port;
+ else
+ port = no_free_ptr(_port);
+
port->id = rc;
port->uport_dev = uport_dev;
@@ -731,10 +749,6 @@ static struct cxl_port *cxl_port_alloc(struct device *uport_dev,
dev->type = &cxl_port_type;
return port;
-
-err:
- kfree(port);
- return ERR_PTR(rc);
}
static int cxl_setup_comp_regs(struct device *host, struct cxl_register_map *map,
@@ -841,6 +855,9 @@ static struct cxl_port *__devm_cxl_add_port(struct device *host,
if (rc)
return ERR_PTR(rc);
+ if (parent_dport && dev_is_pci(uport_dev))
+ port->pci_latency = cxl_pci_get_latency(to_pci_dev(uport_dev));
+
return port;
err:
@@ -884,6 +901,22 @@ struct cxl_port *devm_cxl_add_port(struct device *host,
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_port, CXL);
+/*
+ * Register a CXL root port for @host (no parent dport, no component
+ * registers) and attach @ops to the resulting cxl_root.
+ *
+ * Returns the cxl_root, or the ERR_PTR() reported by devm_cxl_add_port().
+ */
+struct cxl_root *devm_cxl_add_root(struct device *host,
+				   const struct cxl_root_ops *ops)
+{
+	struct cxl_port *port = devm_cxl_add_port(host, host,
+						  CXL_RESOURCE_NONE, NULL);
+	struct cxl_root *cxl_root;
+
+	if (IS_ERR(port))
+		return ERR_CAST(port);
+
+	cxl_root = to_cxl_root(port);
+	cxl_root->ops = ops;
+
+	return cxl_root;
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_root, CXL);
+
struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port)
{
/* There is no pci_bus associated with a CXL platform-root port */
@@ -1108,6 +1141,9 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev,
if (rc)
return ERR_PTR(rc);
+ if (dev_is_pci(dport_dev))
+ dport->link_latency = cxl_pci_get_latency(to_pci_dev(dport_dev));
+
return dport;
}
@@ -2059,6 +2095,80 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd)
}
EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL);
+/*
+ * Merge @c2 into @c1: latencies are summed, and each bandwidth of @c1 is
+ * lowered to the matching bandwidth of @c2 when that one is non-zero and
+ * smaller.
+ */
+static void combine_coordinates(struct access_coordinate *c1,
+				struct access_coordinate *c2)
+{
+	c1->read_latency += c2->read_latency;
+	c1->write_latency += c2->write_latency;
+
+	if (c2->read_bandwidth)
+		c1->read_bandwidth = min(c1->read_bandwidth,
+					 c2->read_bandwidth);
+
+	if (c2->write_bandwidth)
+		c1->write_bandwidth = min(c1->write_bandwidth,
+					  c2->write_bandwidth);
+}
+
+/**
+ * cxl_endpoint_get_perf_coordinates - Retrieve performance numbers stored in dports
+ *				   of CXL path
+ * @port: endpoint cxl_port
+ * @coord: output performance data
+ *
+ * Return: errno on failure, 0 on success.
+ */
+int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
+				      struct access_coordinate *coord)
+{
+	struct access_coordinate acc = {
+		.read_bandwidth = UINT_MAX,
+		.write_bandwidth = UINT_MAX,
+	};
+	struct cxl_dport *dport;
+	struct cxl_port *iter;
+	struct pci_dev *pdev;
+	unsigned int bw;
+
+	if (!is_cxl_endpoint(port))
+		return -EINVAL;
+
+	/*
+	 * Walk from the endpoint towards the root and stop once the parent of
+	 * the current port is the cxl root. Each step adds the switch
+	 * coordinates and the link latency recorded in the dport the current
+	 * port hangs off.
+	 */
+	for (iter = port, dport = iter->parent_dport;
+	     iter && !is_cxl_root(to_cxl_port(iter->dev.parent));
+	     iter = to_cxl_port(iter->dev.parent), dport = iter->parent_dport) {
+		combine_coordinates(&acc, &dport->sw_coord);
+		acc.write_latency += dport->link_latency;
+		acc.read_latency += dport->link_latency;
+	}
+
+	/* Augment with the generic port (host bridge) perf data */
+	combine_coordinates(&acc, &dport->hb_coord);
+
+	/* Get the calculated PCI paths bandwidth */
+	pdev = to_pci_dev(port->uport_dev->parent);
+	bw = pcie_bandwidth_available(pdev, NULL, NULL, NULL);
+	if (bw == 0)
+		return -ENXIO;
+	bw /= BITS_PER_BYTE;
+
+	acc.write_bandwidth = min(acc.write_bandwidth, bw);
+	acc.read_bandwidth = min(acc.read_bandwidth, bw);
+
+	*coord = acc;
+
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_endpoint_get_perf_coordinates, CXL);
+
/* for user tooling to ensure port disable work has completed */
static ssize_t flush_store(const struct bus_type *bus, const char *buf, size_t count)
{
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 0e88f1aed018..57a5901d5a60 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2468,10 +2468,6 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port)
struct cxl_poison_context ctx;
int rc = 0;
- rc = down_read_interruptible(&cxl_region_rwsem);
- if (rc)
- return rc;
-
ctx = (struct cxl_poison_context) {
.port = port
};
@@ -2481,7 +2477,6 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port)
rc = cxl_get_poison_unmapped(to_cxl_memdev(port->uport_dev),
&ctx);
- up_read(&cxl_region_rwsem);
return rc;
}