Diffstat (limited to 'drivers/cxl')
-rw-r--r-- | drivers/cxl/acpi.c        |   3
-rw-r--r-- | drivers/cxl/core/core.h   |   2
-rw-r--r-- | drivers/cxl/core/hdm.c    |  93
-rw-r--r-- | drivers/cxl/core/mbox.c   |  60
-rw-r--r-- | drivers/cxl/core/memdev.c | 161
-rw-r--r-- | drivers/cxl/core/pci.c    | 275
-rw-r--r-- | drivers/cxl/core/port.c   | 184
-rw-r--r-- | drivers/cxl/core/region.c | 266
-rw-r--r-- | drivers/cxl/core/regs.c   |  73
-rw-r--r-- | drivers/cxl/cxl.h         |  38
-rw-r--r-- | drivers/cxl/cxlmem.h      |  17
-rw-r--r-- | drivers/cxl/mem.c         |   7
-rw-r--r-- | drivers/cxl/pci.c         | 107
-rw-r--r-- | drivers/cxl/port.c        |   3
14 files changed, 856 insertions, 433 deletions
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 40d055560e52..2034eb4ce83f 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -289,6 +289,9 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, } } } + + cxlrd->qos_class = cfmws->qtg_id; + rc = cxl_decoder_add(cxld, target_map); err_xormap: if (rc) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 45e7e044cf4a..86d7ba23235e 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -73,8 +73,10 @@ struct cxl_rcrb_info; resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri, enum cxl_rcrb which); +u16 cxl_rcrb_to_aer(struct device *dev, resource_size_t rcrb); extern struct rw_semaphore cxl_dpa_rwsem; +extern struct rw_semaphore cxl_region_rwsem; int cxl_memdev_init(void); void cxl_memdev_exit(void); diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 4449b34a80cc..1cc9be85ba4c 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -81,26 +81,6 @@ static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm) cxlhdm->interleave_mask |= GENMASK(14, 12); } -static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb, - struct cxl_component_regs *regs) -{ - struct cxl_register_map map = { - .dev = &port->dev, - .resource = port->component_reg_phys, - .base = crb, - .max_size = CXL_COMPONENT_REG_BLOCK_SIZE, - }; - - cxl_probe_component_regs(&port->dev, crb, &map.component_map); - if (!map.component_map.hdm_decoder.valid) { - dev_dbg(&port->dev, "HDM decoder registers not implemented\n"); - /* unique error code to indicate no HDM decoder capability */ - return -ENODEV; - } - - return cxl_map_component_regs(&map, regs, BIT(CXL_CM_CAP_CAP_ID_HDM)); -} - static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) { struct cxl_hdm *cxlhdm; @@ -153,9 +133,9 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, struct cxl_endpoint_dvsec_info *info) { + struct cxl_register_map *reg_map = &port->reg_map; struct device *dev = &port->dev; struct cxl_hdm *cxlhdm; - void __iomem *crb; int rc; cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL); @@ -164,19 +144,29 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, cxlhdm->port = port; dev_set_drvdata(dev, cxlhdm); - crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); - if (!crb && info && info->mem_enabled) { + /* Memory devices can configure device HDM using DVSEC range regs. 
*/ + if (reg_map->resource == CXL_RESOURCE_NONE) { + if (!info || !info->mem_enabled) { + dev_err(dev, "No component registers mapped\n"); + return ERR_PTR(-ENXIO); + } + cxlhdm->decoder_count = info->ranges; return cxlhdm; - } else if (!crb) { - dev_err(dev, "No component registers mapped\n"); - return ERR_PTR(-ENXIO); } - rc = map_hdm_decoder_regs(port, crb, &cxlhdm->regs); - iounmap(crb); - if (rc) + if (!reg_map->component_map.hdm_decoder.valid) { + dev_dbg(&port->dev, "HDM decoder registers not implemented\n"); + /* unique error code to indicate no HDM decoder capability */ + return ERR_PTR(-ENODEV); + } + + rc = cxl_map_component_regs(reg_map, &cxlhdm->regs, + BIT(CXL_CM_CAP_CAP_ID_HDM)); + if (rc) { + dev_err(dev, "Failed to map HDM capability.\n"); return ERR_PTR(rc); + } parse_hdm_decoder_caps(cxlhdm); if (cxlhdm->decoder_count == 0) { @@ -575,17 +565,11 @@ static void cxld_set_type(struct cxl_decoder *cxld, u32 *ctrl) CXL_HDM_DECODER0_CTRL_HOSTONLY); } -static int cxlsd_set_targets(struct cxl_switch_decoder *cxlsd, u64 *tgt) +static void cxlsd_set_targets(struct cxl_switch_decoder *cxlsd, u64 *tgt) { struct cxl_dport **t = &cxlsd->target[0]; int ways = cxlsd->cxld.interleave_ways; - if (dev_WARN_ONCE(&cxlsd->cxld.dev, - ways > 8 || ways > cxlsd->nr_targets, - "ways: %d overflows targets: %d\n", ways, - cxlsd->nr_targets)) - return -ENXIO; - *tgt = FIELD_PREP(GENMASK(7, 0), t[0]->port_id); if (ways > 1) *tgt |= FIELD_PREP(GENMASK(15, 8), t[1]->port_id); @@ -601,8 +585,6 @@ static int cxlsd_set_targets(struct cxl_switch_decoder *cxlsd, u64 *tgt) *tgt |= FIELD_PREP(GENMASK_ULL(55, 48), t[6]->port_id); if (ways > 7) *tgt |= FIELD_PREP(GENMASK_ULL(63, 56), t[7]->port_id); - - return 0; } /* @@ -643,13 +625,33 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld) if (cxld->flags & CXL_DECODER_F_ENABLE) return 0; - if (port->commit_end + 1 != id) { + if (cxl_num_decoders_committed(port) != id) { dev_dbg(&port->dev, "%s: out of order commit, expected decoder%d.%d\n", - dev_name(&cxld->dev), port->id, port->commit_end + 1); + dev_name(&cxld->dev), port->id, + cxl_num_decoders_committed(port)); return -EBUSY; } + /* + * For endpoint decoders hosted on CXL memory devices that + * support the sanitize operation, make sure sanitize is not in-flight. 
+ */ + if (is_endpoint_decoder(&cxld->dev)) { + struct cxl_endpoint_decoder *cxled = + to_cxl_endpoint_decoder(&cxld->dev); + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_memdev_state *mds = + to_cxl_memdev_state(cxlmd->cxlds); + + if (mds && mds->security.sanitize_active) { + dev_dbg(&cxlmd->dev, + "attempted to commit %s during sanitize\n", + dev_name(&cxld->dev)); + return -EBUSY; + } + } + down_read(&cxl_dpa_rwsem); /* common decoder settings */ ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(cxld->id)); @@ -670,13 +672,7 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld) void __iomem *tl_lo = hdm + CXL_HDM_DECODER0_TL_LOW(id); u64 targets; - rc = cxlsd_set_targets(cxlsd, &targets); - if (rc) { - dev_dbg(&port->dev, "%s: target configuration error\n", - dev_name(&cxld->dev)); - goto err; - } - + cxlsd_set_targets(cxlsd, &targets); writel(upper_32_bits(targets), tl_hi); writel(lower_32_bits(targets), tl_lo); } else { @@ -694,7 +690,6 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld) port->commit_end++; rc = cxld_await_commit(hdm, cxld->id); -err: if (rc) { dev_dbg(&port->dev, "%s: error %d committing decoder\n", dev_name(&cxld->dev), rc); @@ -844,7 +839,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, cxld->target_type = CXL_DECODER_HOSTONLYMEM; else cxld->target_type = CXL_DECODER_DEVMEM; - if (cxld->id != port->commit_end + 1) { + if (cxld->id != cxl_num_decoders_committed(port)) { dev_warn(&port->dev, "decoder%d.%d: Committed out of order\n", port->id, cxld->id); diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 4df4f614f490..36270dcfb42e 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1125,20 +1125,7 @@ int cxl_dev_state_identify(struct cxl_memdev_state *mds) } EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL); -/** - * cxl_mem_sanitize() - Send a sanitization command to the device. - * @mds: The device data for the operation - * @cmd: The specific sanitization command opcode - * - * Return: 0 if the command was executed successfully, regardless of - * whether or not the actual security operation is done in the background, - * such as for the Sanitize case. - * Error return values can be the result of the mailbox command, -EINVAL - * when security requirements are not met or invalid contexts. - * - * See CXL 3.0 @8.2.9.8.5.1 Sanitize and @8.2.9.8.5.2 Secure Erase. - */ -int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd) +static int __cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd) { int rc; u32 sec_out = 0; @@ -1183,7 +1170,45 @@ int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd) return 0; } -EXPORT_SYMBOL_NS_GPL(cxl_mem_sanitize, CXL); + + +/** + * cxl_mem_sanitize() - Send a sanitization command to the device. + * @cxlmd: The device for the operation + * @cmd: The specific sanitization command opcode + * + * Return: 0 if the command was executed successfully, regardless of + * whether or not the actual security operation is done in the background, + * such as for the Sanitize case. + * Error return values can be the result of the mailbox command, -EINVAL + * when security requirements are not met or invalid contexts, or -EBUSY + * if the sanitize operation is already in flight. + * + * See CXL 3.0 @8.2.9.8.5.1 Sanitize and @8.2.9.8.5.2 Secure Erase. 
+ */ +int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd) +{ + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + struct cxl_port *endpoint; + int rc; + + /* synchronize with cxl_mem_probe() and decoder write operations */ + device_lock(&cxlmd->dev); + endpoint = cxlmd->endpoint; + down_read(&cxl_region_rwsem); + /* + * Require an endpoint to be safe otherwise the driver can not + * be sure that the device is unmapped. + */ + if (endpoint && cxl_num_decoders_committed(endpoint) == 0) + rc = __cxl_mem_sanitize(mds, cmd); + else + rc = -EBUSY; + up_read(&cxl_region_rwsem); + device_unlock(&cxlmd->dev); + + return rc; +} static int add_dpa_res(struct device *dev, struct resource *parent, struct resource *res, resource_size_t start, @@ -1224,8 +1249,7 @@ int cxl_mem_create_range_info(struct cxl_memdev_state *mds) return 0; } - cxlds->dpa_res = - (struct resource)DEFINE_RES_MEM(0, mds->total_bytes); + cxlds->dpa_res = DEFINE_RES_MEM(0, mds->total_bytes); if (mds->partition_align_bytes == 0) { rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0, @@ -1377,6 +1401,8 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) mutex_init(&mds->mbox_mutex); mutex_init(&mds->event.log_lock); mds->cxlds.dev = dev; + mds->cxlds.reg_map.host = dev; + mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE; mds->cxlds.type = CXL_DEVTYPE_CLASSMEM; return mds; diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 14b547c07f54..fc5c2b414793 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -125,13 +125,16 @@ static ssize_t security_state_show(struct device *dev, struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - u64 reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); - u32 pct = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, reg); - u16 cmd = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg); unsigned long state = mds->security.state; + int rc = 0; - if (cmd == CXL_MBOX_OP_SANITIZE && pct != 100) - return sysfs_emit(buf, "sanitize\n"); + /* sync with latest submission state */ + mutex_lock(&mds->mbox_mutex); + if (mds->security.sanitize_active) + rc = sysfs_emit(buf, "sanitize\n"); + mutex_unlock(&mds->mbox_mutex); + if (rc) + return rc; if (!(state & CXL_PMEM_SEC_STATE_USER_PASS_SET)) return sysfs_emit(buf, "disabled\n"); @@ -152,24 +155,17 @@ static ssize_t security_sanitize_store(struct device *dev, const char *buf, size_t len) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); - struct cxl_port *port = cxlmd->endpoint; bool sanitize; ssize_t rc; if (kstrtobool(buf, &sanitize) || !sanitize) return -EINVAL; - if (!port || !is_cxl_endpoint(port)) - return -EINVAL; - - /* ensure no regions are mapped to this memdev */ - if (port->commit_end != -1) - return -EBUSY; - - rc = cxl_mem_sanitize(mds, CXL_MBOX_OP_SANITIZE); + rc = cxl_mem_sanitize(cxlmd, CXL_MBOX_OP_SANITIZE); + if (rc) + return rc; - return rc ? 
rc : len; + return len; } static struct device_attribute dev_attr_security_sanitize = __ATTR(sanitize, 0200, NULL, security_sanitize_store); @@ -179,24 +175,17 @@ static ssize_t security_erase_store(struct device *dev, const char *buf, size_t len) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); - struct cxl_port *port = cxlmd->endpoint; ssize_t rc; bool erase; if (kstrtobool(buf, &erase) || !erase) return -EINVAL; - if (!port || !is_cxl_endpoint(port)) - return -EINVAL; - - /* ensure no regions are mapped to this memdev */ - if (port->commit_end != -1) - return -EBUSY; - - rc = cxl_mem_sanitize(mds, CXL_MBOX_OP_SECURE_ERASE); + rc = cxl_mem_sanitize(cxlmd, CXL_MBOX_OP_SECURE_ERASE); + if (rc) + return rc; - return rc ? rc : len; + return len; } static struct device_attribute dev_attr_security_erase = __ATTR(erase, 0200, NULL, security_erase_store); @@ -242,7 +231,7 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd) if (rc) return rc; - if (port->commit_end == -1) { + if (cxl_num_decoders_committed(port) == 0) { /* No regions mapped to this memdev */ rc = cxl_get_poison_by_memdev(cxlmd); } else { @@ -293,7 +282,7 @@ static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa) .dpa = dpa, }; port = cxlmd->endpoint; - if (port && is_cxl_endpoint(port) && port->commit_end != -1) + if (port && is_cxl_endpoint(port) && cxl_num_decoders_committed(port)) device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region); return ctx.cxlr; @@ -556,21 +545,11 @@ void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds, } EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, CXL); -static void cxl_memdev_security_shutdown(struct device *dev) -{ - struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); - - if (mds->security.poll) - cancel_delayed_work_sync(&mds->security.poll_dwork); -} - static void cxl_memdev_shutdown(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); down_write(&cxl_memdev_rwsem); - cxl_memdev_security_shutdown(dev); cxlmd->cxlds = NULL; up_write(&cxl_memdev_rwsem); } @@ -580,8 +559,8 @@ static void cxl_memdev_unregister(void *_cxlmd) struct cxl_memdev *cxlmd = _cxlmd; struct device *dev = &cxlmd->dev; - cxl_memdev_shutdown(dev); cdev_device_del(&cxlmd->cdev, dev); + cxl_memdev_shutdown(dev); put_device(dev); } @@ -961,17 +940,16 @@ static const struct fw_upload_ops cxl_memdev_fw_ops = { .cleanup = cxl_fw_cleanup, }; -static void devm_cxl_remove_fw_upload(void *fwl) +static void cxl_remove_fw_upload(void *fwl) { firmware_upload_unregister(fwl); } -int cxl_memdev_setup_fw_upload(struct cxl_memdev_state *mds) +int devm_cxl_setup_fw_upload(struct device *host, struct cxl_memdev_state *mds) { struct cxl_dev_state *cxlds = &mds->cxlds; struct device *dev = &cxlds->cxlmd->dev; struct fw_upload *fwl; - int rc; if (!test_bit(CXL_MEM_COMMAND_ID_GET_FW_INFO, mds->enabled_cmds)) return 0; @@ -979,19 +957,10 @@ int cxl_memdev_setup_fw_upload(struct cxl_memdev_state *mds) fwl = firmware_upload_register(THIS_MODULE, dev, dev_name(dev), &cxl_memdev_fw_ops, mds); if (IS_ERR(fwl)) - return dev_err_probe(dev, PTR_ERR(fwl), - "Failed to register firmware loader\n"); - - rc = devm_add_action_or_reset(cxlds->dev, devm_cxl_remove_fw_upload, - fwl); - if (rc) - dev_err(dev, - "Failed to add firmware loader remove action: %d\n", - rc); - - return rc; + return PTR_ERR(fwl); + return devm_add_action_or_reset(host, cxl_remove_fw_upload, fwl); } 
-EXPORT_SYMBOL_NS_GPL(cxl_memdev_setup_fw_upload, CXL); +EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_fw_upload, CXL); static const struct file_operations cxl_memdev_fops = { .owner = THIS_MODULE, @@ -1002,36 +971,8 @@ static const struct file_operations cxl_memdev_fops = { .llseek = noop_llseek, }; -static void put_sanitize(void *data) -{ - struct cxl_memdev_state *mds = data; - - sysfs_put(mds->security.sanitize_node); -} - -static int cxl_memdev_security_init(struct cxl_memdev *cxlmd) -{ - struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - struct device *dev = &cxlmd->dev; - struct kernfs_node *sec; - - sec = sysfs_get_dirent(dev->kobj.sd, "security"); - if (!sec) { - dev_err(dev, "sysfs_get_dirent 'security' failed\n"); - return -ENODEV; - } - mds->security.sanitize_node = sysfs_get_dirent(sec, "state"); - sysfs_put(sec); - if (!mds->security.sanitize_node) { - dev_err(dev, "sysfs_get_dirent 'state' failed\n"); - return -ENODEV; - } - - return devm_add_action_or_reset(cxlds->dev, put_sanitize, mds); - } - -struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) +struct cxl_memdev *devm_cxl_add_memdev(struct device *host, + struct cxl_dev_state *cxlds) { struct cxl_memdev *cxlmd; struct device *dev; @@ -1059,11 +1000,7 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) if (rc) goto err; - rc = cxl_memdev_security_init(cxlmd); - if (rc) - goto err; - - rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd); + rc = devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd); if (rc) return ERR_PTR(rc); return cxlmd; @@ -1079,6 +1016,50 @@ err: } EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL); +static void sanitize_teardown_notifier(void *data) +{ + struct cxl_memdev_state *mds = data; + struct kernfs_node *state; + + /* + * Prevent new irq triggered invocations of the workqueue and + * flush inflight invocations. + */ + mutex_lock(&mds->mbox_mutex); + state = mds->security.sanitize_node; + mds->security.sanitize_node = NULL; + mutex_unlock(&mds->mbox_mutex); + + cancel_delayed_work_sync(&mds->security.poll_dwork); + sysfs_put(state); +} + +int devm_cxl_sanitize_setup_notifier(struct device *host, + struct cxl_memdev *cxlmd) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct kernfs_node *sec; + + if (!test_bit(CXL_SEC_ENABLED_SANITIZE, mds->security.enabled_cmds)) + return 0; + + /* + * Note, the expectation is that @cxlmd would have failed to be + * created if these sysfs_get_dirent calls fail. 
+ */ + sec = sysfs_get_dirent(cxlmd->dev.kobj.sd, "security"); + if (!sec) + return -ENOENT; + mds->security.sanitize_node = sysfs_get_dirent(sec, "state"); + sysfs_put(sec); + if (!mds->security.sanitize_node) + return -ENOENT; + + return devm_add_action_or_reset(host, sanitize_teardown_notifier, mds); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_sanitize_setup_notifier, CXL); + __init int cxl_memdev_init(void) { dev_t devt; diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index c7a7887ebdcf..eff20e83d0a6 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -5,6 +5,7 @@ #include <linux/delay.h> #include <linux/pci.h> #include <linux/pci-doe.h> +#include <linux/aer.h> #include <cxlpci.h> #include <cxlmem.h> #include <cxl.h> @@ -595,6 +596,16 @@ static int cxl_cdat_read_table(struct device *dev, return 0; } +static unsigned char cdat_checksum(void *buf, size_t size) +{ + unsigned char sum, *data = buf; + size_t i; + + for (sum = 0, i = 0; i < size; i++) + sum += data[i]; + return sum; +} + /** * read_cdat_data - Read the CDAT data on this port * @port: Port to read data from @@ -603,18 +614,30 @@ static int cxl_cdat_read_table(struct device *dev, */ void read_cdat_data(struct cxl_port *port) { - struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); - struct device *host = cxlmd->dev.parent; + struct device *uport = port->uport_dev; struct device *dev = &port->dev; struct pci_doe_mb *cdat_doe; + struct pci_dev *pdev = NULL; + struct cxl_memdev *cxlmd; size_t cdat_length; void *cdat_table; int rc; - if (!dev_is_pci(host)) + if (is_cxl_memdev(uport)) { + struct device *host; + + cxlmd = to_cxl_memdev(uport); + host = cxlmd->dev.parent; + if (dev_is_pci(host)) + pdev = to_pci_dev(host); + } else if (dev_is_pci(uport)) { + pdev = to_pci_dev(uport); + } + + if (!pdev) return; - cdat_doe = pci_find_doe_mailbox(to_pci_dev(host), - PCI_DVSEC_VENDOR_ID_CXL, + + cdat_doe = pci_find_doe_mailbox(pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DOE_PROTOCOL_TABLE_ACCESS); if (!cdat_doe) { dev_dbg(dev, "No CDAT mailbox\n"); @@ -634,44 +657,54 @@ void read_cdat_data(struct cxl_port *port) return; rc = cxl_cdat_read_table(dev, cdat_doe, cdat_table, &cdat_length); - if (rc) { - /* Don't leave table data allocated on error */ - devm_kfree(dev, cdat_table); - dev_err(dev, "CDAT data read error\n"); - return; - } + if (rc) + goto err; + + cdat_table = cdat_table + sizeof(__le32); + if (cdat_checksum(cdat_table, cdat_length)) + goto err; - port->cdat.table = cdat_table + sizeof(__le32); + port->cdat.table = cdat_table; port->cdat.length = cdat_length; + return; + +err: + /* Don't leave table data allocated on error */ + devm_kfree(dev, cdat_table); + dev_err(dev, "Failed to read/validate CDAT.\n"); } EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); -void cxl_cor_error_detected(struct pci_dev *pdev) +static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, + void __iomem *ras_base) { - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); void __iomem *addr; u32 status; - if (!cxlds->regs.ras) + if (!ras_base) return; - addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET; + addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; status = readl(addr); if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); trace_cxl_aer_correctable_error(cxlds->cxlmd, status); } } -EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL); + +static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds) +{ + return __cxl_handle_cor_ras(cxlds, cxlds->regs.ras); +} /* CXL spec 
rev3.0 8.2.4.16.1 */ -static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log) +static void header_log_copy(void __iomem *ras_base, u32 *log) { void __iomem *addr; u32 *log_addr; int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); - addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET; + addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET; log_addr = log; for (i = 0; i < log_u32_size; i++) { @@ -685,17 +718,18 @@ static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log) * Log the state of the RAS status registers and prepare them to log the * next error status. Return 1 if reset needed. */ -static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) +static bool __cxl_handle_ras(struct cxl_dev_state *cxlds, + void __iomem *ras_base) { u32 hl[CXL_HEADERLOG_SIZE_U32]; void __iomem *addr; u32 status; u32 fe; - if (!cxlds->regs.ras) + if (!ras_base) return false; - addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; + addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; status = readl(addr); if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) return false; @@ -703,7 +737,7 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) /* If multiple errors, log header points to first error from ctrl reg */ if (hweight32(status) > 1) { void __iomem *rcc_addr = - cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET; + ras_base + CXL_RAS_CAP_CONTROL_OFFSET; fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(rcc_addr))); @@ -711,13 +745,201 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) fe = status; } - header_log_copy(cxlds, hl); + header_log_copy(ras_base, hl); trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl); writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); return true; } +static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds) +{ + return __cxl_handle_ras(cxlds, cxlds->regs.ras); +} + +#ifdef CONFIG_PCIEAER_CXL + +static void cxl_dport_map_rch_aer(struct cxl_dport *dport) +{ + struct cxl_rcrb_info *ri = &dport->rcrb; + void __iomem *dport_aer = NULL; + resource_size_t aer_phys; + struct device *host; + + if (dport->rch && ri->aer_cap) { + host = dport->reg_map.host; + aer_phys = ri->aer_cap + ri->base; + dport_aer = devm_cxl_iomap_block(host, aer_phys, + sizeof(struct aer_capability_regs)); + } + + dport->regs.dport_aer = dport_aer; +} + +static void cxl_dport_map_regs(struct cxl_dport *dport) +{ + struct cxl_register_map *map = &dport->reg_map; + struct device *dev = dport->dport_dev; + + if (!map->component_map.ras.valid) + dev_dbg(dev, "RAS registers not found\n"); + else if (cxl_map_component_regs(map, &dport->regs.component, + BIT(CXL_CM_CAP_CAP_ID_RAS))) + dev_dbg(dev, "Failed to map RAS capability.\n"); + + if (dport->rch) + cxl_dport_map_rch_aer(dport); +} + +static void cxl_disable_rch_root_ints(struct cxl_dport *dport) +{ + void __iomem *aer_base = dport->regs.dport_aer; + struct pci_host_bridge *bridge; + u32 aer_cmd_mask, aer_cmd; + + if (!aer_base) + return; + + bridge = to_pci_host_bridge(dport->dport_dev); + + /* + * Disable RCH root port command interrupts. + * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors + * + * This sequence may not be necessary. CXL spec states disabling + * the root cmd register's interrupts is required. But, PCI spec + * shows these are disabled by default on reset. 
+ */ + if (bridge->native_aer) { + aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN | + PCI_ERR_ROOT_CMD_NONFATAL_EN | + PCI_ERR_ROOT_CMD_FATAL_EN); + aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND); + aer_cmd &= ~aer_cmd_mask; + writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND); + } +} + +void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport) +{ + struct device *dport_dev = dport->dport_dev; + struct pci_host_bridge *host_bridge; + + host_bridge = to_pci_host_bridge(dport_dev); + if (host_bridge->native_aer) + dport->rcrb.aer_cap = cxl_rcrb_to_aer(dport_dev, dport->rcrb.base); + + dport->reg_map.host = host; + cxl_dport_map_regs(dport); + + if (dport->rch) + cxl_disable_rch_root_ints(dport); +} +EXPORT_SYMBOL_NS_GPL(cxl_setup_parent_dport, CXL); + +static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds, + struct cxl_dport *dport) +{ + return __cxl_handle_cor_ras(cxlds, dport->regs.ras); +} + +static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds, + struct cxl_dport *dport) +{ + return __cxl_handle_ras(cxlds, dport->regs.ras); +} + +/* + * Copy the AER capability registers using 32 bit read accesses. + * This is necessary because RCRB AER capability is MMIO mapped. Clear the + * status after copying. + * + * @aer_base: base address of AER capability block in RCRB + * @aer_regs: destination for copying AER capability + */ +static bool cxl_rch_get_aer_info(void __iomem *aer_base, + struct aer_capability_regs *aer_regs) +{ + int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32); + u32 *aer_regs_buf = (u32 *)aer_regs; + int n; + + if (!aer_base) + return false; + + /* Use readl() to guarantee 32-bit accesses */ + for (n = 0; n < read_cnt; n++) + aer_regs_buf[n] = readl(aer_base + n * sizeof(u32)); + + writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS); + writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS); + + return true; +} + +/* Get AER severity. Return false if there is no error. 
*/ +static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs, + int *severity) +{ + if (aer_regs->uncor_status & ~aer_regs->uncor_mask) { + if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV) + *severity = AER_FATAL; + else + *severity = AER_NONFATAL; + return true; + } + + if (aer_regs->cor_status & ~aer_regs->cor_mask) { + *severity = AER_CORRECTABLE; + return true; + } + + return false; +} + +static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + struct aer_capability_regs aer_regs; + struct cxl_dport *dport; + struct cxl_port *port; + int severity; + + port = cxl_pci_find_port(pdev, &dport); + if (!port) + return; + + put_device(&port->dev); + + if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs)) + return; + + if (!cxl_rch_get_aer_severity(&aer_regs, &severity)) + return; + + pci_print_aer(pdev, severity, &aer_regs); + + if (severity == AER_CORRECTABLE) + cxl_handle_rdport_cor_ras(cxlds, dport); + else + cxl_handle_rdport_ras(cxlds, dport); +} + +#else +static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { } +#endif + +void cxl_cor_error_detected(struct pci_dev *pdev) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); + + cxl_handle_endpoint_cor_ras(cxlds); +} +EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL); + pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, pci_channel_state_t state) { @@ -726,13 +948,16 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, struct device *dev = &cxlmd->dev; bool ue; + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); + /* * A frozen channel indicates an impending reset which is fatal to * CXL.mem operation, and will likely crash the system. On the off * chance the situation is recoverable dump the status of the RAS * capability registers and bounce the active state of the memdev. */ - ue = cxl_report_and_clear(cxlds); + ue = cxl_handle_endpoint_ras(cxlds); switch (state) { case pci_channel_io_normal: diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 7ca01a834e18..38441634e4c6 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -28,9 +28,22 @@ * instantiated by the core. */ +/* + * All changes to the interleave configuration occur with this lock held + * for write. 
+ */ +DECLARE_RWSEM(cxl_region_rwsem); + static DEFINE_IDA(cxl_port_ida); static DEFINE_XARRAY(cxl_root_buses); +int cxl_num_decoders_committed(struct cxl_port *port) +{ + lockdep_assert_held(&cxl_region_rwsem); + + return port->commit_end + 1; +} + static ssize_t devtype_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -278,6 +291,15 @@ static ssize_t interleave_ways_show(struct device *dev, static DEVICE_ATTR_RO(interleave_ways); +static ssize_t qos_class_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); + + return sysfs_emit(buf, "%d\n", cxlrd->qos_class); +} +static DEVICE_ATTR_RO(qos_class); + static struct attribute *cxl_decoder_base_attrs[] = { &dev_attr_start.attr, &dev_attr_size.attr, @@ -297,6 +319,7 @@ static struct attribute *cxl_decoder_root_attrs[] = { &dev_attr_cap_type2.attr, &dev_attr_cap_type3.attr, &dev_attr_target_list.attr, + &dev_attr_qos_class.attr, SET_CXL_REGION_ATTR(create_pmem_region) SET_CXL_REGION_ATTR(create_ram_region) SET_CXL_REGION_ATTR(delete_region) @@ -521,8 +544,33 @@ static void cxl_port_release(struct device *dev) kfree(port); } +static ssize_t decoders_committed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_port *port = to_cxl_port(dev); + int rc; + + down_read(&cxl_region_rwsem); + rc = sysfs_emit(buf, "%d\n", cxl_num_decoders_committed(port)); + up_read(&cxl_region_rwsem); + + return rc; +} + +static DEVICE_ATTR_RO(decoders_committed); + +static struct attribute *cxl_port_attrs[] = { + &dev_attr_decoders_committed.attr, + NULL, +}; + +static struct attribute_group cxl_port_attribute_group = { + .attrs = cxl_port_attrs, +}; + static const struct attribute_group *cxl_port_attribute_groups[] = { &cxl_base_attribute_group, + &cxl_port_attribute_group, NULL, }; @@ -619,7 +667,6 @@ static int devm_cxl_link_parent_dport(struct device *host, static struct lock_class_key cxl_port_key; static struct cxl_port *cxl_port_alloc(struct device *uport_dev, - resource_size_t component_reg_phys, struct cxl_dport *parent_dport) { struct cxl_port *port; @@ -670,7 +717,6 @@ static struct cxl_port *cxl_port_alloc(struct device *uport_dev, } else dev->parent = uport_dev; - port->component_reg_phys = component_reg_phys; ida_init(&port->decoder_ida); port->hdm_end = -1; port->commit_end = -1; @@ -691,19 +737,21 @@ err: return ERR_PTR(rc); } -static int cxl_setup_comp_regs(struct device *dev, struct cxl_register_map *map, +static int cxl_setup_comp_regs(struct device *host, struct cxl_register_map *map, resource_size_t component_reg_phys) { - if (component_reg_phys == CXL_RESOURCE_NONE) - return 0; - *map = (struct cxl_register_map) { - .dev = dev, - .reg_type = CXL_REGLOC_RBI_COMPONENT, + .host = host, + .reg_type = CXL_REGLOC_RBI_EMPTY, .resource = component_reg_phys, - .max_size = CXL_COMPONENT_REG_BLOCK_SIZE, }; + if (component_reg_phys == CXL_RESOURCE_NONE) + return 0; + + map->reg_type = CXL_REGLOC_RBI_COMPONENT; + map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE; + return cxl_setup_regs(map); } @@ -712,17 +760,27 @@ static int cxl_port_setup_regs(struct cxl_port *port, { if (dev_is_platform(port->uport_dev)) return 0; - return cxl_setup_comp_regs(&port->dev, &port->comp_map, + return cxl_setup_comp_regs(&port->dev, &port->reg_map, component_reg_phys); } -static int cxl_dport_setup_regs(struct cxl_dport *dport, +static int cxl_dport_setup_regs(struct device *host, struct cxl_dport *dport, resource_size_t component_reg_phys) { + 
int rc; + if (dev_is_platform(dport->dport_dev)) return 0; - return cxl_setup_comp_regs(dport->dport_dev, &dport->comp_map, - component_reg_phys); + + /* + * use @dport->dport_dev for the context for error messages during + * register probing, and fixup @host after the fact, since @host may be + * NULL. + */ + rc = cxl_setup_comp_regs(dport->dport_dev, &dport->reg_map, + component_reg_phys); + dport->reg_map.host = host; + return rc; } static struct cxl_port *__devm_cxl_add_port(struct device *host, @@ -734,21 +792,36 @@ static struct cxl_port *__devm_cxl_add_port(struct device *host, struct device *dev; int rc; - port = cxl_port_alloc(uport_dev, component_reg_phys, parent_dport); + port = cxl_port_alloc(uport_dev, parent_dport); if (IS_ERR(port)) return port; dev = &port->dev; - if (is_cxl_memdev(uport_dev)) + if (is_cxl_memdev(uport_dev)) { + struct cxl_memdev *cxlmd = to_cxl_memdev(uport_dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + rc = dev_set_name(dev, "endpoint%d", port->id); - else if (parent_dport) + if (rc) + goto err; + + /* + * The endpoint driver already enumerated the component and RAS + * registers. Reuse that enumeration while prepping them to be + * mapped by the cxl_port driver. + */ + port->reg_map = cxlds->reg_map; + port->reg_map.host = &port->dev; + } else if (parent_dport) { rc = dev_set_name(dev, "port%d", port->id); - else - rc = dev_set_name(dev, "root%d", port->id); - if (rc) - goto err; + if (rc) + goto err; - rc = cxl_port_setup_regs(port, component_reg_phys); + rc = cxl_port_setup_regs(port, component_reg_phys); + if (rc) + goto err; + } else + rc = dev_set_name(dev, "root%d", port->id); if (rc) goto err; @@ -983,7 +1056,16 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, if (!dport) return ERR_PTR(-ENOMEM); - if (rcrb != CXL_RESOURCE_NONE) { + dport->dport_dev = dport_dev; + dport->port_id = port_id; + dport->port = port; + + if (rcrb == CXL_RESOURCE_NONE) { + rc = cxl_dport_setup_regs(&port->dev, dport, + component_reg_phys); + if (rc) + return ERR_PTR(rc); + } else { dport->rcrb.base = rcrb; component_reg_phys = __rcrb_to_component(dport_dev, &dport->rcrb, CXL_RCRB_DOWNSTREAM); @@ -992,6 +1074,14 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, return ERR_PTR(-ENXIO); } + /* + * RCH @dport is not ready to map until associated with its + * memdev + */ + rc = cxl_dport_setup_regs(NULL, dport, component_reg_phys); + if (rc) + return ERR_PTR(rc); + dport->rch = true; } @@ -999,14 +1089,6 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, dev_dbg(dport_dev, "Component Registers found for dport: %pa\n", &component_reg_phys); - dport->dport_dev = dport_dev; - dport->port_id = port_id; - dport->port = port; - - rc = cxl_dport_setup_regs(dport, component_reg_phys); - if (rc) - return ERR_PTR(rc); - cond_cxl_root_lock(port); rc = add_dport(port, dport); cond_cxl_root_unlock(port); @@ -1217,35 +1299,39 @@ static struct device *grandparent(struct device *dev) return NULL; } +static struct device *endpoint_host(struct cxl_port *endpoint) +{ + struct cxl_port *port = to_cxl_port(endpoint->dev.parent); + + if (is_cxl_root(port)) + return port->uport_dev; + return &port->dev; +} + static void delete_endpoint(void *data) { struct cxl_memdev *cxlmd = data; struct cxl_port *endpoint = cxlmd->endpoint; - struct cxl_port *parent_port; - struct device *parent; + struct device *host = endpoint_host(endpoint); - parent_port = cxl_mem_find_port(cxlmd, NULL); - if (!parent_port) - goto out; - parent = 
&parent_port->dev; - - device_lock(parent); - if (parent->driver && !endpoint->dead) { - devm_release_action(parent, cxl_unlink_parent_dport, endpoint); - devm_release_action(parent, cxl_unlink_uport, endpoint); - devm_release_action(parent, unregister_port, endpoint); + device_lock(host); + if (host->driver && !endpoint->dead) { + devm_release_action(host, cxl_unlink_parent_dport, endpoint); + devm_release_action(host, cxl_unlink_uport, endpoint); + devm_release_action(host, unregister_port, endpoint); } cxlmd->endpoint = NULL; - device_unlock(parent); - put_device(parent); -out: + device_unlock(host); put_device(&endpoint->dev); + put_device(host); } int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint) { + struct device *host = endpoint_host(endpoint); struct device *dev = &cxlmd->dev; + get_device(host); get_device(&endpoint->dev); cxlmd->endpoint = endpoint; cxlmd->depth = endpoint->depth; @@ -1468,7 +1554,11 @@ retry: struct cxl_dport *dport; struct cxl_port *port; - if (!dport_dev) + /* + * The terminal "grandparent" in PCI is NULL and @platform_bus + * for platform devices + */ + if (!dport_dev || dport_dev == &platform_bus) return 0; uport_dev = dport_dev->parent; @@ -1691,6 +1781,7 @@ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, } atomic_set(&cxlrd->region_id, rc); + cxlrd->qos_class = CXL_QOS_CLASS_INVALID; return cxlrd; } EXPORT_SYMBOL_NS_GPL(cxl_root_decoder_alloc, CXL); @@ -2062,3 +2153,4 @@ static void cxl_core_exit(void) subsys_initcall(cxl_core_init); module_exit(cxl_core_exit); MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(CXL); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 6d63b8798c29..56e575c79bb4 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -28,12 +28,6 @@ * 3. Decoder targets */ -/* - * All changes to the interleave configuration occur with this lock held - * for write. - */ -static DECLARE_RWSEM(cxl_region_rwsem); - static struct cxl_region *to_cxl_region(struct device *dev); static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, @@ -129,7 +123,7 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr) { if (!cpu_cache_has_invalidate_memregion()) { if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) { - dev_warn_once( + dev_info_once( &cxlr->dev, "Bypassing cpu_cache_invalidate_memregion() for testing!\n"); return 0; @@ -294,7 +288,7 @@ static ssize_t commit_store(struct device *dev, struct device_attribute *attr, */ rc = cxl_region_invalidate_memregion(cxlr); if (rc) - return rc; + goto out; if (commit) { rc = cxl_region_decode_commit(cxlr); @@ -1133,7 +1127,14 @@ static int cxl_port_setup_targets(struct cxl_port *port, } if (is_cxl_root(parent_port)) { - parent_ig = cxlrd->cxlsd.cxld.interleave_granularity; + /* + * Root decoder IG is always set to value in CFMWS which + * may be different than this region's IG. We can use the + * region's IG here since interleave_granularity_store() + * does not allow interleaved host-bridges with + * root IG != region IG. 
+ */ + parent_ig = p->interleave_granularity; parent_iw = cxlrd->cxlsd.cxld.interleave_ways; /* * For purposes of address bit routing, use power-of-2 math for @@ -1195,6 +1196,14 @@ static int cxl_port_setup_targets(struct cxl_port *port, return rc; } + if (iw > 8 || iw > cxlsd->nr_targets) { + dev_dbg(&cxlr->dev, + "%s:%s:%s: ways: %d overflows targets: %d\n", + dev_name(port->uport_dev), dev_name(&port->dev), + dev_name(&cxld->dev), iw, cxlsd->nr_targets); + return -ENXIO; + } + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { if (cxld->interleave_ways != iw || cxld->interleave_granularity != ig || @@ -1480,6 +1489,14 @@ static int cxl_region_attach_auto(struct cxl_region *cxlr, return 0; } +static int cmp_interleave_pos(const void *a, const void *b) +{ + struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a; + struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b; + + return cxled_a->pos - cxled_b->pos; +} + static struct cxl_port *next_port(struct cxl_port *port) { if (!port->parent_dport) @@ -1487,119 +1504,127 @@ static struct cxl_port *next_port(struct cxl_port *port) return port->parent_dport->port; } -static int decoder_match_range(struct device *dev, void *data) +static int match_switch_decoder_by_range(struct device *dev, void *data) { - struct cxl_endpoint_decoder *cxled = data; struct cxl_switch_decoder *cxlsd; + struct range *r1, *r2 = data; if (!is_switch_decoder(dev)) return 0; cxlsd = to_cxl_switch_decoder(dev); - return range_contains(&cxlsd->cxld.hpa_range, &cxled->cxld.hpa_range); -} - -static void find_positions(const struct cxl_switch_decoder *cxlsd, - const struct cxl_port *iter_a, - const struct cxl_port *iter_b, int *a_pos, - int *b_pos) -{ - int i; + r1 = &cxlsd->cxld.hpa_range; - for (i = 0, *a_pos = -1, *b_pos = -1; i < cxlsd->nr_targets; i++) { - if (cxlsd->target[i] == iter_a->parent_dport) - *a_pos = i; - else if (cxlsd->target[i] == iter_b->parent_dport) - *b_pos = i; - if (*a_pos >= 0 && *b_pos >= 0) - break; - } + if (is_root_decoder(dev)) + return range_contains(r1, r2); + return (r1->start == r2->start && r1->end == r2->end); } -static int cmp_decode_pos(const void *a, const void *b) +static int find_pos_and_ways(struct cxl_port *port, struct range *range, + int *pos, int *ways) { - struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a; - struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b; - struct cxl_memdev *cxlmd_a = cxled_to_memdev(cxled_a); - struct cxl_memdev *cxlmd_b = cxled_to_memdev(cxled_b); - struct cxl_port *port_a = cxled_to_port(cxled_a); - struct cxl_port *port_b = cxled_to_port(cxled_b); - struct cxl_port *iter_a, *iter_b, *port = NULL; struct cxl_switch_decoder *cxlsd; + struct cxl_port *parent; struct device *dev; - int a_pos, b_pos; - unsigned int seq; - - /* Exit early if any prior sorting failed */ - if (cxled_a->pos < 0 || cxled_b->pos < 0) - return 0; + int rc = -ENXIO; - /* - * Walk up the hierarchy to find a shared port, find the decoder that - * maps the range, compare the relative position of those dport - * mappings. 
- */ - for (iter_a = port_a; iter_a; iter_a = next_port(iter_a)) { - struct cxl_port *next_a, *next_b; + parent = next_port(port); + if (!parent) + return rc; - next_a = next_port(iter_a); - if (!next_a) - break; + dev = device_find_child(&parent->dev, range, + match_switch_decoder_by_range); + if (!dev) { + dev_err(port->uport_dev, + "failed to find decoder mapping %#llx-%#llx\n", + range->start, range->end); + return rc; + } + cxlsd = to_cxl_switch_decoder(dev); + *ways = cxlsd->cxld.interleave_ways; - for (iter_b = port_b; iter_b; iter_b = next_port(iter_b)) { - next_b = next_port(iter_b); - if (next_a != next_b) - continue; - port = next_a; + for (int i = 0; i < *ways; i++) { + if (cxlsd->target[i] == port->parent_dport) { + *pos = i; + rc = 0; break; } - - if (port) - break; } + put_device(dev); - if (!port) { - dev_err(cxlmd_a->dev.parent, - "failed to find shared port with %s\n", - dev_name(cxlmd_b->dev.parent)); - goto err; - } + return rc; +} - dev = device_find_child(&port->dev, cxled_a, decoder_match_range); - if (!dev) { - struct range *range = &cxled_a->cxld.hpa_range; +/** + * cxl_calc_interleave_pos() - calculate an endpoint position in a region + * @cxled: endpoint decoder member of given region + * + * The endpoint position is calculated by traversing the topology from + * the endpoint to the root decoder and iteratively applying this + * calculation: + * + * position = position * parent_ways + parent_pos; + * + * ...where @position is inferred from switch and root decoder target lists. + * + * Return: position >= 0 on success + * -ENXIO on failure + */ +static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled) +{ + struct cxl_port *iter, *port = cxled_to_port(cxled); + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct range *range = &cxled->cxld.hpa_range; + int parent_ways = 0, parent_pos = 0, pos = 0; + int rc; - dev_err(port->uport_dev, - "failed to find decoder that maps %#llx-%#llx\n", - range->start, range->end); - goto err; - } + /* + * Example: the expected interleave order of the 4-way region shown + * below is: mem0, mem2, mem1, mem3 + * + * root_port + * / \ + * host_bridge_0 host_bridge_1 + * | | | | + * mem0 mem1 mem2 mem3 + * + * In the example the calculator will iterate twice. The first iteration + * uses the mem position in the host-bridge and the ways of the host- + * bridge to generate the first, or local, position. The second + * iteration uses the host-bridge position in the root_port and the ways + * of the root_port to refine the position. + * + * A trace of the calculation per endpoint looks like this: + * mem0: pos = 0 * 2 + 0 mem2: pos = 0 * 2 + 0 + * pos = 0 * 2 + 0 pos = 0 * 2 + 1 + * pos: 0 pos: 1 + * + * mem1: pos = 0 * 2 + 1 mem3: pos = 0 * 2 + 1 + * pos = 1 * 2 + 0 pos = 1 * 2 + 1 + * pos: 2 pos = 3 + * + * Note that while this example is simple, the method applies to more + * complex topologies, including those with switches. 
+ */ - cxlsd = to_cxl_switch_decoder(dev); - do { - seq = read_seqbegin(&cxlsd->target_lock); - find_positions(cxlsd, iter_a, iter_b, &a_pos, &b_pos); - } while (read_seqretry(&cxlsd->target_lock, seq)); + /* Iterate from endpoint to root_port refining the position */ + for (iter = port; iter; iter = next_port(iter)) { + if (is_cxl_root(iter)) + break; - put_device(dev); + rc = find_pos_and_ways(iter, range, &parent_pos, &parent_ways); + if (rc) + return rc; - if (a_pos < 0 || b_pos < 0) { - dev_err(port->uport_dev, - "failed to find shared decoder for %s and %s\n", - dev_name(cxlmd_a->dev.parent), - dev_name(cxlmd_b->dev.parent)); - goto err; + pos = pos * parent_ways + parent_pos; } - dev_dbg(port->uport_dev, "%s comes %s %s\n", - dev_name(cxlmd_a->dev.parent), - a_pos - b_pos < 0 ? "before" : "after", - dev_name(cxlmd_b->dev.parent)); + dev_dbg(&cxlmd->dev, + "decoder:%s parent:%s port:%s range:%#llx-%#llx pos:%d\n", + dev_name(&cxled->cxld.dev), dev_name(cxlmd->dev.parent), + dev_name(&port->dev), range->start, range->end, pos); - return a_pos - b_pos; -err: - cxled_a->pos = -1; - return 0; + return pos; } static int cxl_region_sort_targets(struct cxl_region *cxlr) @@ -1607,22 +1632,21 @@ static int cxl_region_sort_targets(struct cxl_region *cxlr) struct cxl_region_params *p = &cxlr->params; int i, rc = 0; - sort(p->targets, p->nr_targets, sizeof(p->targets[0]), cmp_decode_pos, - NULL); - for (i = 0; i < p->nr_targets; i++) { struct cxl_endpoint_decoder *cxled = p->targets[i]; + cxled->pos = cxl_calc_interleave_pos(cxled); /* - * Record that sorting failed, but still continue to restore - * cxled->pos with its ->targets[] position so that follow-on - * code paths can reliably do p->targets[cxled->pos] to - * self-reference their entry. + * Record that sorting failed, but still continue to calc + * cxled->pos so that follow-on code paths can reliably + * do p->targets[cxled->pos] to self-reference their entry. */ if (cxled->pos < 0) rc = -ENXIO; - cxled->pos = i; } + /* Keep the cxlr target list in interleave position order */ + sort(p->targets, p->nr_targets, sizeof(p->targets[0]), + cmp_interleave_pos, NULL); dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful"); return rc; @@ -1658,6 +1682,12 @@ static int cxl_region_attach(struct cxl_region *cxlr, return -ENXIO; } + if (p->nr_targets >= p->interleave_ways) { + dev_dbg(&cxlr->dev, "region already has %d endpoints\n", + p->nr_targets); + return -EINVAL; + } + ep_port = cxled_to_port(cxled); root_port = cxlrd_to_port(cxlrd); dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge); @@ -1750,7 +1780,7 @@ static int cxl_region_attach(struct cxl_region *cxlr, if (p->nr_targets == p->interleave_ways) { rc = cxl_region_setup_targets(cxlr); if (rc) - goto err_decrement; + return rc; p->state = CXL_CONFIG_ACTIVE; } @@ -1761,13 +1791,27 @@ static int cxl_region_attach(struct cxl_region *cxlr, .end = p->res->end, }; - return 0; + if (p->nr_targets != p->interleave_ways) + return 0; -err_decrement: - p->nr_targets--; - cxled->pos = -1; - p->targets[pos] = NULL; - return rc; + /* + * Test the auto-discovery position calculator function + * against this successfully created user-defined region. + * A fail message here means that this interleave config + * will fail when presented as CXL_REGION_F_AUTO. 
+ */ + for (int i = 0; i < p->nr_targets; i++) { + struct cxl_endpoint_decoder *cxled = p->targets[i]; + int test_pos; + + test_pos = cxl_calc_interleave_pos(cxled); + dev_dbg(&cxled->cxld.dev, + "Test cxl_calc_interleave_pos(): %s test_pos:%d cxled->pos:%d\n", + (test_pos == cxled->pos) ? "success" : "fail", + test_pos, cxled->pos); + } + + return 0; } static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) @@ -2696,7 +2740,7 @@ err: return rc; } -static int match_decoder_by_range(struct device *dev, void *data) +static int match_root_decoder_by_range(struct device *dev, void *data) { struct range *r1, *r2 = data; struct cxl_root_decoder *cxlrd; @@ -2827,7 +2871,7 @@ int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled) int rc; cxlrd_dev = device_find_child(&root->dev, &cxld->hpa_range, - match_decoder_by_range); + match_root_decoder_by_range); if (!cxlrd_dev) { dev_err(cxlmd->dev.parent, "%s:%s no CXL window for range %#llx:%#llx\n", diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 6281127b3e9d..372786f80955 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -204,7 +204,7 @@ int cxl_map_component_regs(const struct cxl_register_map *map, struct cxl_component_regs *regs, unsigned long map_mask) { - struct device *dev = map->dev; + struct device *host = map->host; struct mapinfo { const struct cxl_reg_map *rmap; void __iomem **addr; @@ -216,16 +216,16 @@ int cxl_map_component_regs(const struct cxl_register_map *map, for (i = 0; i < ARRAY_SIZE(mapinfo); i++) { struct mapinfo *mi = &mapinfo[i]; - resource_size_t phys_addr; + resource_size_t addr; resource_size_t length; if (!mi->rmap->valid) continue; if (!test_bit(mi->rmap->id, &map_mask)) continue; - phys_addr = map->resource + mi->rmap->offset; + addr = map->resource + mi->rmap->offset; length = mi->rmap->size; - *(mi->addr) = devm_cxl_iomap_block(dev, phys_addr, length); + *(mi->addr) = devm_cxl_iomap_block(host, addr, length); if (!*(mi->addr)) return -ENOMEM; } @@ -237,7 +237,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_component_regs, CXL); int cxl_map_device_regs(const struct cxl_register_map *map, struct cxl_device_regs *regs) { - struct device *dev = map->dev; + struct device *host = map->host; resource_size_t phys_addr = map->resource; struct mapinfo { const struct cxl_reg_map *rmap; @@ -259,7 +259,7 @@ int cxl_map_device_regs(const struct cxl_register_map *map, addr = phys_addr + mi->rmap->offset; length = mi->rmap->size; - *(mi->addr) = devm_cxl_iomap_block(dev, addr, length); + *(mi->addr) = devm_cxl_iomap_block(host, addr, length); if (!*(mi->addr)) return -ENOMEM; } @@ -309,7 +309,7 @@ int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, int regloc, i; *map = (struct cxl_register_map) { - .dev = &pdev->dev, + .host = &pdev->dev, .resource = CXL_RESOURCE_NONE, }; @@ -386,10 +386,9 @@ int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type) } EXPORT_SYMBOL_NS_GPL(cxl_count_regblock, CXL); -int cxl_map_pmu_regs(struct pci_dev *pdev, struct cxl_pmu_regs *regs, - struct cxl_register_map *map) +int cxl_map_pmu_regs(struct cxl_register_map *map, struct cxl_pmu_regs *regs) { - struct device *dev = &pdev->dev; + struct device *dev = map->host; resource_size_t phys_addr; phys_addr = map->resource; @@ -403,15 +402,15 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_pmu_regs, CXL); static int cxl_map_regblock(struct cxl_register_map *map) { - struct device *dev = map->dev; + struct device *host = map->host; map->base = ioremap(map->resource, 
map->max_size); if (!map->base) { - dev_err(dev, "failed to map registers\n"); + dev_err(host, "failed to map registers\n"); return -ENOMEM; } - dev_dbg(dev, "Mapped CXL Memory Device resource %pa\n", &map->resource); + dev_dbg(host, "Mapped CXL Memory Device resource %pa\n", &map->resource); return 0; } @@ -425,28 +424,28 @@ static int cxl_probe_regs(struct cxl_register_map *map) { struct cxl_component_reg_map *comp_map; struct cxl_device_reg_map *dev_map; - struct device *dev = map->dev; + struct device *host = map->host; void __iomem *base = map->base; switch (map->reg_type) { case CXL_REGLOC_RBI_COMPONENT: comp_map = &map->component_map; - cxl_probe_component_regs(dev, base, comp_map); - dev_dbg(dev, "Set up component registers\n"); + cxl_probe_component_regs(host, base, comp_map); + dev_dbg(host, "Set up component registers\n"); break; case CXL_REGLOC_RBI_MEMDEV: dev_map = &map->device_map; - cxl_probe_device_regs(dev, base, dev_map); + cxl_probe_device_regs(host, base, dev_map); if (!dev_map->status.valid || !dev_map->mbox.valid || !dev_map->memdev.valid) { - dev_err(dev, "registers not found: %s%s%s\n", + dev_err(host, "registers not found: %s%s%s\n", !dev_map->status.valid ? "status " : "", !dev_map->mbox.valid ? "mbox " : "", !dev_map->memdev.valid ? "memdev " : ""); return -ENXIO; } - dev_dbg(dev, "Probing device registers...\n"); + dev_dbg(host, "Probing device registers...\n"); break; default: break; @@ -470,6 +469,42 @@ int cxl_setup_regs(struct cxl_register_map *map) } EXPORT_SYMBOL_NS_GPL(cxl_setup_regs, CXL); +u16 cxl_rcrb_to_aer(struct device *dev, resource_size_t rcrb) +{ + void __iomem *addr; + u16 offset = 0; + u32 cap_hdr; + + if (WARN_ON_ONCE(rcrb == CXL_RESOURCE_NONE)) + return 0; + + if (!request_mem_region(rcrb, SZ_4K, dev_name(dev))) + return 0; + + addr = ioremap(rcrb, SZ_4K); + if (!addr) + goto out; + + cap_hdr = readl(addr + offset); + while (PCI_EXT_CAP_ID(cap_hdr) != PCI_EXT_CAP_ID_ERR) { + offset = PCI_EXT_CAP_NEXT(cap_hdr); + + /* Offset 0 terminates capability list. 
*/ + if (!offset) + break; + cap_hdr = readl(addr + offset); + } + + if (offset) + dev_dbg(dev, "found AER extended capability (0x%x)\n", offset); + + iounmap(addr); +out: + release_mem_region(rcrb, SZ_4K); + + return offset; +} + resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri, enum cxl_rcrb which) { diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 76d92561af29..687043ece101 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -221,6 +221,14 @@ struct cxl_regs { struct_group_tagged(cxl_pmu_regs, pmu_regs, void __iomem *pmu; ); + + /* + * RCH downstream port specific RAS register + * @aer: CXL 3.0 8.2.1.1 RCH Downstream Port RCRB + */ + struct_group_tagged(cxl_rch_regs, rch_regs, + void __iomem *dport_aer; + ); }; struct cxl_reg_map { @@ -247,7 +255,7 @@ struct cxl_pmu_reg_map { /** * struct cxl_register_map - DVSEC harvested register block mapping parameters - * @dev: device for devm operations and logging + * @host: device for devm operations and logging * @base: virtual base of the register-block-BAR + @block_offset * @resource: physical resource base of the register block * @max_size: maximum mapping size to perform register search @@ -257,7 +265,7 @@ struct cxl_pmu_reg_map { * @pmu_map: cxl_reg_maps for CXL Performance Monitoring Units */ struct cxl_register_map { - struct device *dev; + struct device *host; void __iomem *base; resource_size_t resource; resource_size_t max_size; @@ -278,8 +286,7 @@ int cxl_map_component_regs(const struct cxl_register_map *map, unsigned long map_mask); int cxl_map_device_regs(const struct cxl_register_map *map, struct cxl_device_regs *regs); -int cxl_map_pmu_regs(struct pci_dev *pdev, struct cxl_pmu_regs *regs, - struct cxl_register_map *map); +int cxl_map_pmu_regs(struct cxl_register_map *map, struct cxl_pmu_regs *regs); enum cxl_regloc_type; int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type); @@ -321,6 +328,7 @@ enum cxl_decoder_type { */ #define CXL_DECODER_MAX_INTERLEAVE 16 +#define CXL_QOS_CLASS_INVALID -1 /** * struct cxl_decoder - Common CXL HDM Decoder Attributes @@ -432,6 +440,7 @@ typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd, * @calc_hb: which host bridge covers the n'th position by granularity * @platform_data: platform specific configuration data * @range_lock: sync region autodiscovery by address range + * @qos_class: QoS performance class cookie * @cxlsd: base cxl switch decoder */ struct cxl_root_decoder { @@ -440,6 +449,7 @@ struct cxl_root_decoder { cxl_calc_hb_fn calc_hb; void *platform_data; struct mutex range_lock; + int qos_class; struct cxl_switch_decoder cxlsd; }; @@ -572,11 +582,10 @@ struct cxl_dax_region { * @regions: cxl_region_ref instances, regions mapped by this port * @parent_dport: dport that points to this port in the parent * @decoder_ida: allocator for decoder ids - * @comp_map: component register capability mappings + * @reg_map: component and ras register mapping parameters * @nr_dports: number of entries in @dports * @hdm_end: track last allocated HDM decoder instance for allocation ordering * @commit_end: cursor to track highest committed decoder for commit ordering - * @component_reg_phys: component register capability base address (optional) * @dead: last ep has been removed, force port re-creation * @depth: How deep this port is relative to the root. depth 0 is the root. 
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 76d92561af29..687043ece101 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -221,6 +221,14 @@ struct cxl_regs {
 	struct_group_tagged(cxl_pmu_regs, pmu_regs,
 		void __iomem *pmu;
 	);
+
+	/*
+	 * RCH downstream port specific RAS register
+	 * @aer: CXL 3.0 8.2.1.1 RCH Downstream Port RCRB
+	 */
+	struct_group_tagged(cxl_rch_regs, rch_regs,
+		void __iomem *dport_aer;
+	);
 };
 
 struct cxl_reg_map {
@@ -247,7 +255,7 @@ struct cxl_pmu_reg_map {
 
 /**
  * struct cxl_register_map - DVSEC harvested register block mapping parameters
- * @dev: device for devm operations and logging
+ * @host: device for devm operations and logging
  * @base: virtual base of the register-block-BAR + @block_offset
  * @resource: physical resource base of the register block
  * @max_size: maximum mapping size to perform register search
@@ -257,7 +265,7 @@ struct cxl_pmu_reg_map {
  * @pmu_map: cxl_reg_maps for CXL Performance Monitoring Units
  */
 struct cxl_register_map {
-	struct device *dev;
+	struct device *host;
 	void __iomem *base;
 	resource_size_t resource;
 	resource_size_t max_size;
@@ -278,8 +286,7 @@ int cxl_map_component_regs(const struct cxl_register_map *map,
 			   unsigned long map_mask);
 int cxl_map_device_regs(const struct cxl_register_map *map,
 			struct cxl_device_regs *regs);
-int cxl_map_pmu_regs(struct pci_dev *pdev, struct cxl_pmu_regs *regs,
-		     struct cxl_register_map *map);
+int cxl_map_pmu_regs(struct cxl_register_map *map, struct cxl_pmu_regs *regs);
 
 enum cxl_regloc_type;
 int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type);
@@ -321,6 +328,7 @@ enum cxl_decoder_type {
  */
 #define CXL_DECODER_MAX_INTERLEAVE 16
 
+#define CXL_QOS_CLASS_INVALID -1
 
 /**
  * struct cxl_decoder - Common CXL HDM Decoder Attributes
@@ -432,6 +440,7 @@ typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd,
  * @calc_hb: which host bridge covers the n'th position by granularity
  * @platform_data: platform specific configuration data
  * @range_lock: sync region autodiscovery by address range
+ * @qos_class: QoS performance class cookie
  * @cxlsd: base cxl switch decoder
  */
 struct cxl_root_decoder {
@@ -440,6 +449,7 @@ struct cxl_root_decoder {
 	cxl_calc_hb_fn calc_hb;
 	void *platform_data;
 	struct mutex range_lock;
+	int qos_class;
 	struct cxl_switch_decoder cxlsd;
 };
 
@@ -572,11 +582,10 @@ struct cxl_dax_region {
  * @regions: cxl_region_ref instances, regions mapped by this port
  * @parent_dport: dport that points to this port in the parent
  * @decoder_ida: allocator for decoder ids
- * @comp_map: component register capability mappings
+ * @reg_map: component and ras register mapping parameters
  * @nr_dports: number of entries in @dports
 * @hdm_end: track last allocated HDM decoder instance for allocation ordering
  * @commit_end: cursor to track highest committed decoder for commit ordering
- * @component_reg_phys: component register capability base address (optional)
  * @dead: last ep has been removed, force port re-creation
  * @depth: How deep this port is relative to the root. depth 0 is the root.
  * @cdat: Cached CDAT data
@@ -592,11 +601,10 @@ struct cxl_port {
 	struct xarray regions;
 	struct cxl_dport *parent_dport;
 	struct ida decoder_ida;
-	struct cxl_register_map comp_map;
+	struct cxl_register_map reg_map;
 	int nr_dports;
 	int hdm_end;
 	int commit_end;
-	resource_size_t component_reg_phys;
 	bool dead;
 	unsigned int depth;
 	struct cxl_cdat {
@@ -620,19 +628,21 @@ struct cxl_rcrb_info {
 /**
  * struct cxl_dport - CXL downstream port
  * @dport_dev: PCI bridge or firmware device representing the downstream link
- * @comp_map: component register capability mappings
+ * @reg_map: component and ras register mapping parameters
  * @port_id: unique hardware identifier for dport in decoder target list
  * @rcrb: Data about the Root Complex Register Block layout
  * @rch: Indicate whether this dport was enumerated in RCH or VH mode
  * @port: reference to cxl_port that contains this downstream port
+ * @regs: Dport parsed register blocks
  */
 struct cxl_dport {
 	struct device *dport_dev;
-	struct cxl_register_map comp_map;
+	struct cxl_register_map reg_map;
 	int port_id;
 	struct cxl_rcrb_info rcrb;
 	bool rch;
 	struct cxl_port *port;
+	struct cxl_regs regs;
 };
 
 /**
@@ -679,6 +689,7 @@ static inline bool is_cxl_root(struct cxl_port *port)
 	return port->uport_dev == port->dev.parent;
 }
 
+int cxl_num_decoders_committed(struct cxl_port *port);
 bool is_cxl_port(const struct device *dev);
 struct cxl_port *to_cxl_port(const struct device *dev);
 struct pci_bus;
@@ -706,6 +717,13 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
 					 struct device *dport_dev, int port_id,
 					 resource_size_t rcrb);
 
+#ifdef CONFIG_PCIEAER_CXL
+void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport);
+#else
+static inline void cxl_setup_parent_dport(struct device *host,
+					  struct cxl_dport *dport) { }
+#endif
+
 struct cxl_decoder *to_cxl_decoder(struct device *dev);
 struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev);
 struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
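The new rch_regs group follows the struct_group_tagged() pattern already used for pmu_regs above. Roughly — simplified and ignoring attributes — the macro from include/linux/stddef.h expands to an anonymous union, so the same storage is reachable both as individual members and as a named struct cxl_rch_regs:

/* Approximate expansion of the rch_regs group inside struct cxl_regs: */
union {
	struct {
		void __iomem *dport_aer;
	};				/* direct access: regs.dport_aer */
	struct cxl_rch_regs {
		void __iomem *dport_aer;
	} rch_regs;			/* group access: &regs.rch_regs */
};

This lets a helper take a struct cxl_rch_regs * and be type-safe about which register block it operates on, without a separate allocation.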
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 706f8a6d1ef4..a2fcbca253f3 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -84,9 +84,12 @@ static inline bool is_cxl_endpoint(struct cxl_port *port)
 	return is_cxl_memdev(port->uport_dev);
 }
 
-struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds);
+struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
+				       struct cxl_dev_state *cxlds);
+int devm_cxl_sanitize_setup_notifier(struct device *host,
+				     struct cxl_memdev *cxlmd);
 struct cxl_memdev_state;
-int cxl_memdev_setup_fw_upload(struct cxl_memdev_state *mds);
+int devm_cxl_setup_fw_upload(struct device *host, struct cxl_memdev_state *mds);
 int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
 			 resource_size_t base, resource_size_t len,
 			 resource_size_t skipped);
@@ -360,16 +363,16 @@ struct cxl_fw_state {
  *
  * @state: state of last security operation
  * @enabled_cmds: All security commands enabled in the CEL
- * @poll: polling for sanitization is enabled, device has no mbox irq support
  * @poll_tmo_secs: polling timeout
+ * @sanitize_active: sanitize completion pending
  * @poll_dwork: polling work item
  * @sanitize_node: sanitation sysfs file to notify
  */
 struct cxl_security_state {
 	unsigned long state;
 	DECLARE_BITMAP(enabled_cmds, CXL_SEC_ENABLED_MAX);
-	bool poll;
 	int poll_tmo_secs;
+	bool sanitize_active;
 	struct delayed_work poll_dwork;
 	struct kernfs_node *sanitize_node;
 };
@@ -397,6 +400,7 @@ enum cxl_devtype {
  *
  * @dev: The device associated with this CXL state
  * @cxlmd: The device representing the CXL.mem capabilities of @dev
+ * @reg_map: component and ras register mapping parameters
  * @regs: Parsed register blocks
  * @cxl_dvsec: Offset to the PCIe device DVSEC
  * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH)
@@ -404,13 +408,13 @@ enum cxl_devtype {
  * @dpa_res: Overall DPA resource tree for the device
  * @pmem_res: Active Persistent memory capacity configuration
  * @ram_res: Active Volatile memory capacity configuration
- * @component_reg_phys: register base of component registers
  * @serial: PCIe Device Serial Number
  * @type: Generic Memory Class device or Vendor Specific Memory device
  */
 struct cxl_dev_state {
 	struct device *dev;
 	struct cxl_memdev *cxlmd;
+	struct cxl_register_map reg_map;
 	struct cxl_regs regs;
 	int cxl_dvsec;
 	bool rcd;
@@ -418,7 +422,6 @@ struct cxl_dev_state {
 	struct resource dpa_res;
 	struct resource pmem_res;
 	struct resource ram_res;
-	resource_size_t component_reg_phys;
 	u64 serial;
 	enum cxl_devtype type;
 };
@@ -883,7 +886,7 @@ static inline void cxl_mem_active_dec(void)
 }
 #endif
 
-int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd);
+int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd);
 
 struct cxl_hdm {
 	struct cxl_component_regs regs;
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 317c7548e4e9..e087febf9af0 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -49,7 +49,6 @@ static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd,
 				 struct cxl_dport *parent_dport)
 {
 	struct cxl_port *parent_port = parent_dport->port;
-	struct cxl_dev_state *cxlds = cxlmd->cxlds;
 	struct cxl_port *endpoint, *iter, *down;
 	int rc;
 
@@ -65,8 +64,8 @@ static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd,
 		ep->next = down;
 	}
 
-	endpoint = devm_cxl_add_port(host, &cxlmd->dev,
-				     cxlds->component_reg_phys,
+	/* Note: endpoint port component registers are derived from @cxlds */
+	endpoint = devm_cxl_add_port(host, &cxlmd->dev, CXL_RESOURCE_NONE,
 				     parent_dport);
 	if (IS_ERR(endpoint))
 		return PTR_ERR(endpoint);
@@ -158,6 +157,8 @@ static int cxl_mem_probe(struct device *dev)
 	else
 		endpoint_parent = &parent_port->dev;
 
+	cxl_setup_parent_dport(dev, dport);
+
 	device_lock(endpoint_parent);
 	if (!endpoint_parent->driver) {
 		dev_err(dev, "CXL port topology %s not enabled\n",
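A theme of this series is that devm_cxl_add_memdev(), devm_cxl_setup_fw_upload() and devm_cxl_sanitize_setup_notifier() now take an explicit @host, so their cleanup is registered against the binding driver's device rather than an object that can outlive the driver. A minimal sketch of that convention, with devm_my_setup()/my_teardown() as hypothetical names:

#include <linux/device.h>

static void my_teardown(void *data)
{
	/* undo whatever devm_my_setup() established */
}

/* hypothetical helper following the devm_cxl_*(host, ...) convention */
static int devm_my_setup(struct device *host, void *state)
{
	/* ... set up @state ... */

	/*
	 * Tie teardown to @host (e.g. the cxl_pci driver's PCI device),
	 * so it runs at driver unbind instead of at final device put.
	 */
	return devm_add_action_or_reset(host, my_teardown, state);
}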
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 44a21ab7add5..0155fb66b580 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -85,25 +85,28 @@ static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
 		    status & CXLMDEV_DEV_FATAL ? " fatal" : "",        \
 		    status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
 
+/*
+ * Threaded irq dev_id's must be globally unique. cxl_dev_id provides a unique
+ * wrapper object for each irq within the same cxlds.
+ */
 struct cxl_dev_id {
 	struct cxl_dev_state *cxlds;
 };
 
 static int cxl_request_irq(struct cxl_dev_state *cxlds, int irq,
-			   irq_handler_t handler, irq_handler_t thread_fn)
+			   irq_handler_t thread_fn)
 {
 	struct device *dev = cxlds->dev;
 	struct cxl_dev_id *dev_id;
 
-	/* dev_id must be globally unique and must contain the cxlds */
 	dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL);
 	if (!dev_id)
 		return -ENOMEM;
 	dev_id->cxlds = cxlds;
 
-	return devm_request_threaded_irq(dev, irq, handler, thread_fn,
-					 IRQF_SHARED | IRQF_ONESHOT,
-					 NULL, dev_id);
+	return devm_request_threaded_irq(dev, irq, NULL, thread_fn,
+					 IRQF_SHARED | IRQF_ONESHOT, NULL,
+					 dev_id);
 }
 
 static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds)
@@ -128,10 +131,10 @@ static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
 	reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
 	opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg);
 	if (opcode == CXL_MBOX_OP_SANITIZE) {
+		mutex_lock(&mds->mbox_mutex);
 		if (mds->security.sanitize_node)
-			sysfs_notify_dirent(mds->security.sanitize_node);
-
-		dev_dbg(cxlds->dev, "Sanitization operation ended\n");
+			mod_delayed_work(system_wq, &mds->security.poll_dwork, 0);
+		mutex_unlock(&mds->mbox_mutex);
 	} else {
 		/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
 		rcuwait_wake_up(&mds->mbox_wait);
@@ -152,18 +155,16 @@ static void cxl_mbox_sanitize_work(struct work_struct *work)
 	mutex_lock(&mds->mbox_mutex);
 	if (cxl_mbox_background_complete(cxlds)) {
 		mds->security.poll_tmo_secs = 0;
-		put_device(cxlds->dev);
-
 		if (mds->security.sanitize_node)
 			sysfs_notify_dirent(mds->security.sanitize_node);
+		mds->security.sanitize_active = false;
 
 		dev_dbg(cxlds->dev, "Sanitization operation ended\n");
 	} else {
 		int timeout = mds->security.poll_tmo_secs + 10;
 
 		mds->security.poll_tmo_secs = min(15 * 60, timeout);
-		queue_delayed_work(system_wq, &mds->security.poll_dwork,
-				   timeout * HZ);
+		schedule_delayed_work(&mds->security.poll_dwork, timeout * HZ);
 	}
 	mutex_unlock(&mds->mbox_mutex);
 }
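With this rework there is a single completion path, cxl_mbox_sanitize_work(): the polling worker backs off linearly (adding 10 seconds per attempt, capped at 15 minutes), and the interrupt handler no longer notifies sysfs itself — it just pulls the already-scheduled poll work forward with mod_delayed_work(..., 0). A sketch of that pattern, with hypothetical names mirroring the mds->security bookkeeping:

#include <linux/mutex.h>
#include <linux/workqueue.h>

/* hypothetical state; in the patch this lives in cxl_security_state */
struct sanitize_state {
	struct mutex lock;
	struct delayed_work poll_dwork;
};

/*
 * Interrupt path: rather than duplicating the completion handling,
 * run the poll worker immediately; it re-checks hardware state and
 * issues the sysfs notification under the same lock as the poll path.
 */
static void sanitize_irq_kick(struct sanitize_state *s)
{
	mutex_lock(&s->lock);
	mod_delayed_work(system_wq, &s->poll_dwork, 0);
	mutex_unlock(&s->lock);
}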
@@ -295,18 +296,15 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_memdev_state *mds,
 	 * and allow userspace to poll(2) for completion.
 	 */
 	if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) {
-		if (mds->security.poll) {
-			/* hold the device throughout */
-			get_device(cxlds->dev);
-
-			/* give first timeout a second */
-			timeout = 1;
-			mds->security.poll_tmo_secs = timeout;
-			queue_delayed_work(system_wq,
-					   &mds->security.poll_dwork,
-					   timeout * HZ);
-		}
-
+		if (mds->security.sanitize_active)
+			return -EBUSY;
+
+		/* give first timeout a second */
+		timeout = 1;
+		mds->security.poll_tmo_secs = timeout;
+		mds->security.sanitize_active = true;
+		schedule_delayed_work(&mds->security.poll_dwork,
+				      timeout * HZ);
 		dev_dbg(dev, "Sanitization operation started\n");
 		goto success;
 	}
@@ -389,7 +387,9 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds)
 	const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
 	struct device *dev = cxlds->dev;
 	unsigned long timeout;
+	int irq, msgnum;
 	u64 md_status;
+	u32 ctrl;
 
 	timeout = jiffies + mbox_ready_timeout * HZ;
 	do {
@@ -437,33 +437,26 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds)
 	dev_dbg(dev, "Mailbox payload sized %zu", mds->payload_size);
 
 	rcuwait_init(&mds->mbox_wait);
+	INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work);
 
-	if (cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ) {
-		u32 ctrl;
-		int irq, msgnum;
-		struct pci_dev *pdev = to_pci_dev(cxlds->dev);
-
-		msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap);
-		irq = pci_irq_vector(pdev, msgnum);
-		if (irq < 0)
-			goto mbox_poll;
-
-		if (cxl_request_irq(cxlds, irq, cxl_pci_mbox_irq, NULL))
-			goto mbox_poll;
+	/* background command interrupts are optional */
+	if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ))
+		return 0;
 
-		/* enable background command mbox irq support */
-		ctrl = readl(cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
-		ctrl |= CXLDEV_MBOX_CTRL_BG_CMD_IRQ;
-		writel(ctrl, cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
+	msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap);
+	irq = pci_irq_vector(to_pci_dev(cxlds->dev), msgnum);
+	if (irq < 0)
+		return 0;
 
+	if (cxl_request_irq(cxlds, irq, cxl_pci_mbox_irq))
 		return 0;
-	}
 
-mbox_poll:
-	mds->security.poll = true;
-	INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work);
+	dev_dbg(cxlds->dev, "Mailbox interrupts enabled\n");
+	/* enable background command mbox irq support */
+	ctrl = readl(cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
+	ctrl |= CXLDEV_MBOX_CTRL_BG_CMD_IRQ;
+	writel(ctrl, cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
 
-	dev_dbg(cxlds->dev, "Mailbox interrupts are unsupported");
 	return 0;
 }
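cxl_pci_setup_mailbox() extracts the interrupt message number from the capability register with FIELD_GET(). For readers unfamiliar with <linux/bitfield.h>: the mask encodes both position and width, so no manual shifting is needed. A small sketch, using a hypothetical mask rather than the real CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK definition:

#include <linux/bitfield.h>
#include <linux/bits.h>

/* hypothetical 4-bit message-number field at bits 7:4 */
#define EXAMPLE_MSGNUM_MASK GENMASK(7, 4)

static u32 roundtrip(u32 cap)
{
	/* FIELD_GET masks and right-shifts according to the mask's position */
	u32 msgnum = FIELD_GET(EXAMPLE_MSGNUM_MASK, cap);

	/* FIELD_PREP is the inverse: place a value into the field */
	return FIELD_PREP(EXAMPLE_MSGNUM_MASK, msgnum);
}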
@@ -484,7 +477,7 @@ static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev,
 	resource_size_t component_reg_phys;
 
 	*map = (struct cxl_register_map) {
-		.dev = &pdev->dev,
+		.host = &pdev->dev,
 		.resource = CXL_RESOURCE_NONE,
 	};
 
@@ -653,7 +646,7 @@ static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting)
 	if (irq < 0)
 		return irq;
 
-	return cxl_request_irq(cxlds, irq, NULL, cxl_event_thread);
+	return cxl_request_irq(cxlds, irq, cxl_event_thread);
 }
 
 static int cxl_event_get_int_policy(struct cxl_memdev_state *mds,
@@ -834,16 +827,14 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	 * If the component registers can't be found, the cxl_pci driver may
 	 * still be useful for management functions so don't return an error.
 	 */
-	cxlds->component_reg_phys = CXL_RESOURCE_NONE;
-	rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
+	rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT,
+				&cxlds->reg_map);
 	if (rc)
 		dev_warn(&pdev->dev, "No component registers (%d)\n", rc);
-	else if (!map.component_map.ras.valid)
+	else if (!cxlds->reg_map.component_map.ras.valid)
 		dev_dbg(&pdev->dev, "RAS registers not found\n");
 
-	cxlds->component_reg_phys = map.resource;
-
-	rc = cxl_map_component_regs(&map, &cxlds->regs.component,
+	rc = cxl_map_component_regs(&cxlds->reg_map, &cxlds->regs.component,
 				    BIT(CXL_CM_CAP_CAP_ID_RAS));
 	if (rc)
 		dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
@@ -882,11 +873,15 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (rc)
 		return rc;
 
-	cxlmd = devm_cxl_add_memdev(cxlds);
+	cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds);
 	if (IS_ERR(cxlmd))
 		return PTR_ERR(cxlmd);
 
-	rc = cxl_memdev_setup_fw_upload(mds);
+	rc = devm_cxl_setup_fw_upload(&pdev->dev, mds);
+	if (rc)
+		return rc;
+
+	rc = devm_cxl_sanitize_setup_notifier(&pdev->dev, cxlmd);
 	if (rc)
 		return rc;
 
@@ -900,7 +895,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 			break;
 		}
 
-		rc = cxl_map_pmu_regs(pdev, &pmu_regs, &map);
+		rc = cxl_map_pmu_regs(&map, &pmu_regs);
 		if (rc) {
 			dev_dbg(&pdev->dev, "Could not map PMU regs\n");
 			break;
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index 6240e05b9542..47bc8e0b8590 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -62,6 +62,9 @@ static int cxl_switch_port_probe(struct cxl_port *port)
 	struct cxl_hdm *cxlhdm;
 	int rc;
 
+	/* Cache the data early to ensure is_visible() works */
+	read_cdat_data(port);
+
 	rc = devm_cxl_port_enumerate_dports(port);
 	if (rc < 0)
 		return rc;
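The port.c hunk moves read_cdat_data() ahead of dport enumeration because, per the in-line comment, sysfs group visibility callbacks may run as soon as related devices register and they consult the cached CDAT. As background, the is_visible() convention for an attribute group looks roughly like this (hypothetical attribute and predicate, not the CXL code):

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t example_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	return sysfs_emit(buf, "%d\n", 42);
}
static DEVICE_ATTR_RO(example);

static umode_t example_visible(struct kobject *kobj, struct attribute *a, int n)
{
	struct device *dev = kobj_to_dev(kobj);

	/* hide the file when the backing data was never cached */
	if (!dev_get_drvdata(dev))
		return 0;
	return a->mode;
}

static struct attribute *example_attrs[] = {
	&dev_attr_example.attr,
	NULL,
};

static const struct attribute_group example_group = {
	.attrs = example_attrs,
	.is_visible = example_visible,
};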