summaryrefslogtreecommitdiff
path: root/drivers/spi
diff options
context:
space:
mode:
authorMark Brown <broonie@kernel.org>2024-02-26 20:54:12 +0300
committerMark Brown <broonie@kernel.org>2024-02-26 20:54:12 +0300
commit786115655f4d9167bd855872bd55b0a37321806e (patch)
tree5c852bd5b4898e59c12d7d9333b75a736e153830 /drivers/spi
parente63aef9c9121e5061cbf5112d12cadc9da399692 (diff)
parent7dba2adb063bcf7a293eacb88980e0975b1fb1fd (diff)
downloadlinux-786115655f4d9167bd855872bd55b0a37321806e.tar.xz
spi: add support for pre-cooking messages
Merge series from David Lechner <dlechner@baylibre.com>: This is a follow-up to [1] where it was suggested to break down the proposed SPI offload support into smaller series. This takes on the first suggested task of introducing an API to "pre-cook" SPI messages. This idea was first discussed extensively in 2013 [2][3] and revisited more briefly 2022 [4]. The goal here is to be able to improve performance (higher throughput, and reduced CPU usage) by allowing peripheral drivers that use the same struct spi_message repeatedly to "pre-cook" the message once to avoid repeating the same validation, and possibly other operations each time the message is sent. This series includes __spi_validate() and the automatic splitting of xfers in the optimizations. Another frequently suggested optimization is doing DMA mapping only once. This is not included in this series, but can be added later (preferably by someone with a real use case for it). To show how this all works and get some real-world measurements, this series includes the core changes, optimization of a SPI controller driver, and optimization of an ADC driver. This test case was only able to take advantage of the single validation optimization, since it didn't require splitting transfers. With these changes, CPU usage of the threaded interrupt handler, which calls spi_sync(), was reduced from 83% to 73% while at the same time the sample rate (frequency of SPI xfers) was increased from 20kHz to 25kHz. [1]: https://lore.kernel.org/linux-spi/20240109-axi-spi-engine-series-3-v1-1-e42c6a986580@baylibre.com/T/ [2]: https://lore.kernel.org/linux-spi/E81F4810-48DD-41EE-B110-D0D848B8A510@martin.sperl.org/T/ [3]: https://lore.kernel.org/linux-spi/39DEC004-10A1-47EF-9D77-276188D2580C@martin.sperl.org/T/ [4]: https://lore.kernel.org/linux-spi/20220525163946.48ea40c9@erd992/T/
Diffstat (limited to 'drivers/spi')
-rw-r--r--drivers/spi/spi-axi-spi-engine.c40
-rw-r--r--drivers/spi/spi-stm32.c28
-rw-r--r--drivers/spi/spi.c259
3 files changed, 247 insertions, 80 deletions
diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c
index ca66d202f0e2..6177c1a8d56e 100644
--- a/drivers/spi/spi-axi-spi-engine.c
+++ b/drivers/spi/spi-axi-spi-engine.c
@@ -109,6 +109,7 @@ struct spi_engine {
spinlock_t lock;
void __iomem *base;
+ struct spi_engine_message_state msg_state;
struct completion msg_complete;
unsigned int int_enable;
};
@@ -499,17 +500,11 @@ static irqreturn_t spi_engine_irq(int irq, void *devid)
return IRQ_HANDLED;
}
-static int spi_engine_prepare_message(struct spi_controller *host,
- struct spi_message *msg)
+static int spi_engine_optimize_message(struct spi_message *msg)
{
struct spi_engine_program p_dry, *p;
- struct spi_engine_message_state *st;
size_t size;
- st = kzalloc(sizeof(*st), GFP_KERNEL);
- if (!st)
- return -ENOMEM;
-
spi_engine_precompile_message(msg);
p_dry.length = 0;
@@ -517,31 +512,22 @@ static int spi_engine_prepare_message(struct spi_controller *host,
size = sizeof(*p->instructions) * (p_dry.length + 1);
p = kzalloc(sizeof(*p) + size, GFP_KERNEL);
- if (!p) {
- kfree(st);
+ if (!p)
return -ENOMEM;
- }
spi_engine_compile_message(msg, false, p);
spi_engine_program_add_cmd(p, false, SPI_ENGINE_CMD_SYNC(
AXI_SPI_ENGINE_CUR_MSG_SYNC_ID));
- st->p = p;
- st->cmd_buf = p->instructions;
- st->cmd_length = p->length;
- msg->state = st;
+ msg->opt_state = p;
return 0;
}
-static int spi_engine_unprepare_message(struct spi_controller *host,
- struct spi_message *msg)
+static int spi_engine_unoptimize_message(struct spi_message *msg)
{
- struct spi_engine_message_state *st = msg->state;
-
- kfree(st->p);
- kfree(st);
+ kfree(msg->opt_state);
return 0;
}
@@ -550,10 +536,18 @@ static int spi_engine_transfer_one_message(struct spi_controller *host,
struct spi_message *msg)
{
struct spi_engine *spi_engine = spi_controller_get_devdata(host);
- struct spi_engine_message_state *st = msg->state;
+ struct spi_engine_message_state *st = &spi_engine->msg_state;
+ struct spi_engine_program *p = msg->opt_state;
unsigned int int_enable = 0;
unsigned long flags;
+ /* reinitialize message state for this transfer */
+ memset(st, 0, sizeof(*st));
+ st->p = p;
+ st->cmd_buf = p->instructions;
+ st->cmd_length = p->length;
+ msg->state = st;
+
reinit_completion(&spi_engine->msg_complete);
spin_lock_irqsave(&spi_engine->lock, flags);
@@ -658,8 +652,8 @@ static int spi_engine_probe(struct platform_device *pdev)
host->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
host->max_speed_hz = clk_get_rate(spi_engine->ref_clk) / 2;
host->transfer_one_message = spi_engine_transfer_one_message;
- host->prepare_message = spi_engine_prepare_message;
- host->unprepare_message = spi_engine_unprepare_message;
+ host->optimize_message = spi_engine_optimize_message;
+ host->unoptimize_message = spi_engine_unoptimize_message;
host->num_chipselect = 8;
if (host->max_speed_hz == 0)
diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index c32e57bb38bd..e4e7ddb7524a 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -1118,6 +1118,21 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id)
return IRQ_HANDLED;
}
+static int stm32_spi_optimize_message(struct spi_message *msg)
+{
+ struct spi_controller *ctrl = msg->spi->controller;
+ struct stm32_spi *spi = spi_controller_get_devdata(ctrl);
+
+ /* On STM32H7, messages should not exceed a maximum size set
+ * later via the set_number_of_data function. In order to
+ * ensure that, split large messages into several messages
+ */
+ if (spi->cfg->set_number_of_data)
+ return spi_split_transfers_maxwords(ctrl, msg, spi->t_size_max);
+
+ return 0;
+}
+
/**
* stm32_spi_prepare_msg - set up the controller to transfer a single message
* @ctrl: controller interface
@@ -1163,18 +1178,6 @@ static int stm32_spi_prepare_msg(struct spi_controller *ctrl,
!!(spi_dev->mode & SPI_LSB_FIRST),
!!(spi_dev->mode & SPI_CS_HIGH));
- /* On STM32H7, messages should not exceed a maximum size setted
- * afterward via the set_number_of_data function. In order to
- * ensure that, split large messages into several messages
- */
- if (spi->cfg->set_number_of_data) {
- int ret;
-
- ret = spi_split_transfers_maxwords(ctrl, msg, spi->t_size_max);
- if (ret)
- return ret;
- }
-
spin_lock_irqsave(&spi->lock, flags);
/* CPOL, CPHA and LSB FIRST bits have common register */
@@ -2180,6 +2183,7 @@ static int stm32_spi_probe(struct platform_device *pdev)
ctrl->max_speed_hz = spi->clk_rate / spi->cfg->baud_rate_div_min;
ctrl->min_speed_hz = spi->clk_rate / spi->cfg->baud_rate_div_max;
ctrl->use_gpio_descriptors = true;
+ ctrl->optimize_message = stm32_spi_optimize_message;
ctrl->prepare_message = stm32_spi_prepare_msg;
ctrl->transfer_one = stm32_spi_transfer_one;
ctrl->unprepare_message = stm32_spi_unprepare_msg;
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index c2b10e2c75f0..ba4d3fde2054 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1747,38 +1747,6 @@ static int __spi_pump_transfer_message(struct spi_controller *ctlr,
trace_spi_message_start(msg);
- /*
- * If an SPI controller does not support toggling the CS line on each
- * transfer (indicated by the SPI_CS_WORD flag) or we are using a GPIO
- * for the CS line, we can emulate the CS-per-word hardware function by
- * splitting transfers into one-word transfers and ensuring that
- * cs_change is set for each transfer.
- */
- if ((msg->spi->mode & SPI_CS_WORD) && (!(ctlr->mode_bits & SPI_CS_WORD) ||
- spi_is_csgpiod(msg->spi))) {
- ret = spi_split_transfers_maxwords(ctlr, msg, 1);
- if (ret) {
- msg->status = ret;
- spi_finalize_current_message(ctlr);
- return ret;
- }
-
- list_for_each_entry(xfer, &msg->transfers, transfer_list) {
- /* Don't change cs_change on the last entry in the list */
- if (list_is_last(&xfer->transfer_list, &msg->transfers))
- break;
- xfer->cs_change = 1;
- }
- } else {
- ret = spi_split_transfers_maxsize(ctlr, msg,
- spi_max_transfer_size(msg->spi));
- if (ret) {
- msg->status = ret;
- spi_finalize_current_message(ctlr);
- return ret;
- }
- }
-
if (ctlr->prepare_message) {
ret = ctlr->prepare_message(ctlr, msg);
if (ret) {
@@ -2106,6 +2074,43 @@ struct spi_message *spi_get_next_queued_message(struct spi_controller *ctlr)
}
EXPORT_SYMBOL_GPL(spi_get_next_queued_message);
+/*
+ * __spi_unoptimize_message - shared implementation of spi_unoptimize_message()
+ * and spi_maybe_unoptimize_message()
+ * @msg: the message to unoptimize
+ *
+ * Peripheral drivers should use spi_unoptimize_message() and callers inside
+ * core should use spi_maybe_unoptimize_message() rather than calling this
+ * function directly.
+ *
+ * It is not valid to call this on a message that is not currently optimized.
+ */
+static void __spi_unoptimize_message(struct spi_message *msg)
+{
+ struct spi_controller *ctlr = msg->spi->controller;
+
+ if (ctlr->unoptimize_message)
+ ctlr->unoptimize_message(msg);
+
+ spi_res_release(ctlr, msg);
+
+ msg->optimized = false;
+ msg->opt_state = NULL;
+}
+
+/*
+ * spi_maybe_unoptimize_message - unoptimize msg not managed by a peripheral
+ * @msg: the message to unoptimize
+ *
+ * This function is used to unoptimize a message if and only if it was
+ * optimized by the core (via spi_maybe_optimize_message()).
+ */
+static void spi_maybe_unoptimize_message(struct spi_message *msg)
+{
+ if (!msg->pre_optimized && msg->optimized)
+ __spi_unoptimize_message(msg);
+}
+
/**
* spi_finalize_current_message() - the current message is complete
* @ctlr: the controller to return the message to
@@ -2134,15 +2139,6 @@ void spi_finalize_current_message(struct spi_controller *ctlr)
spi_unmap_msg(ctlr, mesg);
- /*
- * In the prepare_messages callback the SPI bus has the opportunity
- * to split a transfer to smaller chunks.
- *
- * Release the split transfers here since spi_map_msg() is done on
- * the split transfers.
- */
- spi_res_release(ctlr, mesg);
-
if (mesg->prepared && ctlr->unprepare_message) {
ret = ctlr->unprepare_message(ctlr, mesg);
if (ret) {
@@ -2153,6 +2149,8 @@ void spi_finalize_current_message(struct spi_controller *ctlr)
mesg->prepared = false;
+ spi_maybe_unoptimize_message(mesg);
+
WRITE_ONCE(ctlr->cur_msg_incomplete, false);
smp_mb(); /* See __spi_pump_transfer_message()... */
if (READ_ONCE(ctlr->cur_msg_need_completion))
@@ -3782,6 +3780,10 @@ static int __spi_split_transfer_maxsize(struct spi_controller *ctlr,
* @msg: the @spi_message to transform
* @maxsize: the maximum when to apply this
*
+ * This function allocates resources that are automatically freed during the
+ * spi message unoptimize phase so this function should only be called from
+ * optimize_message callbacks.
+ *
* Return: status of transformation
*/
int spi_split_transfers_maxsize(struct spi_controller *ctlr,
@@ -3820,6 +3822,10 @@ EXPORT_SYMBOL_GPL(spi_split_transfers_maxsize);
* @msg: the @spi_message to transform
* @maxwords: the number of words to limit each transfer to
*
+ * This function allocates resources that are automatically freed during the
+ * spi message unoptimize phase so this function should only be called from
+ * optimize_message callbacks.
+ *
* Return: status of transformation
*/
int spi_split_transfers_maxwords(struct spi_controller *ctlr,
@@ -4194,6 +4200,167 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message)
return 0;
}
+/*
+ * spi_split_transfers - generic handling of transfer splitting
+ * @msg: the message to split
+ *
+ * Under certain conditions, a SPI controller may not support arbitrary
+ * transfer sizes or other features required by a peripheral. This function
+ * will split the transfers in the message into smaller transfers that are
+ * supported by the controller.
+ *
+ * Controllers with special requirements not covered here can also split
+ * transfers in the optimize_message() callback.
+ *
+ * Context: can sleep
+ * Return: zero on success, else a negative error code
+ */
+static int spi_split_transfers(struct spi_message *msg)
+{
+ struct spi_controller *ctlr = msg->spi->controller;
+ struct spi_transfer *xfer;
+ int ret;
+
+ /*
+ * If an SPI controller does not support toggling the CS line on each
+ * transfer (indicated by the SPI_CS_WORD flag) or we are using a GPIO
+ * for the CS line, we can emulate the CS-per-word hardware function by
+ * splitting transfers into one-word transfers and ensuring that
+ * cs_change is set for each transfer.
+ */
+ if ((msg->spi->mode & SPI_CS_WORD) &&
+ (!(ctlr->mode_bits & SPI_CS_WORD) || spi_is_csgpiod(msg->spi))) {
+ ret = spi_split_transfers_maxwords(ctlr, msg, 1);
+ if (ret)
+ return ret;
+
+ list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+ /* Don't change cs_change on the last entry in the list */
+ if (list_is_last(&xfer->transfer_list, &msg->transfers))
+ break;
+
+ xfer->cs_change = 1;
+ }
+ } else {
+ ret = spi_split_transfers_maxsize(ctlr, msg,
+ spi_max_transfer_size(msg->spi));
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * __spi_optimize_message - shared implementation for spi_optimize_message()
+ * and spi_maybe_optimize_message()
+ * @spi: the device that will be used for the message
+ * @msg: the message to optimize
+ *
+ * Peripheral drivers will call spi_optimize_message() and the spi core will
+ * call spi_maybe_optimize_message() instead of calling this directly.
+ *
+ * It is not valid to call this on a message that has already been optimized.
+ *
+ * Return: zero on success, else a negative error code
+ */
+static int __spi_optimize_message(struct spi_device *spi,
+ struct spi_message *msg)
+{
+ struct spi_controller *ctlr = spi->controller;
+ int ret;
+
+ ret = __spi_validate(spi, msg);
+ if (ret)
+ return ret;
+
+ ret = spi_split_transfers(msg);
+ if (ret)
+ return ret;
+
+ if (ctlr->optimize_message) {
+ ret = ctlr->optimize_message(msg);
+ if (ret) {
+ spi_res_release(ctlr, msg);
+ return ret;
+ }
+ }
+
+ msg->optimized = true;
+
+ return 0;
+}
+
+/*
+ * spi_maybe_optimize_message - optimize message if it isn't already pre-optimized
+ * @spi: the device that will be used for the message
+ * @msg: the message to optimize
+ * Return: zero on success, else a negative error code
+ */
+static int spi_maybe_optimize_message(struct spi_device *spi,
+ struct spi_message *msg)
+{
+ if (msg->pre_optimized)
+ return 0;
+
+ return __spi_optimize_message(spi, msg);
+}
+
+/**
+ * spi_optimize_message - do any one-time validation and setup for a SPI message
+ * @spi: the device that will be used for the message
+ * @msg: the message to optimize
+ *
+ * Peripheral drivers that reuse the same message repeatedly may call this to
+ * perform as much message prep as possible once, rather than repeating it each
+ * time a message transfer is performed to improve throughput and reduce CPU
+ * usage.
+ *
+ * Once a message has been optimized, it cannot be modified with the exception
+ * of updating the contents of any xfer->tx_buf (the pointer can't be changed,
+ * only the data in the memory it points to).
+ *
+ * Calls to this function must be balanced with calls to spi_unoptimize_message()
+ * to avoid leaking resources.
+ *
+ * Context: can sleep
+ * Return: zero on success, else a negative error code
+ */
+int spi_optimize_message(struct spi_device *spi, struct spi_message *msg)
+{
+ int ret;
+
+ ret = __spi_optimize_message(spi, msg);
+ if (ret)
+ return ret;
+
+ /*
+ * This flag indicates that the peripheral driver called spi_optimize_message()
+ * and therefore we shouldn't unoptimize message automatically when finalizing
+ * the message but rather wait until spi_unoptimize_message() is called
+ * by the peripheral driver.
+ */
+ msg->pre_optimized = true;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(spi_optimize_message);
+
+/**
+ * spi_unoptimize_message - releases any resources allocated by spi_optimize_message()
+ * @msg: the message to unoptimize
+ *
+ * Calls to this function must be balanced with calls to spi_optimize_message().
+ *
+ * Context: can sleep
+ */
+void spi_unoptimize_message(struct spi_message *msg)
+{
+ __spi_unoptimize_message(msg);
+ msg->pre_optimized = false;
+}
+EXPORT_SYMBOL_GPL(spi_unoptimize_message);
+
static int __spi_async(struct spi_device *spi, struct spi_message *message)
{
struct spi_controller *ctlr = spi->controller;
@@ -4258,8 +4425,8 @@ int spi_async(struct spi_device *spi, struct spi_message *message)
int ret;
unsigned long flags;
- ret = __spi_validate(spi, message);
- if (ret != 0)
+ ret = spi_maybe_optimize_message(spi, message);
+ if (ret)
return ret;
spin_lock_irqsave(&ctlr->bus_lock_spinlock, flags);
@@ -4271,6 +4438,8 @@ int spi_async(struct spi_device *spi, struct spi_message *message)
spin_unlock_irqrestore(&ctlr->bus_lock_spinlock, flags);
+ spi_maybe_unoptimize_message(message);
+
return ret;
}
EXPORT_SYMBOL_GPL(spi_async);
@@ -4331,8 +4500,8 @@ static int __spi_sync(struct spi_device *spi, struct spi_message *message)
return -ESHUTDOWN;
}
- status = __spi_validate(spi, message);
- if (status != 0)
+ status = spi_maybe_optimize_message(spi, message);
+ if (status)
return status;
SPI_STATISTICS_INCREMENT_FIELD(ctlr->pcpu_statistics, spi_sync);