From a912460efafea8ba763717b083347d5b33495bfa Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 6 May 2022 12:57:56 +0200 Subject: scsi: fcoe: Use per-CPU API to update per-CPU statistics The per-CPU statistics (struct fc_stats) is updated by getting a stable per-CPU pointer via get_cpu() + per_cpu_ptr() and then performing the increment. This can be optimized by using this_cpu_*() which will do whatever is needed on the architecture to perform the update safe and efficient. The read out of the individual value (fc_get_host_stats()) should be done by using READ_ONCE() instead of a plain-C access. The difference is that READ_ONCE() will always perform a single access while the plain-C access can be split by the compiler into two loads if it appears beneficial. The usage of u64 has the side-effect that it is also 64bit wide on 32bit architectures and the read is always split into two loads. The can lead to strange values if the read happens during an update which alters both 32bit parts of the 64bit value. This can be circumvented by either using a 32bit variables on 32bit architecures or extending the statistics with a sequence counter. Use this_cpu_*() API to update the statistics and READ_ONCE() to read it. Link: https://lore.kernel.org/r/20220506105758.283887-3-bigeasy@linutronix.de Reviewed-by: Davidlohr Bueso Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Martin K. Petersen --- drivers/scsi/libfc/fc_fcp.c | 29 ++++++++++------------------- drivers/scsi/libfc/fc_lport.c | 30 +++++++++++++++--------------- 2 files changed, 25 insertions(+), 34 deletions(-) (limited to 'drivers/scsi/libfc') diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index bce90eb56c9c..945adca5e72f 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -143,8 +143,7 @@ static struct fc_fcp_pkt *fc_fcp_pkt_alloc(struct fc_lport *lport, gfp_t gfp) INIT_LIST_HEAD(&fsp->list); spin_lock_init(&fsp->scsi_pkt_lock); } else { - per_cpu_ptr(lport->stats, get_cpu())->FcpPktAllocFails++; - put_cpu(); + this_cpu_inc(lport->stats->FcpPktAllocFails); } return fsp; } @@ -266,8 +265,7 @@ static int fc_fcp_send_abort(struct fc_fcp_pkt *fsp) if (!fsp->seq_ptr) return -EINVAL; - per_cpu_ptr(fsp->lp->stats, get_cpu())->FcpPktAborts++; - put_cpu(); + this_cpu_inc(fsp->lp->stats->FcpPktAborts); fsp->state |= FC_SRB_ABORT_PENDING; rc = fc_seq_exch_abort(fsp->seq_ptr, 0); @@ -436,8 +434,7 @@ static inline struct fc_frame *fc_fcp_frame_alloc(struct fc_lport *lport, if (likely(fp)) return fp; - per_cpu_ptr(lport->stats, get_cpu())->FcpFrameAllocFails++; - put_cpu(); + this_cpu_inc(lport->stats->FcpFrameAllocFails); /* error case */ fc_fcp_can_queue_ramp_down(lport); shost_printk(KERN_ERR, lport->host, @@ -471,7 +468,6 @@ static void fc_fcp_recv_data(struct fc_fcp_pkt *fsp, struct fc_frame *fp) { struct scsi_cmnd *sc = fsp->cmd; struct fc_lport *lport = fsp->lp; - struct fc_stats *stats; struct fc_frame_header *fh; size_t start_offset; size_t offset; @@ -533,14 +529,12 @@ static void fc_fcp_recv_data(struct fc_fcp_pkt *fsp, struct fc_frame *fp) if (~crc != le32_to_cpu(fr_crc(fp))) { crc_err: - stats = per_cpu_ptr(lport->stats, get_cpu()); - stats->ErrorFrames++; + this_cpu_inc(lport->stats->ErrorFrames); /* per cpu count, not total count, but OK for limit */ - if (stats->InvalidCRCCount++ < FC_MAX_ERROR_CNT) + if (this_cpu_inc_return(lport->stats->InvalidCRCCount) < FC_MAX_ERROR_CNT) printk(KERN_WARNING "libfc: CRC error on data " "frame for port (%6.6x)\n", lport->port_id); - put_cpu(); /* * Assume the frame is total garbage. * We may have copied it over the good part @@ -1861,7 +1855,6 @@ int fc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *sc_cmd) struct fc_fcp_pkt *fsp; int rval; int rc = 0; - struct fc_stats *stats; rval = fc_remote_port_chkready(rport); if (rval) { @@ -1913,20 +1906,18 @@ int fc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *sc_cmd) /* * setup the data direction */ - stats = per_cpu_ptr(lport->stats, get_cpu()); if (sc_cmd->sc_data_direction == DMA_FROM_DEVICE) { fsp->req_flags = FC_SRB_READ; - stats->InputRequests++; - stats->InputBytes += fsp->data_len; + this_cpu_inc(lport->stats->InputRequests); + this_cpu_add(lport->stats->InputBytes, fsp->data_len); } else if (sc_cmd->sc_data_direction == DMA_TO_DEVICE) { fsp->req_flags = FC_SRB_WRITE; - stats->OutputRequests++; - stats->OutputBytes += fsp->data_len; + this_cpu_inc(lport->stats->OutputRequests); + this_cpu_add(lport->stats->OutputBytes, fsp->data_len); } else { fsp->req_flags = 0; - stats->ControlRequests++; + this_cpu_inc(lport->stats->ControlRequests); } - put_cpu(); /* * send it to the lower layer diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index 19cd4a95d354..9c02c9523c4d 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -308,21 +308,21 @@ struct fc_host_statistics *fc_get_host_stats(struct Scsi_Host *shost) stats = per_cpu_ptr(lport->stats, cpu); - fc_stats->tx_frames += stats->TxFrames; - fc_stats->tx_words += stats->TxWords; - fc_stats->rx_frames += stats->RxFrames; - fc_stats->rx_words += stats->RxWords; - fc_stats->error_frames += stats->ErrorFrames; - fc_stats->invalid_crc_count += stats->InvalidCRCCount; - fc_stats->fcp_input_requests += stats->InputRequests; - fc_stats->fcp_output_requests += stats->OutputRequests; - fc_stats->fcp_control_requests += stats->ControlRequests; - fcp_in_bytes += stats->InputBytes; - fcp_out_bytes += stats->OutputBytes; - fc_stats->fcp_packet_alloc_failures += stats->FcpPktAllocFails; - fc_stats->fcp_packet_aborts += stats->FcpPktAborts; - fc_stats->fcp_frame_alloc_failures += stats->FcpFrameAllocFails; - fc_stats->link_failure_count += stats->LinkFailureCount; + fc_stats->tx_frames += READ_ONCE(stats->TxFrames); + fc_stats->tx_words += READ_ONCE(stats->TxWords); + fc_stats->rx_frames += READ_ONCE(stats->RxFrames); + fc_stats->rx_words += READ_ONCE(stats->RxWords); + fc_stats->error_frames += READ_ONCE(stats->ErrorFrames); + fc_stats->invalid_crc_count += READ_ONCE(stats->InvalidCRCCount); + fc_stats->fcp_input_requests += READ_ONCE(stats->InputRequests); + fc_stats->fcp_output_requests += READ_ONCE(stats->OutputRequests); + fc_stats->fcp_control_requests += READ_ONCE(stats->ControlRequests); + fcp_in_bytes += READ_ONCE(stats->InputBytes); + fcp_out_bytes += READ_ONCE(stats->OutputBytes); + fc_stats->fcp_packet_alloc_failures += READ_ONCE(stats->FcpPktAllocFails); + fc_stats->fcp_packet_aborts += READ_ONCE(stats->FcpPktAborts); + fc_stats->fcp_frame_alloc_failures += READ_ONCE(stats->FcpFrameAllocFails); + fc_stats->link_failure_count += READ_ONCE(stats->LinkFailureCount); } fc_stats->fcp_input_megabytes = div_u64(fcp_in_bytes, 1000000); fc_stats->fcp_output_megabytes = div_u64(fcp_out_bytes, 1000000); -- cgit v1.2.3 From a0548edf852a8776dade1e511694b2980c674e2a Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 6 May 2022 12:57:57 +0200 Subject: scsi: libfc: Remove get_cpu() semantics in fc_exch_em_alloc() The get_cpu() in fc_exch_em_alloc() was introduced in commit f018b73af6db ("[SCSI] libfc, libfcoe, fcoe: use smp_processor_id() only when preempt disabled") for no other reason than to simply use smp_processor_id() without getting a warning, because everything is done with the pool->lock held anyway. However, get_cpu(), by disabling preemption, does not play well with PREEMPT_RT, particularly when acquiring a regular (and thus sleepable) spinlock. Therefore remove the get_cpu() and just use the unstable value as we will have CPU locality guarantees next by taking the lock. The window of migration, as noted by Sebastian, is small and even if it happens the result is correct. Link: https://lore.kernel.org/r/20211117025956.79616-2-dave@stgolabs.net Link: https://lore.kernel.org/r/20220506105758.283887-4-bigeasy@linutronix.de Acked-by: Sebastian Andrzej Siewior Signed-off-by: Davidlohr Bueso Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Martin K. Petersen --- drivers/scsi/libfc/fc_exch.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/scsi/libfc') diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index aa223db4cf53..1d91c457527f 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -825,10 +825,9 @@ static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport, } memset(ep, 0, sizeof(*ep)); - cpu = get_cpu(); + cpu = raw_smp_processor_id(); pool = per_cpu_ptr(mp->pool, cpu); spin_lock_bh(&pool->lock); - put_cpu(); /* peek cache of free slot */ if (pool->left != FC_XID_UNKNOWN) { -- cgit v1.2.3