summaryrefslogtreecommitdiff
path: root/drivers/scsi/ufs/ufshcd.c
diff options
context:
space:
mode:
authorDaejun Park <daejun7.park@samsung.com>2021-07-12 11:58:30 +0300
committerMartin K. Petersen <martin.petersen@oracle.com>2021-08-01 23:04:23 +0300
commitf02bc9754a6887bf5e286889265d24ce5e3b1952 (patch)
tree6de5252df9d746ae30d5932a16153c18f6b592b2 /drivers/scsi/ufs/ufshcd.c
parent33529018294f1eabc6b5bb2672941165e658e96a (diff)
downloadlinux-f02bc9754a6887bf5e286889265d24ce5e3b1952.tar.xz
scsi: ufs: ufshpb: Introduce Host Performance Buffer feature
Implement Host Performance Buffer (HPB) initialization and add function calls to UFS core driver. NAND flash-based storage devices, including UFS, have mechanisms to translate logical addresses of I/O requests to the corresponding physical addresses of the flash storage. In UFS, logical-to-physical-address (L2P) map data, which is required to identify the physical address for the requested I/Os, can only be partially stored in SRAM from NAND flash. Due to this partial loading, accessing the flash address area, where the L2P information for that address is not loaded in the SRAM, can result in serious performance degradation. The basic concept of HPB is to cache L2P mapping entries in host system memory so that both physical block address (PBA) and logical block address (LBA) can be delivered in HPB read command. The HPB read command allows to read data faster than a regular read command in UFS since it provides the physical address (HPB Entry) of the desired logical block in addition to its logical address. The UFS device can access the physical block in NAND directly without searching and uploading L2P mapping table. This improves read performance because the NAND read operation for uploading L2P mapping table is removed. In HPB initialization, the host checks if the UFS device supports HPB feature and retrieves related device capabilities. Then, HPB parameters are configured in the device. Total start-up time of popular applications was measured and the difference observed between HPB being enabled and disabled. Popular applications are 12 game apps and 24 non-game apps. Each test cycle consists of running 36 applications in sequence. We repeated the cycle for observing performance improvement by L2P mapping cache hit in HPB. The following is the test environment: - kernel version: 4.4.0 - RAM: 8GB - UFS 2.1 (64GB) Results: +-------+----------+----------+-------+ | cycle | baseline | with HPB | diff | +-------+----------+----------+-------+ | 1 | 272.4 | 264.9 | -7.5 | | 2 | 250.4 | 248.2 | -2.2 | | 3 | 226.2 | 215.6 | -10.6 | | 4 | 230.6 | 214.8 | -15.8 | | 5 | 232.0 | 218.1 | -13.9 | | 6 | 231.9 | 212.6 | -19.3 | +-------+----------+----------+-------+ We also measured HPB performance using iozone: $ iozone -r 4k -+n -i2 -ecI -t 16 -l 16 -u 16 -s $IO_RANGE/16 -F \ mnt/tmp_1 mnt/tmp_2 mnt/tmp_3 mnt/tmp_4 mnt/tmp_5 mnt/tmp_6 mnt/tmp_7 \ mnt/tmp_8 mnt/tmp_9 mnt/tmp_10 mnt/tmp_11 mnt/tmp_12 mnt/tmp_13 \ mnt/tmp_14 mnt/tmp_15 mnt/tmp_16 Results: +----------+--------+---------+ | IO range | HPB on | HPB off | +----------+--------+---------+ | 1 GB | 294.8 | 300.87 | | 4 GB | 293.51 | 179.35 | | 8 GB | 294.85 | 162.52 | | 16 GB | 293.45 | 156.26 | | 32 GB | 277.4 | 153.25 | +----------+--------+---------+ Link: https://lore.kernel.org/r/20210712085830epcms2p8c1288b7f7a81b044158a18232617b572@epcms2p8 Reported-by: kernel test robot <lkp@intel.com> Tested-by: Bean Huo <beanhuo@micron.com> Tested-by: Can Guo <cang@codeaurora.org> Tested-by: Stanley Chu <stanley.chu@mediatek.com> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Reviewed-by: Bart Van Assche <bvanassche@acm.org> Reviewed-by: Can Guo <cang@codeaurora.org> Reviewed-by: Bean Huo <beanhuo@micron.com> Reviewed-by: Stanley Chu <stanley.chu@mediatek.com> Acked-by: Avri Altman <Avri.Altman@wdc.com> Signed-off-by: Daejun Park <daejun7.park@samsung.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Diffstat (limited to 'drivers/scsi/ufs/ufshcd.c')
-rw-r--r--drivers/scsi/ufs/ufshcd.c48
1 files changed, 48 insertions, 0 deletions
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 05495c34a2b7..0c54cf38913b 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -24,6 +24,7 @@
#include "ufs-debugfs.h"
#include "ufs_bsg.h"
#include "ufshcd-crypto.h"
+#include "ufshpb.h"
#include <asm/unaligned.h>
#define CREATE_TRACE_POINTS
@@ -4986,6 +4987,25 @@ static int ufshcd_change_queue_depth(struct scsi_device *sdev, int depth)
return scsi_change_queue_depth(sdev, depth);
}
+static void ufshcd_hpb_destroy(struct ufs_hba *hba, struct scsi_device *sdev)
+{
+ /* skip well-known LU */
+ if ((sdev->lun >= UFS_UPIU_MAX_UNIT_NUM_ID) || !ufshpb_is_allowed(hba))
+ return;
+
+ ufshpb_destroy_lu(hba, sdev);
+}
+
+static void ufshcd_hpb_configure(struct ufs_hba *hba, struct scsi_device *sdev)
+{
+ /* skip well-known LU */
+ if ((sdev->lun >= UFS_UPIU_MAX_UNIT_NUM_ID) ||
+ !(hba->dev_info.hpb_enabled) || !ufshpb_is_allowed(hba))
+ return;
+
+ ufshpb_init_hpb_lu(hba, sdev);
+}
+
/**
* ufshcd_slave_configure - adjust SCSI device configurations
* @sdev: pointer to SCSI device
@@ -4995,6 +5015,8 @@ static int ufshcd_slave_configure(struct scsi_device *sdev)
struct ufs_hba *hba = shost_priv(sdev->host);
struct request_queue *q = sdev->request_queue;
+ ufshcd_hpb_configure(hba, sdev);
+
blk_queue_update_dma_pad(q, PRDT_DATA_BYTE_COUNT_PAD - 1);
if (hba->quirks & UFSHCD_QUIRK_ALIGN_SG_WITH_PAGE_SIZE)
blk_queue_update_dma_alignment(q, PAGE_SIZE - 1);
@@ -5021,6 +5043,9 @@ static void ufshcd_slave_destroy(struct scsi_device *sdev)
struct ufs_hba *hba;
hba = shost_priv(sdev->host);
+
+ ufshcd_hpb_destroy(hba, sdev);
+
/* Drop the reference as it won't be needed anymore */
if (ufshcd_scsi_to_upiu_lun(sdev->lun) == UFS_UPIU_UFS_DEVICE_WLUN) {
unsigned long flags;
@@ -7100,6 +7125,7 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba)
* Stop the host controller and complete the requests
* cleared by h/w
*/
+ ufshpb_reset_host(hba);
ufshcd_hba_stop(hba);
hba->silence_err_logs = true;
ufshcd_complete_requests(hba);
@@ -7498,6 +7524,7 @@ static int ufs_get_device_desc(struct ufs_hba *hba)
{
int err;
u8 model_index;
+ u8 b_ufs_feature_sup;
u8 *desc_buf;
struct ufs_dev_info *dev_info = &hba->dev_info;
@@ -7525,9 +7552,16 @@ static int ufs_get_device_desc(struct ufs_hba *hba)
/* getting Specification Version in big endian format */
dev_info->wspecversion = desc_buf[DEVICE_DESC_PARAM_SPEC_VER] << 8 |
desc_buf[DEVICE_DESC_PARAM_SPEC_VER + 1];
+ b_ufs_feature_sup = desc_buf[DEVICE_DESC_PARAM_UFS_FEAT];
model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME];
+ if (dev_info->wspecversion >= UFS_DEV_HPB_SUPPORT_VERSION &&
+ (b_ufs_feature_sup & UFS_DEV_HPB_SUPPORT)) {
+ dev_info->hpb_enabled = true;
+ ufshpb_get_dev_info(hba, desc_buf);
+ }
+
err = ufshcd_read_string_desc(hba, model_index,
&dev_info->model, SD_ASCII_STD);
if (err < 0) {
@@ -7759,6 +7793,10 @@ static int ufshcd_device_geo_params_init(struct ufs_hba *hba)
else if (desc_buf[GEOMETRY_DESC_PARAM_MAX_NUM_LUN] == 0)
hba->dev_info.max_lu_supported = 8;
+ if (hba->desc_size[QUERY_DESC_IDN_GEOMETRY] >=
+ GEOMETRY_DESC_PARAM_HPB_MAX_ACTIVE_REGS)
+ ufshpb_get_geo_info(hba, desc_buf);
+
out:
kfree(desc_buf);
return err;
@@ -7901,6 +7939,7 @@ static int ufshcd_add_lus(struct ufs_hba *hba)
}
ufs_bsg_probe(hba);
+ ufshpb_init(hba);
scsi_scan_host(hba->host);
pm_runtime_put_sync(hba->dev);
@@ -8049,6 +8088,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba, bool async)
/* Enable Auto-Hibernate if configured */
ufshcd_auto_hibern8_enable(hba);
+ ufshpb_reset(hba);
out:
spin_lock_irqsave(hba->host->host_lock, flags);
if (ret)
@@ -8096,6 +8136,9 @@ out:
static const struct attribute_group *ufshcd_driver_groups[] = {
&ufs_sysfs_unit_descriptor_group,
&ufs_sysfs_lun_attributes_group,
+#ifdef CONFIG_SCSI_UFS_HPB
+ &ufs_sysfs_hpb_stat_group,
+#endif
NULL,
};
@@ -8797,6 +8840,8 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
req_link_state = UIC_LINK_OFF_STATE;
}
+ ufshpb_suspend(hba);
+
/*
* If we can't transition into any of the low power modes
* just gate the clocks.
@@ -8920,6 +8965,7 @@ out:
ufshcd_update_evt_hist(hba, UFS_EVT_WL_SUSP_ERR, (u32)ret);
hba->clk_gating.is_suspended = false;
ufshcd_release(hba);
+ ufshpb_resume(hba);
}
hba->pm_op_in_progress = false;
return ret;
@@ -8998,6 +9044,8 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
/* Enable Auto-Hibernate if configured */
ufshcd_auto_hibern8_enable(hba);
+
+ ufshpb_resume(hba);
goto out;
set_old_link_state: