From 01c89ab7f81b76de00daccf6a856a00eb63a17d7 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Mon, 23 Oct 2023 14:50:11 +0200 Subject: s390/ap: rework to use irq info from ap queue status This patch reworks the irq handling and reporting code for the AP queue interrupt handling to always use the irq info from the queue status. Until now the interrupt status of an AP queue was stored into a bool variable within the ap_queue struct. This variable was set on a successful interrupt enablement and cleared with kicking a reset. However, it may be that the interrupt state is manipulated outband for example by a hypervisor. This patch removes this variable and instead the irq bit from the AP queue status which is always reflecting the current irq state is used. Reviewed-by: Tony Krowiak Reviewed-by: Holger Dengler Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.h | 1 - drivers/s390/crypto/ap_queue.c | 22 ++++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 359a35f894d5..b0771ca0849b 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -206,7 +206,6 @@ struct ap_queue { bool config; /* configured state */ bool chkstop; /* checkstop state */ ap_qid_t qid; /* AP queue id. */ - bool interrupt; /* indicate if interrupts are enabled */ bool se_bound; /* SE bound state */ unsigned int assoc_idx; /* SE association index */ int queue_count; /* # messages currently on AP queue. */ diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 993240370ecf..f1abd21661dc 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -203,7 +203,7 @@ static enum ap_sm_wait ap_sm_read(struct ap_queue *aq) return AP_SM_WAIT_NONE; case AP_RESPONSE_NO_PENDING_REPLY: if (aq->queue_count > 0) - return aq->interrupt ? + return status.irq_enabled ? AP_SM_WAIT_INTERRUPT : AP_SM_WAIT_HIGH_TIMEOUT; aq->sm_state = AP_SM_STATE_IDLE; return AP_SM_WAIT_NONE; @@ -254,7 +254,7 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq) fallthrough; case AP_RESPONSE_Q_FULL: aq->sm_state = AP_SM_STATE_QUEUE_FULL; - return aq->interrupt ? + return status.irq_enabled ? AP_SM_WAIT_INTERRUPT : AP_SM_WAIT_HIGH_TIMEOUT; case AP_RESPONSE_RESET_IN_PROGRESS: aq->sm_state = AP_SM_STATE_RESET_WAIT; @@ -307,7 +307,6 @@ static enum ap_sm_wait ap_sm_reset(struct ap_queue *aq) case AP_RESPONSE_NORMAL: case AP_RESPONSE_RESET_IN_PROGRESS: aq->sm_state = AP_SM_STATE_RESET_WAIT; - aq->interrupt = false; aq->rapq_fbit = 0; aq->se_bound = false; return AP_SM_WAIT_LOW_TIMEOUT; @@ -383,7 +382,6 @@ static enum ap_sm_wait ap_sm_setirq_wait(struct ap_queue *aq) if (status.irq_enabled == 1) { /* Irqs are now enabled */ - aq->interrupt = true; aq->sm_state = (aq->queue_count > 0) ? AP_SM_STATE_WORKING : AP_SM_STATE_IDLE; } @@ -626,16 +624,21 @@ static ssize_t interrupt_show(struct device *dev, struct device_attribute *attr, char *buf) { struct ap_queue *aq = to_ap_queue(dev); + struct ap_queue_status status; int rc = 0; spin_lock_bh(&aq->lock); - if (aq->sm_state == AP_SM_STATE_SETIRQ_WAIT) + if (aq->sm_state == AP_SM_STATE_SETIRQ_WAIT) { rc = sysfs_emit(buf, "Enable Interrupt pending.\n"); - else if (aq->interrupt) - rc = sysfs_emit(buf, "Interrupts enabled.\n"); - else - rc = sysfs_emit(buf, "Interrupts disabled.\n"); + } else { + status = ap_tapq(aq->qid, NULL); + if (status.irq_enabled) + rc = sysfs_emit(buf, "Interrupts enabled.\n"); + else + rc = sysfs_emit(buf, "Interrupts disabled.\n"); + } spin_unlock_bh(&aq->lock); + return rc; } @@ -1032,7 +1035,6 @@ struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type) if (ap_sb_available() && is_prot_virt_guest()) aq->ap_dev.device.groups = ap_queue_dev_sb_attr_groups; aq->qid = qid; - aq->interrupt = false; spin_lock_init(&aq->lock); INIT_LIST_HEAD(&aq->pendingq); INIT_LIST_HEAD(&aq->requestq); -- cgit v1.2.3 From c40284b3642554d6cf519525872f2f5784933d8d Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Mon, 23 Oct 2023 15:42:21 +0200 Subject: s390/ap: re-enable interrupt for AP queues This patch introduces some code lines which check for interrupt support enabled on an AP queue after a reply has been received. This invocation has been chosen as there is a good chance to have the queue empty at that time. As the enablement of the irq imples a state machine change the queue should not have any pending requests or unreceived replies. Reviewed-by: Tony Krowiak Reviewed-by: Holger Dengler Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_queue.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index f1abd21661dc..3934a0cc13e7 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -200,13 +200,13 @@ static enum ap_sm_wait ap_sm_read(struct ap_queue *aq) return AP_SM_WAIT_AGAIN; } aq->sm_state = AP_SM_STATE_IDLE; - return AP_SM_WAIT_NONE; + break; case AP_RESPONSE_NO_PENDING_REPLY: if (aq->queue_count > 0) return status.irq_enabled ? AP_SM_WAIT_INTERRUPT : AP_SM_WAIT_HIGH_TIMEOUT; aq->sm_state = AP_SM_STATE_IDLE; - return AP_SM_WAIT_NONE; + break; default: aq->dev_state = AP_DEV_STATE_ERROR; aq->last_err_rc = status.response_code; @@ -215,6 +215,16 @@ static enum ap_sm_wait ap_sm_read(struct ap_queue *aq) AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); return AP_SM_WAIT_NONE; } + /* Check and maybe enable irq support (again) on this queue */ + if (!status.irq_enabled && status.queue_empty) { + void *lsi_ptr = ap_airq_ptr(); + + if (lsi_ptr && ap_queue_enable_irq(aq, lsi_ptr) == 0) { + aq->sm_state = AP_SM_STATE_SETIRQ_WAIT; + return AP_SM_WAIT_AGAIN; + } + } + return AP_SM_WAIT_NONE; } /** -- cgit v1.2.3 From e14aec23025eeb1f2159ba34dbc1458467c4c347 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Mon, 23 Oct 2023 09:57:10 +0200 Subject: s390/ap: fix AP bus crash on early config change callback invocation Fix kernel crash in AP bus code caused by very early invocation of the config change callback function via SCLP. After a fresh IML of the machine the crypto cards are still offline and will get switched online only with activation of any LPAR which has the card in it's configuration. A crypto card coming online is reported to the LPAR via SCLP and the AP bus offers a callback function to get this kind of information. However, it may happen that the callback is invoked before the AP bus init function is complete. As the callback triggers a synchronous AP bus scan, the scan may already run but some internal states are not initialized by the AP bus init function resulting in a crash like this: [ 11.635859] Unable to handle kernel pointer dereference in virtual kernel address space [ 11.635861] Failing address: 0000000000000000 TEID: 0000000000000887 [ 11.635862] Fault in home space mode while using kernel ASCE. [ 11.635864] AS:00000000894c4007 R3:00000001fece8007 S:00000001fece7800 P:000000000000013d [ 11.635879] Oops: 0004 ilc:1 [#1] SMP [ 11.635882] Modules linked in: [ 11.635884] CPU: 5 PID: 42 Comm: kworker/5:0 Not tainted 6.6.0-rc3-00003-g4dbf7cdc6b42 #12 [ 11.635886] Hardware name: IBM 3931 A01 751 (LPAR) [ 11.635887] Workqueue: events_long ap_scan_bus [ 11.635891] Krnl PSW : 0704c00180000000 0000000000000000 (0x0) [ 11.635895] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 [ 11.635897] Krnl GPRS: 0000000001000a00 0000000000000000 0000000000000006 0000000089591940 [ 11.635899] 0000000080000000 0000000000000a00 0000000000000000 0000000000000000 [ 11.635901] 0000000081870c00 0000000089591000 000000008834e4e2 0000000002625a00 [ 11.635903] 0000000081734200 0000038000913c18 000000008834c6d6 0000038000913ac8 [ 11.635906] Krnl Code:>0000000000000000: 0000 illegal [ 11.635906] 0000000000000002: 0000 illegal [ 11.635906] 0000000000000004: 0000 illegal [ 11.635906] 0000000000000006: 0000 illegal [ 11.635906] 0000000000000008: 0000 illegal [ 11.635906] 000000000000000a: 0000 illegal [ 11.635906] 000000000000000c: 0000 illegal [ 11.635906] 000000000000000e: 0000 illegal [ 11.635915] Call Trace: [ 11.635916] [<0000000000000000>] 0x0 [ 11.635918] [<000000008834e4e2>] ap_queue_init_state+0x82/0xb8 [ 11.635921] [<000000008834ba1c>] ap_scan_domains+0x6fc/0x740 [ 11.635923] [<000000008834c092>] ap_scan_adapter+0x632/0x8b0 [ 11.635925] [<000000008834c3e4>] ap_scan_bus+0xd4/0x288 [ 11.635927] [<00000000879a33ba>] process_one_work+0x19a/0x410 [ 11.635930] Discipline DIAG cannot be used without z/VM [ 11.635930] [<00000000879a3a2c>] worker_thread+0x3fc/0x560 [ 11.635933] [<00000000879aea60>] kthread+0x120/0x128 [ 11.635936] [<000000008792afa4>] __ret_from_fork+0x3c/0x58 [ 11.635938] [<00000000885ebe62>] ret_from_fork+0xa/0x30 [ 11.635942] Last Breaking-Event-Address: [ 11.635942] [<000000008834c6d4>] ap_wait+0xcc/0x148 This patch improves the ap_bus_force_rescan() function which is invoked by the config change callback by checking if a first initial AP bus scan has been done. If not, the force rescan request is simple ignored. Anyhow it does not make sense to trigger AP bus re-scans even before the very first bus scan is complete. Cc: stable@vger.kernel.org Reviewed-by: Holger Dengler Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index d09e08b71cfb..d6ad437883fa 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1022,6 +1022,10 @@ EXPORT_SYMBOL(ap_driver_unregister); void ap_bus_force_rescan(void) { + /* Only trigger AP bus scans after the initial scan is done */ + if (atomic64_read(&ap_scan_bus_count) <= 0) + return; + /* processing a asynchronous bus rescan */ del_timer(&ap_config_timer); queue_work(system_long_wq, &ap_scan_work); -- cgit v1.2.3 From cfaef6516e9ac64e36967bd74d173b67fef6eee4 Mon Sep 17 00:00:00 2001 From: Ingo Franzki Date: Thu, 26 Oct 2023 11:24:03 +0200 Subject: s390/zcrypt: don't report online if card or queue is in check-stop state If a card or a queue is in check-stop state, it can't be used by applications to perform crypto operations. Applications check the 'online' sysfs attribute to check if a card or queue is usable. Report a card or queue as offline in case it is in check-stop state. Furthermore, don't allow to set a card or queue online, if it is in check-stop state. This is similar to when the card or the queue is in deconfigured state, there it is also reported as being offline, and it can't be set online. Reviewed-by: Harald Freudenberger Signed-off-by: Ingo Franzki Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/zcrypt_card.c | 4 ++-- drivers/s390/crypto/zcrypt_queue.c | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c index c815722d0ac8..050462d95222 100644 --- a/drivers/s390/crypto/zcrypt_card.c +++ b/drivers/s390/crypto/zcrypt_card.c @@ -52,7 +52,7 @@ static ssize_t online_show(struct device *dev, { struct zcrypt_card *zc = dev_get_drvdata(dev); struct ap_card *ac = to_ap_card(dev); - int online = ac->config && zc->online ? 1 : 0; + int online = ac->config && !ac->chkstop && zc->online ? 1 : 0; return sysfs_emit(buf, "%d\n", online); } @@ -70,7 +70,7 @@ static ssize_t online_store(struct device *dev, if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1) return -EINVAL; - if (online && !ac->config) + if (online && (!ac->config || ac->chkstop)) return -ENODEV; zc->online = online; diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c index 112a80e8e6c2..67d8e0ae0eec 100644 --- a/drivers/s390/crypto/zcrypt_queue.c +++ b/drivers/s390/crypto/zcrypt_queue.c @@ -42,7 +42,7 @@ static ssize_t online_show(struct device *dev, { struct zcrypt_queue *zq = dev_get_drvdata(dev); struct ap_queue *aq = to_ap_queue(dev); - int online = aq->config && zq->online ? 1 : 0; + int online = aq->config && !aq->chkstop && zq->online ? 1 : 0; return sysfs_emit(buf, "%d\n", online); } @@ -59,7 +59,8 @@ static ssize_t online_store(struct device *dev, if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1) return -EINVAL; - if (online && (!aq->config || !aq->card->config)) + if (online && (!aq->config || !aq->card->config || + aq->chkstop || aq->card->chkstop)) return -ENODEV; if (online && !zc->online) return -EINVAL; -- cgit v1.2.3 From 5cf1a563a3284dc5c9f4ee8917ad2ebd51ff3def Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 26 Oct 2023 09:33:31 +0200 Subject: s390/ap: fix vanishing crypto cards in SE environment A secure execution (SE, also known as confidential computing) guest may see asynchronous errors on a crypto firmware queue. The current implementation to gather information about cards and queues in ap_queue_info() simple returns if an asynchronous error is hanging on the firmware queue. If such a situation happened and it was the only queue visible for a crypto card within an SE guest, then the card vanished from sysfs as the AP bus scan function refuses to hold a card without any type information. As lszcrypt evaluates the sysfs such a card vanished from the lszcrypt card listing and the user is baffled and has no way to reset and thus clear the pending asynchronous error. This patch improves the named function to also evaluate GR2 of the TAPQ in case of asynchronous error pending. If there is a not-null value stored in, the info is processed now. In the end, a queue with pending asynchronous error does not lead to a vanishing card any more. Reviewed-by: Holger Dengler Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.c | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index d6ad437883fa..5dd33155d5d5 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -352,7 +352,7 @@ EXPORT_SYMBOL(ap_test_config_ctrl_domain); /* * ap_queue_info(): Check and get AP queue info. * Returns: 1 if APQN exists and info is filled, - * 0 if APQN seems to exit but there is no info + * 0 if APQN seems to exist but there is no info * available (eg. caused by an asynch pending error) * -1 invalid APQN, TAPQ error or AP queue status which * indicates there is no APQN. @@ -373,36 +373,33 @@ static int ap_queue_info(ap_qid_t qid, int *q_type, unsigned int *q_fac, /* call TAPQ on this APQN */ status = ap_test_queue(qid, ap_apft_available(), &tapq_info); - /* handle pending async error with return 'no info available' */ - if (status.async) - return 0; - switch (status.response_code) { case AP_RESPONSE_NORMAL: case AP_RESPONSE_RESET_IN_PROGRESS: case AP_RESPONSE_DECONFIGURED: case AP_RESPONSE_CHECKSTOPPED: case AP_RESPONSE_BUSY: - /* - * According to the architecture in all these cases the - * info should be filled. All bits 0 is not possible as - * there is at least one of the mode bits set. - */ - if (WARN_ON_ONCE(!tapq_info.value)) - return 0; - *q_type = tapq_info.at; - *q_fac = tapq_info.fac; - *q_depth = tapq_info.qd; - *q_ml = tapq_info.ml; - *q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED; - *q_cstop = status.response_code == AP_RESPONSE_CHECKSTOPPED; - return 1; + /* For all these RCs the tapq info should be available */ + break; default: - /* - * A response code which indicates, there is no info available. - */ - return -1; + /* On a pending async error the info should be available */ + if (!status.async) + return -1; + break; } + + /* There should be at least one of the mode bits set */ + if (WARN_ON_ONCE(!tapq_info.value)) + return 0; + + *q_type = tapq_info.at; + *q_fac = tapq_info.fac; + *q_depth = tapq_info.qd; + *q_ml = tapq_info.ml; + *q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED; + *q_cstop = status.response_code == AP_RESPONSE_CHECKSTOPPED; + + return 1; } void ap_wait(enum ap_sm_wait wait) -- cgit v1.2.3