summaryrefslogtreecommitdiff
path: root/meta-ampere/meta-mitchell/recipes-ampere
diff options
context:
space:
mode:
authorHieu Huynh <hieuh@os.amperecomputing.com>2022-07-12 13:18:25 +0300
committerThang Q. Nguyen <thang@os.amperecomputing.com>2022-11-15 07:14:33 +0300
commit57fa7921b248f52858a48110d9670c62cea2eff0 (patch)
tree918fa4c51b5cd6f44cb099a105bf1ab5c4ae3fe4 /meta-ampere/meta-mitchell/recipes-ampere
parentf35e6b7ecc6b5c5fdb561643831082226c586b6e (diff)
downloadopenbmc-57fa7921b248f52858a48110d9670c62cea2eff0.tar.xz
meta-ampere: mtmitchell: support system firmware hang handler
Monitor GPIOF4 on 1P systems and GPION5 on 2P systems to detect a system firmware hang. If these GPIOs stop asserting for more than three seconds, the BMC triggers an event and resets the system. Tested: 1. Simulate the GPIO being deasserted for more than three seconds. 2. Fail to boot to Host, then check that the event log is created and the system is reset. Signed-off-by: Hieu Huynh <hieuh@os.amperecomputing.com> Change-Id: I2c2d5d50e5ebd6d9b9e3435ab1c17393dac1d9cf
Diffstat (limited to 'meta-ampere/meta-mitchell/recipes-ampere')
-rw-r--r--meta-ampere/meta-mitchell/recipes-ampere/host/ampere-sysfw-hang-handler.bb31
-rw-r--r--meta-ampere/meta-mitchell/recipes-ampere/host/ampere-sysfw-hang-handler/ampere-sysfw-hang-handler.service13
-rw-r--r--meta-ampere/meta-mitchell/recipes-ampere/host/ampere-sysfw-hang-handler/ampere_sysfw_hang_handler.sh44
3 files changed, 88 insertions, 0 deletions
diff --git a/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-sysfw-hang-handler.bb b/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-sysfw-hang-handler.bb
new file mode 100644
index 0000000000..7562cbdaa4
--- /dev/null
+++ b/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-sysfw-hang-handler.bb
@@ -0,0 +1,31 @@
+SUMMARY = "Ampere Computing LLC System Firmware Hang Handler"
+DESCRIPTION = "Monitors the host firmware heartbeat GPIO; on a hang, logs an event and resets the host"
+PR = "r1"
+LICENSE = "Apache-2.0"
+LIC_FILES_CHKSUM = "file://${COMMON_LICENSE_DIR}/Apache-2.0;md5=89aea4e17d99a7cacdbeed46a0096b10"
+
+inherit systemd
+inherit obmc-phosphor-systemd
+
+RDEPENDS:${PN} = "bash"
+FILESEXTRAPATHS:append := "${THISDIR}/${PN}:"
+
+SYSTEMD_PACKAGES = "${PN}"
+
+SRC_URI = " \
+    file://ampere-sysfw-hang-handler.service \
+    file://ampere_sysfw_hang_handler.sh \
+    "
+# Package the handler unit together with the monitoring script.
+SYSTEMD_SERVICE:${PN} += "ampere-sysfw-hang-handler.service"
+# Link the unit into obmc-host-already-on@<N>.target.wants for each entry in OBMC_HOST_INSTANCES.
+SYSFW_HANG_TGT = "ampere-sysfw-hang-handler.service"
+SYSFW_HANG_INSTMPL = "ampere-sysfw-hang-handler.service"
+AMPER_HOST_RUNNING = "obmc-host-already-on@{0}.target"
+SYSFW_HANG_TARGET_FMT = "../${SYSFW_HANG_TGT}:${AMPER_HOST_RUNNING}.wants/${SYSFW_HANG_INSTMPL}"
+SYSTEMD_LINK:${PN} += "${@compose_list_zip(d, 'SYSFW_HANG_TARGET_FMT', 'OBMC_HOST_INSTANCES')}"
+
+do_install() {
+    install -d ${D}${sbindir}    # create the directory the script is installed into, not a hard-coded /usr/sbin
+    install -m 0755 ${WORKDIR}/ampere_sysfw_hang_handler.sh ${D}${sbindir}/
+}
diff --git a/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-sysfw-hang-handler/ampere-sysfw-hang-handler.service b/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-sysfw-hang-handler/ampere-sysfw-hang-handler.service
new file mode 100644
index 0000000000..5eeaf5cc96
--- /dev/null
+++ b/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-sysfw-hang-handler/ampere-sysfw-hang-handler.service
@@ -0,0 +1,13 @@
+[Unit]
+Description=Ampere System Firmware Hang Handler
+After=obmc-host-already-on@0.target
+BindsTo=obmc-host-already-on@0.target
+ConditionPathExists=!/var/ampere/sysfw-hang-disable
+
+[Service]
+Type=simple
+ExecStart=/usr/bin/env ampere_sysfw_hang_handler.sh
+SyslogIdentifier=ampere_sysfw_hang
+
+[Install]
+WantedBy=obmc-host-already-on@0.target
diff --git a/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-sysfw-hang-handler/ampere_sysfw_hang_handler.sh b/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-sysfw-hang-handler/ampere_sysfw_hang_handler.sh
new file mode 100644
index 0000000000..c3f77fd8b1
--- /dev/null
+++ b/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-sysfw-hang-handler/ampere_sysfw_hang_handler.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# shellcheck disable=SC2046
+# shellcheck source=/dev/null
+
+source /usr/sbin/gpio-lib.sh
+
+# Do event trigger
+function sel_trigger()
+{
+ echo "Error: system firmware hang, trigger sel"
+ ampere_add_redfishevent.sh OpenBMC.0.1.SystemPowerOnFailed.Critical
+}
+
+# Do reset the system
+function reset_system()
+{
+ echo "Error: system firmware hang, reset the system"
+ ipmitool chassis power reset
+}
+
+prev_hb_state=0
+stale_reads=-1
+while :
+do
+    # Poll the Socket 0 heartbeat GPIO (GPIOF4). NOTE(review): an empty read also counts as "no toggle" - confirm intended.
+    cur_hb_state=$(gpio_name_get s0-heartbeat)
+    if [[ "$prev_hb_state" == "$cur_hb_state" ]]; then
+        stale_reads=$((stale_reads + 1))    # value unchanged since the previous poll
+    else
+        stale_reads=0                       # heartbeat changed: restart the count
+    fi
+
+    if (( stale_reads >= 6 )); then         # 6 unchanged polls at 0.5 s each = 3 s without a change
+        printf '%s\n' "Error: system firmware hang"
+        sel_trigger
+        reset_system
+        exit 0
+    fi
+    prev_hb_state="$cur_hb_state"
+    sleep 0.5
+done
+
+exit 0