diff options
author | Hieu Huynh <hieuh@os.amperecomputing.com> | 2022-07-12 13:18:25 +0300 |
---|---|---|
committer | Thang Q. Nguyen <thang@os.amperecomputing.com> | 2022-11-15 07:14:33 +0300 |
commit | 57fa7921b248f52858a48110d9670c62cea2eff0 (patch) | |
tree | 918fa4c51b5cd6f44cb099a105bf1ab5c4ae3fe4 /meta-ampere/meta-mitchell/recipes-ampere | |
parent | f35e6b7ecc6b5c5fdb561643831082226c586b6e (diff) | |
download | openbmc-57fa7921b248f52858a48110d9670c62cea2eff0.tar.xz |
meta-ampere: mtmitchell: support system firmware hang handler
Monitor GPIOF4 on 1P systems and GPION5 on 2P systems to detect a
system firmware hang. If these GPIOs stop toggling for more than three
seconds, the BMC triggers an event and resets the system.
Tested:
1. Simulate the GPIO staying deasserted for more than three seconds.
2. Fail to boot to the host; check that the event log is created
and the system is reset.
Signed-off-by: Hieu Huynh <hieuh@os.amperecomputing.com>
Change-Id: I2c2d5d50e5ebd6d9b9e3435ab1c17393dac1d9cf
Diffstat (limited to 'meta-ampere/meta-mitchell/recipes-ampere')
3 files changed, 111 insertions, 0 deletions
diff --git a/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-sysfw-hang-handler.bb b/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-sysfw-hang-handler.bb
new file mode 100644
--- /dev/null
+++ b/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-sysfw-hang-handler.bb
@@ -0,0 +1,35 @@
+SUMMARY = "Ampere Computing LLC System Firmware Hang Handler"
+DESCRIPTION = "A host control implementation suitable for Ampere Computing LLC's systems"
+PR = "r1"
+LICENSE = "Apache-2.0"
+LIC_FILES_CHKSUM = "file://${COMMON_LICENSE_DIR}/Apache-2.0;md5=89aea4e17d99a7cacdbeed46a0096b10"
+
+inherit systemd
+inherit obmc-phosphor-systemd
+
+RDEPENDS:${PN} = "bash"
+FILESEXTRAPATHS:append := "${THISDIR}/${PN}:"
+
+SYSTEMD_PACKAGES = "${PN}"
+
+SRC_URI = " \
+    file://ampere-sysfw-hang-handler.service \
+    file://ampere_sysfw_hang_handler.sh \
+    "
+
+SYSTEMD_SERVICE:${PN} += "ampere-sysfw-hang-handler.service"
+
+# Expand SYSFW_HANG_TARGET_FMT against OBMC_HOST_INSTANCES so the service is
+# linked into obmc-host-already-on@<instance>.target.wants/.
+SYSFW_HANG_TGT = "ampere-sysfw-hang-handler.service"
+SYSFW_HANG_INSTMPL = "ampere-sysfw-hang-handler.service"
+AMPER_HOST_RUNNING = "obmc-host-already-on@{0}.target"
+SYSFW_HANG_TARGET_FMT = "../${SYSFW_HANG_TGT}:${AMPER_HOST_RUNNING}.wants/${SYSFW_HANG_INSTMPL}"
+SYSTEMD_LINK:${PN} += "${@compose_list_zip(d, 'SYSFW_HANG_TARGET_FMT', 'OBMC_HOST_INSTANCES')}"
+
+# Install the handler script; ${sbindir} is used for both the directory and
+# the file so they cannot diverge.
+do_install() {
+    install -d ${D}${sbindir}
+    install -m 0755 ${WORKDIR}/ampere_sysfw_hang_handler.sh ${D}${sbindir}/
+}
diff --git a/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-sysfw-hang-handler/ampere-sysfw-hang-handler.service b/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-sysfw-hang-handler/ampere-sysfw-hang-handler.service
new file mode 100644
--- /dev/null
+++ b/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-sysfw-hang-handler/ampere-sysfw-hang-handler.service
@@ -0,0 +1,16 @@
+# Runs the firmware hang handler while host 0 is on; BindsTo= stops it when
+# obmc-host-already-on@0.target goes away.
+[Unit]
+Description=Ampere System Firmware Hang Handler
+After=obmc-host-already-on@0.target
+BindsTo=obmc-host-already-on@0.target
+# Creating this file disables the handler
+ConditionPathExists=!/var/ampere/sysfw-hang-disable
+
+[Service]
+Type=simple
+ExecStart=/usr/bin/env ampere_sysfw_hang_handler.sh
+SyslogIdentifier=ampere_sysfw_hang
+
+[Install]
+WantedBy=obmc-host-already-on@0.target
diff --git a/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-sysfw-hang-handler/ampere_sysfw_hang_handler.sh b/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-sysfw-hang-handler/ampere_sysfw_hang_handler.sh
new file mode 100644
--- /dev/null
+++ b/meta-ampere/meta-mitchell/recipes-ampere/host/ampere-sysfw-hang-handler/ampere_sysfw_hang_handler.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+# Poll the socket 0 heartbeat GPIO while the host is on. If the value stops
+# changing for HANG_POLL_LIMIT consecutive polls, log a Redfish event and
+# reset the host.
+
+# shellcheck disable=SC2046
+# shellcheck source=/dev/null
+
+source /usr/sbin/gpio-lib.sh
+
+# Seconds to sleep between heartbeat samples
+POLL_INTERVAL=0.5
+# Unchanged samples in a row that count as a hang (6 * 0.5 s = 3 s)
+HANG_POLL_LIMIT=6
+
+# Do event trigger
+function sel_trigger()
+{
+    echo "Error: system firmware hang, trigger sel"
+    ampere_add_redfishevent.sh OpenBMC.0.1.SystemPowerOnFailed.Critical
+}
+
+# Do reset the system
+function reset_system()
+{
+    echo "Error: system firmware hang, reset the system"
+    ipmitool chassis power reset
+}
+
+s0_last_hb_state=0
+# Starts at -1 so the counter is 0 after the first sample either way
+cnt=-1
+while true
+do
+    # Monitor heart beat GPIO value, GPIOF4 for Socket 0
+    s0_hb_state=$(gpio_name_get s0-heartbeat)
+    if [ -z "$s0_hb_state" ]; then
+        # An empty result is assumed to mean the read failed (confirm against
+        # gpio-lib.sh). It would otherwise compare equal on every poll and be
+        # mistaken for a hang, resetting a host that may be healthy.
+        echo "Error: failed to read s0-heartbeat GPIO"
+        exit 1
+    fi
+
+    if [ "$s0_last_hb_state" != "$s0_hb_state" ]; then
+        cnt=0
+    else
+        cnt=$((cnt + 1))
+    fi
+
+    if [ "$cnt" -ge "$HANG_POLL_LIMIT" ]; then
+        echo "Error: system firmware hang"
+        sel_trigger
+        reset_system
+        exit 0
+    fi
+    s0_last_hb_state="$s0_hb_state"
+    sleep "$POLL_INTERVAL"
+done