From e9414265dea4f73d2c5488a6bbd63c24cadc2825 Mon Sep 17 00:00:00 2001 From: Ben Pai Date: Mon, 25 Nov 2019 16:54:29 +0800 Subject: meta-ibm: Add GPU thermal policy When a Mihawk GPU's temperature exceeds 92 degrees Celsius, the system should be shut down to avoid GPU damage. Tested: when the GPU temperature exceeds 92 degrees Celsius, the system shuts down. (From meta-ibm rev: 623eb4dda626dd3dfb3f14d9afa4e10c86d3bbca) Change-Id: Id085afa2a7d7a29a42cd5d508a03fb64dd53c108 Signed-off-by: Ben Pai Signed-off-by: Brad Bishop --- .../dbus/thermal-policy/mihawk/thermal-policy.yaml | 86 ++++++++++++++++++++++ 1 file changed, 86 insertions(+) (limited to 'meta-ibm/meta-witherspoon/recipes-phosphor/dbus') diff --git a/meta-ibm/meta-witherspoon/recipes-phosphor/dbus/thermal-policy/mihawk/thermal-policy.yaml b/meta-ibm/meta-witherspoon/recipes-phosphor/dbus/thermal-policy/mihawk/thermal-policy.yaml index bb3226af3..727630cb8 100644 --- a/meta-ibm/meta-witherspoon/recipes-phosphor/dbus/thermal-policy/mihawk/thermal-policy.yaml +++ b/meta-ibm/meta-witherspoon/recipes-phosphor/dbus/thermal-policy/mihawk/thermal-policy.yaml @@ -116,6 +116,29 @@ - meta: SENSOR path: /xyz/openbmc_project/sensors/temperature/ambient_temp +- name: gpu sensors + description: > + 'Each gpu has its own temperature sensor.' + class: group + group: path + members: + - meta: SENSOR + path: /xyz/openbmc_project/sensors/temperature/gpu0 + - meta: SENSOR + path: /xyz/openbmc_project/sensors/temperature/gpu1 + - meta: SENSOR + path: /xyz/openbmc_project/sensors/temperature/gpu2 + - meta: SENSOR + path: /xyz/openbmc_project/sensors/temperature/gpu3 + - meta: SENSOR + path: /xyz/openbmc_project/sensors/temperature/gpu4 + - meta: SENSOR + path: /xyz/openbmc_project/sensors/temperature/gpu5 + - meta: SENSOR + path: /xyz/openbmc_project/sensors/temperature/gpu6 + - meta: SENSOR + path: /xyz/openbmc_project/sensors/temperature/gpu7 + - name: core temp description: > 'Monitor the temperature of each core.' 
@@ -138,6 +161,17 @@ meta: TEMP property: Value +- name: gpu temp + description: > + 'Monitor the temperature of each gpu core.' + class: group + group: property + type: int64 + members: + - interface: xyz.openbmc_project.Sensor.Value + meta: TEMP + property: Value + - name: watch core temps description: > 'Trigger logic on core temp changes.' @@ -156,6 +190,15 @@ properties: ambient temp callback: check ambient temp +- name: watch gpu temps + description: > + 'Trigger logic on gpu core temp changes.' + class: watch + watch: property + paths: gpu sensors + properties: gpu temp + callback: check gpu temps + - name: check temps description: > 'If this condition passes at least three cores are running @@ -185,6 +228,20 @@ bound: 45000 oneshot: true +- name: check gpu temps + description: > + 'If the gpu temperature sensor is too hot. Shut the system down.' + class: condition + condition: count + paths: gpu sensors + properties: gpu temp + callback: gpu log and shutdown + countop: '>=' + countbound: 1 + op: '>=' + bound: 92 + oneshot: true + - name: log and shutdown description: > 'Shut the system down and log an event.' @@ -205,6 +262,16 @@ - create ambient criticalhigh error - create ambient shutdown error +- name: gpu log and shutdown + description: > + 'Shut the system down and log an event.' + class: callback + callback: group + members: + - shutdown + - create gpu criticalhigh error + - create gpu shutdown error + - name: shutdown description: > 'Shut down the system.' @@ -240,6 +307,16 @@ error: xyz::openbmc_project::Sensor::Threshold::Error::CriticalHigh metadata: xyz::openbmc_project::Sensor::Threshold::CriticalHigh::SENSOR_DATA +- name: create gpu criticalhigh error + description: > + 'Create a GPU CriticalHigh Error log.' 
+ class: callback + callback: elog_with_metadata + paths: gpu sensors + properties: gpu temp + error: xyz::openbmc_project::Sensor::Threshold::Error::CriticalHigh + metadata: xyz::openbmc_project::Sensor::Threshold::CriticalHigh::SENSOR_DATA + - name: create shutdown error description: > 'Create a SystemShutdown Error log.' @@ -257,3 +334,12 @@ paths: ambient sensor properties: ambient temp error: xyz::openbmc_project::State::Shutdown::ThermalEvent::Error::Ambient + +- name: create gpu shutdown error + description: > + 'Create a SystemShutdown Error log.' + class: callback + callback: elog + paths: gpu sensors + properties: gpu temp + error: xyz::openbmc_project::State::Shutdown::ThermalEvent::Error::GPU -- cgit v1.2.3