summaryrefslogtreecommitdiff
path: root/meta-phosphor/recipes-phosphor/systemd-policy/phosphor-systemd-policy/service-restart-policy.conf
diff options
context:
space:
mode:
Diffstat (limited to 'meta-phosphor/recipes-phosphor/systemd-policy/phosphor-systemd-policy/service-restart-policy.conf')
-rw-r--r--meta-phosphor/recipes-phosphor/systemd-policy/phosphor-systemd-policy/service-restart-policy.conf31
1 files changed, 31 insertions, 0 deletions
diff --git a/meta-phosphor/recipes-phosphor/systemd-policy/phosphor-systemd-policy/service-restart-policy.conf b/meta-phosphor/recipes-phosphor/systemd-policy/phosphor-systemd-policy/service-restart-policy.conf
new file mode 100644
index 0000000000..54516c2d47
--- /dev/null
+++ b/meta-phosphor/recipes-phosphor/systemd-policy/phosphor-systemd-policy/service-restart-policy.conf
@@ -0,0 +1,31 @@
+# This file overrides some defaults for systemd
+#
+# - Change the RestartSec from 100ms to 1s.
+# When a service hits a failure, our new debug collection service kicks
+# in. When a core file is involved, it's been found that generating 5 core
+# files within ~500ms puts a huge strain on the BMC. Also, if the bmc is
+# going to get a fix on a restart of a service, the more time the better
+# (think retries on device driver scenarios).
+#
+# - Change the StartLimitBurst to 2
+# Five just seems excessive for our services in openbmc. In all fail
+# scenarios seen so far (other then with phosphor-hwmon), either
+# restarting once does the job or restarting all 5 times does not help
+# and we just end up hitting the 5 limit anyway.
+#
+# - Change the StartLimitIntervalSec to 30s
+# The BMC CPU performance is already challenged. When a service is
+# failing and a core dump is being generated and collected into a dump,
+# it's even more challenged. Recent failures have shown situations where
+# the service does not fail again until 15-20 seconds after the initial
+# failure which means the default of 10s for this results in the service
+# being restarted indefinitely. Change this to 30s to only allow a service
+# to be restarted StartLimitBurst times within a 30s interval before
+# being put in a permanent fail state.
+#
+# See systemd-system.conf(5) for details on the conf files
+
+[Manager]
+DefaultRestartSec=1s
+DefaultStartLimitBurst=2
+DefaultStartLimitIntervalSec=30s