From 632b9671f7a39c0857d405d202915557dd586dcf Mon Sep 17 00:00:00 2001 From: Davin McCall Date: Sun, 7 Jan 2018 23:39:55 +0000 Subject: [PATCH] Implement start timeout for services, default of 60 seconds. If a service doesn't start within the timeout it is sent SIGINT, and then SIGKILL after the stop timeout (if there is one). --- src/service.cc | 33 +++++++++++++++++++++++++++++---- src/service.h | 22 +++++++++++++++++----- 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/src/service.cc b/src/service.cc index a3af696..ce2ec5d 100644 --- a/src/service.cc +++ b/src/service.cc @@ -901,7 +901,12 @@ bool base_process_service::bring_up() noexcept else { eventLoop.get_time(restart_interval_time, clock_type::MONOTONIC); restart_interval_count = 0; - return start_ps_process(exec_arg_parts, onstart_flags.starts_on_console); + if (start_ps_process(exec_arg_parts, onstart_flags.starts_on_console)) { + restart_timer.arm_timer_rel(eventLoop, start_timeout); + stop_timer_armed = true; + return true; + } + return false; } } @@ -1492,6 +1497,15 @@ bool base_process_service::interrupt_start() noexcept else { log(loglevel_t::WARN, "Interrupting start of service ", get_name(), " with pid ", pid, " (with SIGINT)."); kill_pg(SIGINT); + if (stop_timeout != time_val(0,0)) { + restart_timer.arm_timer(eventLoop, stop_timeout); + stop_timer_armed = true; + } + else if (stop_timer_armed) { + restart_timer.stop_timer(eventLoop); + stop_timer_armed = false; + } + set_state(service_state_t::STOPPING); return false; } } @@ -1506,13 +1520,24 @@ void base_process_service::kill_with_fire() noexcept dasynq::rearm process_restart_timer::timer_expiry(eventloop_t &, int expiry_count) { + service->stop_timer_armed = false; + + // Timer expires if: + // We are stopping, including after having startup cancelled (stop timeout, state is STOPPING); We are + // starting (start timeout, state is STARTING); We are waiting for restart timer before restarting, + // including smooth recovery (restart 
timeout, state is STARTING or STARTED). if (service->get_state() == service_state_t::STOPPING) { service->kill_with_fire(); - service->stop_timer_armed = false; + } + else if (service->pid != -1) { + // Starting, start timed out. + service->interrupt_start(); } else { - // STARTING / STARTED: + // STARTING / STARTED, and we have no pid: must be restarting (smooth recovery if STARTED) service->do_restart(); } - return dasynq::rearm::DISARM; + + // Leave the timer disabled, or, if it has been reset by any processing above, leave it armed: + return dasynq::rearm::NOOP; } diff --git a/src/service.h b/src/service.h index 0c8e208..7287501 100644 --- a/src/service.h +++ b/src/service.h @@ -429,6 +429,12 @@ class service_record // Called on transition of desired state from started to stopped (or unpinned start) void do_stop() noexcept; + // Set the service state + void set_state(service_state_t new_state) noexcept + { + service_state = new_state; + } + // Virtual functions, to be implemented by service implementations: // Do any post-dependency startup; return false on failure @@ -635,16 +641,20 @@ class base_process_service : public service_record // Restart interval time and restart count are used to track the number of automatic restarts // over an interval. Too many restarts over an interval will inhibit further restarts. - time_val restart_interval_time; - int restart_interval_count; + time_val restart_interval_time; // current restart interval + int restart_interval_count; // count of restarts within current interval - time_val restart_interval; - int max_restart_interval_count; - time_val restart_delay; + time_val restart_interval; // maximum restart interval + int max_restart_interval_count; // number of restarts allowed over maximum interval + time_val restart_delay; // delay between restarts // Time allowed for service stop, after which SIGKILL is sent. 0 to disable. 
time_val stop_timeout = {10, 0}; // default of 10 seconds + // Time allowed for service start, after which SIGINT is sent (and then SIGKILL after + // the stop timeout). 0 to disable. + time_val start_timeout = {60, 0}; // default of 1 minute + bool waiting_restart_timer : 1; bool stop_timer_armed : 1; bool reserved_child_watch : 1; @@ -653,6 +663,8 @@ class base_process_service : public service_record // Start the process, return true on success virtual bool bring_up() noexcept override; + + // Launch the process with the given arguments, return true on success bool start_ps_process(const std::vector &args, bool on_console) noexcept; // Restart the process (due to start failure or unexpected termination). Restarts will be -- 2.25.1