From 690823052ca657b32835d5bff3c20c387995c6cc Mon Sep 17 00:00:00 2001 From: Davin McCall Date: Thu, 28 Dec 2017 00:07:32 +0000 Subject: [PATCH] Fix potential issue stopping process services. Process services undergoing smooth recovery count as "STARTED" but have a pid of -1, or a valid pid but with a process launch underway (waiting for exec status). If all dependencies stopped at the wrong time, the behaviour was to treat the service as stopped, but the smooth recovery was still kicking on and could cause an issue if it "succeeded" after the service was supposedly terminated. To resolve this, don't send a kill signal to a process during smooth recovery (i.e. if waiting_for_execstat is true) and send it once recovery has completed instead. --- src/service.cc | 64 +++++++++++++++++++++++++++++++++++++++++++++++--- src/service.h | 11 +++++---- 2 files changed, 68 insertions(+), 7 deletions(-) diff --git a/src/service.cc b/src/service.cc index 9779f35..0a58fb9 100644 --- a/src/service.cc +++ b/src/service.cc @@ -380,9 +380,22 @@ rearm exec_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) n sr->failed_to_start(); } else if (sr->service_state == service_state_t::STOPPING) { - // Must be a scripted service. We've logged the failure, but it's probably better - // not to leave the service in STARTED state: - sr->stopped(); + // Must be a scripted service, or a regular process service that happened to be in smooth + // recovery when the stop was issued. + if (sr->record_type == service_type::PROCESS) { + if (sr->stop_check_dependents()) { + sr->stopped(); + } + } + else { + // We've logged the failure, but it's probably better not to leave the service in + // STOPPING state: + sr->stopped(); + } + } + else if (sr->service_state == service_state_t::STARTED) { + // Process service in smooth recovery: + sr->emergency_stop(); } } else { @@ -394,6 +407,14 @@ rearm exec_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) n if (sr->service_state == service_state_t::STARTING) { sr->started(); } + else if (sr->service_state == service_state_t::STOPPING) { + // stopping, but smooth recovery was in process. That's now over so we can + // commence normal stop. Note that if pid == -1 the process already stopped(!), + // that's handled below. + if (sr->pid != -1 && sr->stop_check_dependents()) { + sr->all_deps_stopped(); + } + } } if (sr->pid == -1) { @@ -1247,6 +1268,43 @@ void base_process_service::all_deps_stopped() noexcept } } +void process_service::all_deps_stopped() noexcept +{ + waiting_for_deps = false; + if (waiting_for_execstat) { + // The process is still starting. This should be uncommon, but can occur during + // smooth recovery. We can't do much now; we have to wait until we get the + // status, and then act appropriately. + return; + } + else if (pid != -1) { + // The process is still kicking on - must actually kill it. We signal the process + // group (-pid) rather than just the process as there's less risk then of creating + // an orphaned process group: + if (! onstart_flags.no_sigterm) { + kill_pg(SIGTERM); + } + if (term_signal != -1) { + kill_pg(term_signal); + } + + // In most cases, the rest is done in handle_exit_status. + // If we are a BGPROCESS and the process is not our immediate child, however, that + // won't work - check for this now: + if (record_type == service_type::BGPROCESS && ! tracking_child) { + stopped(); + } + else if (stop_timeout != time_val(0,0)) { + restart_timer.arm_timer_rel(eventLoop, stop_timeout); + stop_timer_armed = true; + } + } + else { + // The process is already dead. + stopped(); + } +} + void scripted_service::all_deps_stopped() noexcept { waiting_for_deps = false; diff --git a/src/service.h b/src/service.h index b4bac6b..7d78c4a 100644 --- a/src/service.h +++ b/src/service.h @@ -349,7 +349,7 @@ class service_record // stop immediately void emergency_stop() noexcept; - // All dependents have stopped. + // All dependents have stopped, and this service should proceed to stop. virtual void all_deps_stopped() noexcept; // Service has actually stopped (includes having all dependents @@ -647,7 +647,8 @@ class base_process_service : public service_record bool waiting_restart_timer : 1; bool stop_timer_armed : 1; bool reserved_child_watch : 1; - bool tracking_child : 1; + bool tracking_child : 1; // whether we expect to see child process status + bool start_is_interruptible : 1; // whether we can interrupt start // Start the process, return true on success virtual bool start_ps_process() noexcept override; @@ -661,6 +662,9 @@ class base_process_service : public service_record void do_smooth_recovery() noexcept; virtual void all_deps_stopped() noexcept override; + + // Called when the process exits. The exit_status is the status value yielded by + // the "wait" system call. virtual void handle_exit_status(int exit_status) noexcept = 0; virtual bool can_interrupt_start() noexcept override @@ -709,9 +713,8 @@ class base_process_service : public service_record class process_service : public base_process_service { - // called when the process exits. The exit_status is the status value yielded by - // the "wait" system call. virtual void handle_exit_status(int exit_status) noexcept override; + virtual void all_deps_stopped() noexcept override; public: process_service(service_set *sset, string name, string &&command, -- 2.25.1