Split service.cc into multiple files.
authorDavin McCall <davmac@davmac.org>
Thu, 11 Jan 2018 09:04:54 +0000 (09:04 +0000)
committerDavin McCall <davmac@davmac.org>
Thu, 11 Jan 2018 09:04:54 +0000 (09:04 +0000)
service.cc becomes service.cc + proc-service.cc + baseproc-service.cc.
The header service.h becomes service.h + proc-service.h.

This refactoring should make testing easier.

src/Makefile
src/baseproc-service.cc [new file with mode: 0644]
src/dinit-util.h
src/load_service.cc
src/proc-service.cc [new file with mode: 0644]
src/proc-service.h [new file with mode: 0644]
src/service.cc
src/service.h

index 54ad5d9633d97e5ed9acc76719c9fb7c9d1134ce..be9cc4ffd93698593dc8f25b7d4bb2404ab0ccad 100644 (file)
@@ -4,9 +4,11 @@ ifeq ($(BUILD_SHUTDOWN),yes)
   SHUTDOWN=shutdown
 endif
 
-objects = dinit.o load_service.o service.o control.o dinit-log.o dinit-main.o dinitctl.o shutdown.o
+#objects = dinit.o load_service.o service.o proc-service.o baseproc-service.o control.o dinit-log.o dinit-main.o dinitctl.o shutdown.o
 
-dinit_objects = dinit.o load_service.o service.o control.o dinit-log.o dinit-main.o
+dinit_objects = dinit.o load_service.o service.o proc-service.o baseproc-service.o control.o dinit-log.o dinit-main.o
+
+objects = $(dinit_objects) dinitctl.o shutdown.o
 
 all: dinit dinitctl $(SHUTDOWN)
 
diff --git a/src/baseproc-service.cc b/src/baseproc-service.cc
new file mode 100644 (file)
index 0000000..dc13afe
--- /dev/null
@@ -0,0 +1,318 @@
+#include "dinit-socket.h"
+#include "proc-service.h"
+
+/*
+ * Base process implementation (base_process_service).
+ *
+ * See proc-service.h for interface documentation.
+ */
+
+extern eventloop_t event_loop;
+
+using clock_type = dasynq::clock_type;
+using rearm = dasynq::rearm;
+using time_val = dasynq::time_val;
+
+/*
+ * Attempt to re-launch the process while the service stays up ("smooth recovery").
+ *
+ * If restart_ps_process() refuses the restart, the service is stopped via emergency_stop()
+ * and the service set's queues are processed.
+ */
+void base_process_service::do_smooth_recovery() noexcept
+{
+    const bool relaunch_accepted = restart_ps_process();
+    if (relaunch_accepted) {
+        return;
+    }
+
+    // Restart was refused: give up on the service.
+    emergency_stop();
+    services->process_queues();
+}
+/*
+ * Bring the service up by launching (or re-launching) its process.
+ *
+ * If a restart is already in progress (`restarting`), the process is only re-launched - via
+ * restart_ps_process() - when there is currently no child (pid == -1); otherwise nothing is
+ * done. For a fresh start the restart-interval bookkeeping is reset, the process is launched
+ * from exec_arg_parts, and the start timeout (if non-zero) is armed on restart_timer.
+ *
+ * Returns true if the launch was initiated (or was not needed), false if it failed.
+ */
+bool base_process_service::bring_up() noexcept
+{
+    if (restarting) {
+        if (pid == -1) {
+            return restart_ps_process();
+        }
+        return true;
+    }
+    else {
+        event_loop.get_time(restart_interval_time, clock_type::MONOTONIC);  // a new restart interval begins now
+        restart_interval_count = 0;
+        if (start_ps_process(exec_arg_parts, onstart_flags.starts_on_console)) {
+            if (start_timeout != time_val(0,0)) {
+                restart_timer.arm_timer_rel(event_loop, start_timeout);
+                stop_timer_armed = true;  // this flag is used for the start timeout as well
+            }
+            else if (stop_timer_armed) {
+                restart_timer.stop_timer(event_loop);
+                stop_timer_armed = false;
+            }
+            return true;
+        }
+        return false;
+    }
+}
+/*
+ * Fork a child process and have it execute the given command.
+ *
+ *   cmd        - argument vector; cmd.data() is handed to run_child_proc() in the child
+ *   on_console - passed through to run_child_proc()
+ *
+ * Records the launch time in last_start_time, creates a close-on-exec status pipe whose read
+ * end is watched by child_status_listener, optionally creates a control socket pair (when
+ * onstart_flags.pass_cs_fd is set) and then forks through child_listener. In the parent, on
+ * success, `pid` is set, waiting_for_execstat becomes true and true is returned.
+ *
+ * On any failure the error is logged, whatever was set up so far is released through the
+ * cleanup labels at the end of the function, and false is returned.
+ */
+bool base_process_service::start_ps_process(const std::vector<const char *> &cmd, bool on_console) noexcept
+{
+    // In general, you can't tell whether fork/exec is successful. We use a pipe to communicate
+    // success/failure from the child to the parent. The pipe is set CLOEXEC so a successful
+    // exec closes the pipe, and the parent sees EOF. If the exec is unsuccessful, the errno
+    // is written to the pipe, and the parent can read it.
+
+    event_loop.get_time(last_start_time, clock_type::MONOTONIC);
+
+    int pipefd[2];
+    if (dasynq::pipe2(pipefd, O_CLOEXEC)) {
+        log(loglevel_t::ERROR, get_name(), ": can't create status check pipe: ", strerror(errno));
+        return false;
+    }
+
+    const char * logfile = this->logfile.c_str();
+    if (*logfile == 0) {
+        logfile = "/dev/null";  // no log file configured
+    }
+
+    bool child_status_registered = false;
+    control_conn_t *control_conn = nullptr;
+
+    int control_socket[2] = {-1, -1};
+    if (onstart_flags.pass_cs_fd) {
+        if (dinit_socketpair(AF_UNIX, SOCK_STREAM, /* protocol */ 0, control_socket, SOCK_NONBLOCK)) {
+            log(loglevel_t::ERROR, get_name(), ": can't create control socket: ", strerror(errno));
+            goto out_p;
+        }
+
+        // Make the server side socket close-on-exec:
+        int fdflags = fcntl(control_socket[0], F_GETFD);  // NOTE(review): fcntl results are not checked
+        fcntl(control_socket[0], F_SETFD, fdflags | FD_CLOEXEC);
+
+        try {
+            control_conn = new control_conn_t(event_loop, services, control_socket[0]);
+        }
+        catch (std::exception &exc) {
+            // NOTE(review): any std::exception is reported as out-of-memory here
+            log(loglevel_t::ERROR, get_name(), ": can't launch process; out of memory");
+            goto out_cs;
+        }
+    }
+
+    // Set up complete, now fork and exec:
+
+    pid_t forkpid;
+
+    try {
+        child_status_listener.add_watch(event_loop, pipefd[0], dasynq::IN_EVENTS);
+        child_status_registered = true;
+
+        // We specify a high priority (i.e. low priority value) so that process termination is
+        // handled early. This means we have always recorded that the process is terminated by the
+        // time that we handle events that might otherwise cause us to signal the process, so we
+        // avoid sending a signal to an invalid (and possibly recycled) process ID.
+        forkpid = child_listener.fork(event_loop, reserved_child_watch, dasynq::DEFAULT_PRIORITY - 10);
+        reserved_child_watch = true;
+    }
+    catch (std::exception &e) {
+        log(loglevel_t::ERROR, get_name(), ": Could not fork: ", e.what());
+        goto out_cs_h;
+    }
+
+    if (forkpid == 0) {
+        // Child process. NOTE(review): presumably run_child_proc never returns - confirm.
+        run_child_proc(cmd.data(), logfile, on_console, pipefd[1], control_socket[1]);
+    }
+    else {
+        // Parent process
+        close(pipefd[1]); // close the 'other end' fd
+        if (control_socket[1] != -1) {
+            close(control_socket[1]);
+        }
+        pid = forkpid;
+
+        waiting_for_execstat = true;  // cleared again in exec_status_pipe_watcher::fd_event
+        return true;
+    }
+
+    // Failure exit:
+
+    out_cs_h:  // fork failed: undo the status watch, then the control connection and sockets
+    if (child_status_registered) {
+        child_status_listener.deregister(event_loop);
+    }
+
+    if (onstart_flags.pass_cs_fd) {
+        delete control_conn;
+
+        out_cs:  // control connection was not created: just close both control sockets
+        close(control_socket[0]);
+        close(control_socket[1]);
+    }
+
+    out_p:  // close both ends of the status pipe
+    close(pipefd[0]);
+    close(pipefd[1]);
+
+    return false;
+}
+/*
+ * Stop the service's process.
+ *
+ * If a process is running (pid != -1) its process group is signalled: SIGTERM unless
+ * onstart_flags.no_sigterm is set, and additionally term_signal when one is configured.
+ * A BGPROCESS whose process is not tracked as a child is marked stopped immediately;
+ * otherwise the stop timeout (if non-zero) is armed on restart_timer. If there is no
+ * process, the service is marked stopped straight away.
+ */
+void base_process_service::bring_down() noexcept
+{
+    waiting_for_deps = false;
+    if (pid != -1) {
+        // The process is still kicking on - must actually kill it. We signal the process
+        // group (see kill_pg) rather than just the process as there's less risk then of creating
+        // an orphaned process group:
+        if (! onstart_flags.no_sigterm) {
+            kill_pg(SIGTERM);
+        }
+        if (term_signal != -1) {
+            kill_pg(term_signal);  // sent in addition to SIGTERM (unless no_sigterm is set)
+        }
+
+        // In most cases, the rest is done in handle_exit_status.
+        // If we are a BGPROCESS and the process is not our immediate child, however, that
+        // won't work - check for this now:
+        if (get_type() == service_type_t::BGPROCESS && ! tracking_child) {
+            stopped();
+        }
+        else if (stop_timeout != time_val(0,0)) {
+            restart_timer.arm_timer_rel(event_loop, stop_timeout);
+            stop_timer_armed = true;
+        }
+    }
+    else {
+        // The process is already dead.
+        stopped();
+    }
+}
+/*
+ * Construct a process-based service record.
+ *
+ *   sset, name, service_type_p, deplist_p - forwarded to the service_record constructor
+ *   command         - full command line; moved into program_name
+ *   command_offsets - (start,end) index pairs of each argument within the command, used by
+ *                     separate_args() to build exec_arg_parts
+ *
+ * Also registers restart_timer with the event loop and sets the default restart limits and
+ * the initial values of the state flags.
+ */
+base_process_service::base_process_service(service_set *sset, string name,
+        service_type_t service_type_p, string &&command,
+        std::list<std::pair<unsigned,unsigned>> &command_offsets,
+        const std::list<prelim_dep> &deplist_p)
+     : service_record(sset, name, service_type_p, deplist_p), child_listener(this),
+       child_status_listener(this), restart_timer(this)
+{
+    program_name = std::move(command);
+    exec_arg_parts = separate_args(program_name, command_offsets);  // pointers into program_name
+
+    restart_interval_count = 0;
+    restart_interval_time = {0, 0};
+    restart_timer.service = this;  // same value as passed to the restart_timer(this) initializer
+    restart_timer.add_timer(event_loop);
+
+    // By default, allow a maximum of 3 restarts within 10.0 seconds:
+    restart_interval.seconds() = 10;
+    restart_interval.nseconds() = 0;
+    max_restart_interval_count = 3;
+
+    waiting_restart_timer = false;
+    reserved_child_watch = false;
+    tracking_child = false;
+    stop_timer_armed = false;
+    start_is_interruptible = false;
+}
+/*
+ * Re-launch the process, either as a regular restart (state STARTING) or as a smooth
+ * recovery (state STARTED).
+ *
+ * Counts the restart in restart_interval_count. In the STARTING state the launch is deferred
+ * (waiting_for_deps is set) if dependencies have not started. If the launch itself fails the
+ * service is failed (STARTING) or force-stopped (otherwise), and queues are processed.
+ */
+void base_process_service::do_restart() noexcept
+{
+    waiting_restart_timer = false;
+    restart_interval_count++;
+    auto service_state = get_state();
+
+    // We may be STARTING (regular restart) or STARTED ("smooth recovery"). This affects whether
+    // the process should be granted access to the console:
+    bool on_console = service_state == service_state_t::STARTING
+            ? onstart_flags.starts_on_console : onstart_flags.runs_on_console;
+
+    if (service_state == service_state_t::STARTING) {
+        // for a regular restart (state STARTING), we want to check dependencies are available
+        // before actually starting:
+        if (! check_deps_started()) {
+            waiting_for_deps = true;
+            return;
+        }
+    }
+
+    if (! start_ps_process(exec_arg_parts, on_console)) {
+        restarting = false;
+        if (service_state == service_state_t::STARTING) {
+            failed_to_start();
+        }
+        else {
+            // desired_state = service_state_t::STOPPED;
+            forced_stop();
+        }
+        services->process_queues();
+    }
+}
+/*
+ * Restart the process, subject to the restart rate limit and the minimum restart delay.
+ *
+ * Returns false only when the restart limit (max_restart_interval_count restarts within
+ * restart_interval) has been reached. Otherwise returns true after either restarting
+ * immediately via do_restart() or arming restart_timer for the remainder of restart_delay.
+ * A true return therefore does not mean the process was launched successfully; do_restart()
+ * deals with launch failure itself.
+ */
+bool base_process_service::restart_ps_process() noexcept
+{
+    using time_val = dasynq::time_val;
+
+    time_val current_time;
+    event_loop.get_time(current_time, clock_type::MONOTONIC);
+
+    if (max_restart_interval_count != 0) {  // a limit of 0 disables the rate check
+        // Check whether we're still in the most recent restart check interval:
+        time_val int_diff = current_time - restart_interval_time;
+        if (int_diff < restart_interval) {
+            if (restart_interval_count >= max_restart_interval_count) {
+                log(loglevel_t::ERROR, "Service ", get_name(), " restarting too quickly; stopping.");
+                return false;
+            }
+        }
+        else {
+            restart_interval_time = current_time;
+            restart_interval_count = 0;
+        }
+    }
+
+    // Check if enough time has lapsed since the previous restart. If not, start a timer:
+    time_val tdiff = current_time - last_start_time;
+    if (restart_delay <= tdiff) {
+        // > restart delay (normally 200ms)
+        do_restart();
+    }
+    else {
+        time_val timeout = restart_delay - tdiff;
+        restart_timer.arm_timer_rel(event_loop, timeout);
+        waiting_restart_timer = true;  // do_restart() runs from the timer expiry instead
+    }
+    return true;
+}
+
+/*
+ * Interrupt an in-progress start.
+ *
+ * If a launch is actually under way (we are not merely waiting on the restart timer), the
+ * process group is sent SIGINT, the stop timeout is armed (or a stale timer cancelled), the
+ * state becomes STOPPING, listeners get STARTCANCELLED, and false is returned.
+ *
+ * If we are only waiting on the restart timer, the timer is cancelled and the result of
+ * service_record::interrupt_start() is returned.
+ */
+bool base_process_service::interrupt_start() noexcept
+{
+    if (! waiting_restart_timer) {
+        log(loglevel_t::WARN, "Interrupting start of service ", get_name(), " with pid ", pid, " (with SIGINT).");
+        kill_pg(SIGINT);
+
+        const bool have_stop_timeout = (stop_timeout != time_val(0,0));
+        if (have_stop_timeout) {
+            restart_timer.arm_timer_rel(event_loop, stop_timeout);
+            stop_timer_armed = true;
+        }
+        else if (stop_timer_armed) {
+            // No stop timeout configured, but a timer is still pending: cancel it.
+            restart_timer.stop_timer(event_loop);
+            stop_timer_armed = false;
+        }
+
+        set_state(service_state_t::STOPPING);
+        notify_listeners(service_event_t::STARTCANCELLED);
+        return false;
+    }
+
+    // No process has been launched yet; cancelling the pending restart is enough.
+    restart_timer.stop_timer(event_loop);
+    waiting_restart_timer = false;
+    return service_record::interrupt_start();
+}
+
+/*
+ * Forcibly kill the service's process group with SIGKILL, logging a warning first.
+ * Does nothing when there is no process (pid == -1).
+ */
+void base_process_service::kill_with_fire() noexcept
+{
+    if (pid == -1) {
+        return;
+    }
+
+    log(loglevel_t::WARN, "Service ", get_name(), " with pid ", pid, " exceeded allowed stop time; killing.");
+    kill_pg(SIGKILL);
+}
+
+/*
+ * Send a signal to the process group that the service's process (pid) belongs to.
+ *
+ *   signo - the signal number to deliver
+ *
+ * If the process group cannot be determined the error is logged and no signal is sent.
+ */
+void base_process_service::kill_pg(int signo) noexcept
+{
+    const pid_t group_id = getpgid(pid);
+    if (group_id != -1) {
+        kill(-group_id, signo);
+        return;
+    }
+
+    // only should happen if pid is invalid, which should never happen...
+    log(loglevel_t::ERROR, get_name(), ": can't signal process: ", strerror(errno));
+}
index 947878e5e05964cc837c7a29ec5485469dae58ca..77170f443fa75354888ba3e8d0c880002ae8ec27 100644 (file)
@@ -1,8 +1,12 @@
 #ifndef DINIT_UTIL_H_INCLUDED
 #define DINIT_UTIL_H_INCLUDED 1
 
+#include <cstddef>
 #include <cerrno>
 
+#include <sys/types.h>
+#include <unistd.h>
+
 // Signal-safe read. Read and re-try if interrupted by signal (EINTR).
 // *May* affect errno even on a successful read (when the return is less than n).
 inline ssize_t ss_read(int fd, void * buf, size_t n)
index 7a3578ffb78da955282f0fcc7bb5632645a9ae83..52937b03b99e3cf821c00f87104961157d2e4c79 100644 (file)
@@ -10,7 +10,7 @@
 #include <pwd.h>
 #include <grp.h>
 
-#include "service.h"
+#include "proc-service.h"
 
 using string = std::string;
 using string_iterator = std::string::iterator;
diff --git a/src/proc-service.cc b/src/proc-service.cc
new file mode 100644 (file)
index 0000000..0aeb9e3
--- /dev/null
@@ -0,0 +1,494 @@
+#include <sys/un.h>
+#include <sys/socket.h>
+
+#include "dinit-socket.h"
+#include "dinit-util.h"
+#include "proc-service.h"
+
+extern eventloop_t event_loop;
+
+using clock_type = dasynq::clock_type;
+using rearm = dasynq::rearm;
+using time_val = dasynq::time_val;
+/*
+ * Event handler for the exec status pipe of a launched process.
+ *
+ * Reads an int from the watched pipe, then deregisters the watch and closes the descriptor.
+ * If anything was read it is treated as the errno from a failed exec(): the child watch and
+ * any armed timer are released, pid is reset and the service's exec_failed() is called.
+ * Otherwise exec() succeeded and startup/stop processing continues according to the service
+ * type and state. Always returns rearm::REMOVED since the watch has been deregistered.
+ */
+rearm exec_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) noexcept
+{
+    base_process_service *sr = service;
+    sr->waiting_for_execstat = false;
+
+    int exec_status;
+    int r = read(get_watched_fd(), &exec_status, sizeof(int));  // NOTE(review): short reads are not distinguished
+    deregister(loop);
+    close(get_watched_fd());
+
+    if (r > 0) {
+        // We read an errno code; exec() failed, and the service startup failed.
+        if (sr->pid != -1) {
+            sr->child_listener.deregister(event_loop, sr->pid);
+            sr->reserved_child_watch = false;
+            if (sr->stop_timer_armed) {
+                sr->restart_timer.stop_timer(loop);
+                sr->stop_timer_armed = false;
+            }
+        }
+        sr->pid = -1;
+        sr->exec_failed(exec_status);
+    }
+    else {
+        // exec() succeeded.
+        if (sr->get_type() == service_type_t::PROCESS) {
+            // This could be a smooth recovery (state already STARTED). Even more, the process
+            // might be stopped (and killed via a signal) during smooth recovery.  We don't want to
+            // process startup again in either case, so we check for state STARTING:
+            if (sr->get_state() == service_state_t::STARTING) {
+                sr->started();
+            }
+            else if (sr->get_state() == service_state_t::STOPPING) {
+                // stopping, but smooth recovery was in process. That's now over so we can
+                // commence normal stop. Note that if pid == -1 the process already stopped(!),
+                // that's handled below.
+                if (sr->pid != -1 && sr->stop_check_dependents()) {
+                    sr->bring_down();
+                }
+            }
+        }
+
+        if (sr->pid == -1) {
+            // Somehow the process managed to complete before we even saw the status.
+            sr->handle_exit_status(sr->exit_status);  // exit_status was saved by service_child_watcher::status_change
+        }
+    }
+
+    sr->services->process_queues();
+
+    return rearm::REMOVED;
+}
+/*
+ * Event handler called when the watched child process terminates.
+ *
+ *   status - the child's exit status; stored in the service's exit_status
+ *
+ * Clears the service's pid. If the exec status for the child has not been received yet,
+ * processing is deferred (the exec status handler picks it up later). Otherwise the watch is
+ * stopped, any armed timer is cancelled and the service's handle_exit_status() is called.
+ */
+dasynq::rearm service_child_watcher::status_change(eventloop_t &loop, pid_t child, int status) noexcept
+{
+    base_process_service *sr = service;
+
+    sr->pid = -1;
+    sr->exit_status = status;
+
+    // Ok, for a process service, any process death which we didn't rig
+    // ourselves is a bit... unexpected. Probably, the child died because
+    // we asked it to (sr->service_state == STOPPING). But even if
+    // we didn't, there's not much we can do.
+
+    if (sr->waiting_for_execstat) {
+        // We still don't have an exec() status from the forked child, wait for that
+        // before doing any further processing.
+        return dasynq::rearm::NOOP; // hold watch reservation
+    }
+
+    // Must stop watch now since handle_exit_status might result in re-launch:
+    // (stop_watch instead of deregister, so that we hold watch reservation).
+    stop_watch(loop);
+
+    if (sr->stop_timer_armed) {
+        sr->restart_timer.stop_timer(loop);
+        sr->stop_timer_armed = false;
+    }
+
+    sr->handle_exit_status(status);
+    return dasynq::rearm::NOOP;
+}
+/*
+ * React to termination of the process of a "process" service.
+ *
+ *   exit_status - wait status of the terminated process
+ *
+ * An abnormal termination is logged unless the service was STOPPING. Then, by state:
+ * STARTING - a clean exit with code 0 counts as started, anything else as a failed start;
+ * STOPPING - the service is marked stopped; STARTED with smooth recovery enabled and target
+ * state STARTED - a smooth recovery is attempted; otherwise the service is emergency-stopped.
+ */
+void process_service::handle_exit_status(int exit_status) noexcept
+{
+    bool did_exit = WIFEXITED(exit_status);
+    bool was_signalled = WIFSIGNALED(exit_status);
+    restarting = false;
+    auto service_state = get_state();
+
+    if (exit_status != 0 && service_state != service_state_t::STOPPING) {
+        if (did_exit) {
+            log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
+                    WEXITSTATUS(exit_status));
+        }
+        else if (was_signalled) {
+            log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
+                    WTERMSIG(exit_status));
+        }
+    }
+
+    if (service_state == service_state_t::STARTING) {
+        if (did_exit && WEXITSTATUS(exit_status) == 0) {
+            started();
+        }
+        else {
+            failed_to_start();
+        }
+    }
+    else if (service_state == service_state_t::STOPPING) {
+        // We won't log a non-zero exit status or termination due to signal here -
+        // we assume that the process died because we signalled it.
+        stopped();
+    }
+    else if (smooth_recovery && service_state == service_state_t::STARTED
+            && get_target_state() == service_state_t::STARTED) {
+        do_smooth_recovery();
+        return;  // do_smooth_recovery processes the queues itself when it gives up
+    }
+    else {
+        emergency_stop();
+    }
+    services->process_queues();
+}
+
+/*
+ * Handle failure to exec() the service process.
+ *
+ *   errcode - the errno value reported for the failed exec
+ *
+ * The failure is logged; a service that was STARTING has failed to start, and in any other
+ * state (smooth recovery) the service is emergency-stopped.
+ */
+void process_service::exec_failed(int errcode) noexcept
+{
+    log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
+
+    const bool was_starting = (get_state() == service_state_t::STARTING);
+    if (! was_starting) {
+        // Process service in smooth recovery:
+        emergency_stop();
+        return;
+    }
+
+    failed_to_start();
+}
+/*
+ * React to termination of a process belonging to a "bgprocess" service: either the launcher
+ * process that was forked directly, or the daemon whose pid was read from the pid file.
+ *
+ *   exit_status - wait status of the terminated process
+ *
+ * When read_pid_file() reports that the daemon has already terminated, it stores the daemon's
+ * status in exit_status and processing restarts from the `begin` label with that status.
+ */
+void bgproc_service::handle_exit_status(int exit_status) noexcept
+{
+    begin:
+    bool did_exit = WIFEXITED(exit_status);
+    bool was_signalled = WIFSIGNALED(exit_status);
+    auto service_state = get_state();
+
+    if (exit_status != 0 && service_state != service_state_t::STOPPING) {
+        if (did_exit) {
+            log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
+                    WEXITSTATUS(exit_status));
+        }
+        else if (was_signalled) {
+            log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
+                    WTERMSIG(exit_status));
+        }
+    }
+
+    // This may be a "smooth recovery" where we are restarting the process while leaving the
+    // service in the STARTED state.
+    if (restarting && service_state == service_state_t::STARTED) {
+        restarting = false;
+        bool need_stop = false;
+        if ((did_exit && WEXITSTATUS(exit_status) != 0) || was_signalled) {
+            need_stop = true;
+        }
+        else {
+            // We need to re-read the PID, since it has now changed.
+            if (pid_file.length() != 0) {
+                auto pid_result = read_pid_file(&exit_status);
+                switch (pid_result) {
+                    case pid_result_t::FAILED:
+                        // Failed startup: no auto-restart.
+                        need_stop = true;
+                        break;
+                    case pid_result_t::TERMINATED:
+                        goto begin;  // exit_status now holds the daemon's status
+                    case pid_result_t::OK:
+                        break;
+                }
+            }
+        }
+
+        if (need_stop) {
+            // Failed startup: no auto-restart.
+            emergency_stop();
+            services->process_queues();
+        }
+
+        return;
+    }
+
+    restarting = false;
+    if (service_state == service_state_t::STARTING) {
+        // POSIX requires that if the process exited clearly with a status code of 0,
+        // the exit status value will be 0:
+        if (exit_status == 0) {
+            auto pid_result = read_pid_file(&exit_status);
+            switch (pid_result) {
+                case pid_result_t::FAILED:
+                    // Failed startup: no auto-restart.
+                    failed_to_start();
+                    break;
+                case pid_result_t::TERMINATED:
+                    // started, but immediately terminated
+                    started();
+                    goto begin;
+                case pid_result_t::OK:
+                    started();
+                    break;
+            }
+        }
+        else {
+            failed_to_start();
+        }
+    }
+    else if (service_state == service_state_t::STOPPING) {
+        // We won't log a non-zero exit status or termination due to signal here -
+        // we assume that the process died because we signalled it.
+        stopped();
+    }
+    else {
+        // we must be STARTED
+        if (smooth_recovery && get_target_state() == service_state_t::STARTED) {
+            do_smooth_recovery();
+            return;
+        }
+        if (! do_auto_restart() && start_explicit) {
+            start_explicit = false;
+            release();
+        }
+        forced_stop();
+        stop_dependents();
+        stopped();
+    }
+    services->process_queues();
+}
+
+/*
+ * Handle failure to exec() the service's command: log the reason and fail the start.
+ *
+ *   errcode - the errno value reported for the failed exec
+ */
+void bgproc_service::exec_failed(int errcode) noexcept
+{
+    auto reason = strerror(errcode);
+    log(loglevel_t::ERROR, get_name(), ": execution failed: ", reason);
+
+    // Only time we execute is for startup:
+    failed_to_start();
+}
+/*
+ * React to termination of a start or stop script of a scripted service.
+ *
+ *   exit_status - wait status of the terminated script
+ *
+ * In the STOPPING state the script was either the stop script or a start script that was
+ * interrupted (interrupting_start). In any other state it is treated as the start script:
+ * an exit status of 0 means started, anything else is logged as a failed start.
+ */
+void scripted_service::handle_exit_status(int exit_status) noexcept
+{
+    bool did_exit = WIFEXITED(exit_status);
+    bool was_signalled = WIFSIGNALED(exit_status);
+    auto service_state = get_state();
+
+    // For a scripted service, a termination occurs in one of three main cases:
+    // - the start script completed (or failed), when service was STARTING
+    // - the start script was interrupted to cancel startup; state is STOPPING
+    // - the stop script completed (or failed), state is STOPPING
+
+    if (service_state == service_state_t::STOPPING) {
+        // We might be running the stop script, or we might be running the start script and have issued
+        // a cancel order via SIGINT:
+        if (did_exit && WEXITSTATUS(exit_status) == 0) {
+            if (interrupting_start) {
+                interrupting_start = false;
+                // launch stop script:
+                bring_down();
+            }
+            else {
+                // We were running the stop script and finished successfully
+                stopped();
+            }
+        }
+        else {
+            if (interrupting_start) {
+                // We issued a start interrupt, so we expected this failure:
+                if (did_exit) {
+                    log(loglevel_t::INFO, "Service ", get_name(), " start cancelled; exit code ",
+                            WEXITSTATUS(exit_status));
+                }
+                else if (was_signalled) {
+                    log(loglevel_t::INFO, "Service ", get_name(), " start cancelled from signal ",
+                            WTERMSIG(exit_status));
+                }
+            }
+            else {
+                // ??? failed to stop! Let's log it as warning:
+                if (did_exit) {
+                    log(loglevel_t::WARN, "Service ", get_name(), " stop command failed with exit code ",
+                            WEXITSTATUS(exit_status));
+                }
+                else if (was_signalled) {
+                    log(loglevel_t::WARN, "Service ", get_name(), " stop command terminated due to signal ",
+                            WTERMSIG(exit_status));
+                }
+            }
+            // Even if the stop script failed, assume that service is now stopped, so that any dependencies
+            // can be stopped. There's not really any other useful course of action here.
+            interrupting_start = false;
+            stopped();
+        }
+        services->process_queues();
+    }
+    else { // STARTING
+        if (exit_status == 0) {
+            started();
+        }
+        else {
+            // failed to start
+            if (did_exit) {
+                log(loglevel_t::ERROR, "Service ", get_name(), " command failed with exit code ",
+                        WEXITSTATUS(exit_status));
+            }
+            else if (was_signalled) {
+                log(loglevel_t::ERROR, "Service ", get_name(), " command terminated due to signal ",
+                        WTERMSIG(exit_status));
+            }
+            failed_to_start();
+        }
+        services->process_queues();
+    }
+}
+
+/*
+ * Handle failure to exec() a start or stop script.
+ *
+ *   errcode - the errno value reported for the failed exec
+ *
+ * The failure is logged. A STARTING service has failed to start; a STOPPING service is
+ * marked stopped; in any other state nothing further is done.
+ */
+void scripted_service::exec_failed(int errcode) noexcept
+{
+    log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
+
+    switch (get_state()) {
+        case service_state_t::STARTING:
+            failed_to_start();
+            break;
+        case service_state_t::STOPPING:
+            // We've logged the failure, but it's probably better not to leave the service in
+            // STOPPING state:
+            stopped();
+            break;
+        default:
+            break;
+    }
+}
+
+/*
+ * Read the daemon's process ID from the service's pid file and begin tracking the process.
+ *
+ *   exit_status - receives the wait status if the process turns out to have terminated
+ *                 already (result TERMINATED)
+ *
+ * Returns:
+ *   OK         - `pid` refers to an existing process. tracking_child is true if it is our
+ *                child and a child watch was added, false if it can only be signalled.
+ *   TERMINATED - the process was our child and has already exited; `pid` is reset to -1.
+ *   FAILED     - the file could not be read or did not contain a usable pid (logged);
+ *                `pid` is reset to -1 unless the file could not be opened or read at all.
+ */
+bgproc_service::pid_result_t
+bgproc_service::read_pid_file(int *exit_status) noexcept
+{
+    const char *pid_file_c = pid_file.c_str();
+    // open() requires an access mode; state O_RDONLY explicitly instead of relying on its
+    // value happening to be zero.
+    int fd = open(pid_file_c, O_RDONLY | O_CLOEXEC);
+    if (fd == -1) {
+        log(loglevel_t::ERROR, get_name(), ": read pid file: ", strerror(errno));
+        return pid_result_t::FAILED;
+    }
+
+    char pidbuf[21]; // just enough to hold any 64-bit integer
+    int r = ss_read(fd, pidbuf, 20);
+    if (r < 0) {
+        // Could not read from PID file
+        log(loglevel_t::ERROR, get_name(), ": could not read from pidfile; ", strerror(errno));
+        close(fd);
+        return pid_result_t::FAILED;
+    }
+
+    close(fd);
+    pidbuf[r] = 0; // store nul terminator
+
+    bool valid_pid = false;
+    try {
+        unsigned long long v = std::stoull(pidbuf, nullptr, 0);
+        // A pid of zero must be rejected: waitpid(0, ...) and kill(0, ...) address the caller's
+        // whole process group rather than one process, so other children could be reaped.
+        if (v != 0 && v <= static_cast<unsigned long long>(std::numeric_limits<pid_t>::max())) {
+            pid = (pid_t) v;
+            valid_pid = true;
+        }
+    }
+    catch (const std::out_of_range &) {
+        // Too large?
+    }
+    catch (const std::invalid_argument &) {
+        // Ok, so it doesn't look like a number: proceed...
+    }
+
+    if (valid_pid) {
+        pid_t wait_r = waitpid(pid, exit_status, WNOHANG);
+        if (wait_r == -1 && errno == ECHILD) {
+            // We can't track this child - check process exists:
+            if (kill(pid, 0) == 0 || errno != ESRCH) {
+                tracking_child = false;
+                return pid_result_t::OK;
+            }
+            else {
+                log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
+                pid = -1;
+                return pid_result_t::FAILED;
+            }
+        }
+        else if (wait_r == pid) {
+            // Our child, and it has already terminated; *exit_status holds its status.
+            pid = -1;
+            return pid_result_t::TERMINATED;
+        }
+        else if (wait_r == 0) {
+            // We can track the child
+            child_listener.add_reserved(event_loop, pid, dasynq::DEFAULT_PRIORITY - 10);
+            tracking_child = true;
+            reserved_child_watch = true;
+            return pid_result_t::OK;
+        }
+    }
+
+    // NOTE(review): when the file contents could not be parsed, `pid` still holds its previous
+    // value here, so the number in this message is not what was read from the file.
+    log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
+    pid = -1;
+    return pid_result_t::FAILED;
+}
+/*
+ * Stop the process of a "process" service.
+ *
+ * If the exec status of a launch is still outstanding nothing is done yet (the exec status
+ * handler continues the stop). If a process is running its process group is signalled and
+ * the stop timeout (if non-zero) is armed; if there is no process the service is marked
+ * stopped immediately.
+ *
+ * NOTE(review): the BGPROCESS test below mirrors base_process_service::bring_down; confirm
+ * whether it can ever be true for a process_service.
+ */
+void process_service::bring_down() noexcept
+{
+    waiting_for_deps = false;
+    if (waiting_for_execstat) {
+        // The process is still starting. This should be uncommon, but can occur during
+        // smooth recovery. We can't do much now; we have to wait until we get the
+        // status, and then act appropriately.
+        return;
+    }
+    else if (pid != -1) {
+        // The process is still kicking on - must actually kill it. We signal the process
+        // group (see kill_pg) rather than just the process as there's less risk then of creating
+        // an orphaned process group:
+        if (! onstart_flags.no_sigterm) {
+            kill_pg(SIGTERM);
+        }
+        if (term_signal != -1) {
+            kill_pg(term_signal);  // sent in addition to SIGTERM (unless no_sigterm is set)
+        }
+
+        // In most cases, the rest is done in handle_exit_status.
+        // If we are a BGPROCESS and the process is not our immediate child, however, that
+        // won't work - check for this now:
+        if (get_type() == service_type_t::BGPROCESS && ! tracking_child) {
+            stopped();
+        }
+        else if (stop_timeout != time_val(0,0)) {
+            restart_timer.arm_timer_rel(event_loop, stop_timeout);
+            stop_timer_armed = true;
+        }
+    }
+    else {
+        // The process is already dead.
+        stopped();
+    }
+}
+
+/*
+ * Stop a scripted service by running its stop command, if it has one.
+ *
+ * With no stop command, or if the stop command cannot be launched, the service is marked
+ * stopped immediately. Otherwise the stop timeout (if non-zero) is armed on restart_timer.
+ */
+void scripted_service::bring_down() noexcept
+{
+    waiting_for_deps = false;
+
+    // Only attempt a launch when a stop command is configured.
+    const bool stop_script_running =
+            ! stop_command.empty() && start_ps_process(stop_arg_parts, false);
+    if (! stop_script_running) {
+        // Either there is nothing to run, or we couldn't execute the stop script; in the
+        // latter case there's not much we can do:
+        stopped();
+        return;
+    }
+
+    // successfully started stop script: start kill timer:
+    if (stop_timeout != time_val(0,0)) {
+        restart_timer.arm_timer_rel(event_loop, stop_timeout);
+        stop_timer_armed = true;
+    }
+}
+/*
+ * Expiry handler for the per-service timer, which serves as stop timeout, start timeout and
+ * restart delay. The action is chosen from the service state and whether a process exists.
+ * Always returns rearm::NOOP.
+ */
+dasynq::rearm process_restart_timer::timer_expiry(eventloop_t &, int expiry_count)
+{
+    service->stop_timer_armed = false;
+
+    // Timer expires if:
+    // We are stopping, including after having startup cancelled (stop timeout, state is STOPPING); We are
+    // starting (start timeout, state is STARTING); We are waiting for restart timer before restarting,
+    // including smooth recovery (restart timeout, state is STARTING or STARTED).
+    if (service->get_state() == service_state_t::STOPPING) {
+        service->kill_with_fire();
+    }
+    else if (service->pid != -1) {
+        // Starting, start timed out.
+        service->stop_dependents();
+        service->interrupt_start();
+    }
+    else {
+        // STARTING / STARTED, and we have no pid: must be restarting (smooth recovery if STARTED)
+        service->do_restart();
+    }
+
+    // Leave the timer disabled, or, if it has been reset by any processing above, leave it armed:
+    return dasynq::rearm::NOOP;
+}
diff --git a/src/proc-service.h b/src/proc-service.h
new file mode 100644 (file)
index 0000000..edba90e
--- /dev/null
@@ -0,0 +1,247 @@
+#include "service.h"
+
+// Given a string and a list of pairs of (start,end) indices for each argument in that string,
+// store a nul terminator at the end index of each argument (where that index is within the string).
+// Return a vector of `const char *` pointing to the beginning of each argument, followed by a trailing nullptr. (The returned pointers are invalidated if the string is later modified).
+static std::vector<const char *> separate_args(std::string &s, std::list<std::pair<unsigned,unsigned>> &arg_indices)
+{
+    std::vector<const char *> r;
+    r.reserve(arg_indices.size() + 1);
+
+    // First store nul terminator for each part:
+    for (auto index_pair : arg_indices) {
+        if (index_pair.second < s.length()) {
+            s[index_pair.second] = 0;
+        }
+    }
+
+    // Now we can get the C string (c_str) and store offsets into it:
+    const char * cstr = s.c_str();
+    for (auto index_pair : arg_indices) {
+        r.push_back(cstr + index_pair.first);
+    }
+    r.push_back(nullptr);
+    return r;
+}
+
+class base_process_service;
+
+// A timer for process restarting. Used to ensure a minimum delay between process restarts (and
+// also for timing service stop before the SIGKILL hammer is used).
+class process_restart_timer : public eventloop_t::timer_impl<process_restart_timer>
+{
+    public:
+    base_process_service * service;
+
+    process_restart_timer(base_process_service *service_p)
+        : service(service_p)
+    {
+    }
+
+    dasynq::rearm timer_expiry(eventloop_t &, int expiry_count);
+};
+
+class base_process_service : public service_record
+{
+    friend class service_child_watcher;
+    friend class exec_status_pipe_watcher;
+    friend class process_restart_timer;
+
+    private:
+    // Re-launch process
+    void do_restart() noexcept;
+
+    protected:
+    string program_name;          // storage for program/script and arguments
+    std::vector<const char *> exec_arg_parts; // pointer to each argument/part of the program_name, and nullptr
+
+    string stop_command;          // storage for stop program/script and arguments
+    std::vector<const char *> stop_arg_parts; // pointer to each argument/part of the stop_command, and nullptr
+
+    service_child_watcher child_listener;
+    exec_status_pipe_watcher child_status_listener;
+    process_restart_timer restart_timer;
+    time_val last_start_time;
+
+    // Restart interval time and restart count are used to track the number of automatic restarts
+    // over an interval. Too many restarts over an interval will inhibit further restarts.
+    time_val restart_interval_time;  // current restart interval
+    int restart_interval_count;      // count of restarts within current interval
+
+    time_val restart_interval;       // maximum restart interval
+    int max_restart_interval_count;  // number of restarts allowed over maximum interval
+    time_val restart_delay;          // delay between restarts
+
+    // Time allowed for service stop, after which SIGKILL is sent. 0 to disable.
+    time_val stop_timeout = {10, 0}; // default of 10 seconds
+
+    // Time allowed for service start, after which SIGINT is sent (and then SIGKILL after
+    // <stop_timeout>). 0 to disable.
+    time_val start_timeout = {60, 0}; // default of 1 minute
+
+    bool waiting_restart_timer : 1;
+    bool stop_timer_armed : 1;
+    bool reserved_child_watch : 1;
+    bool tracking_child : 1;  // whether we expect to see child process status
+    bool start_is_interruptible : 1;  // whether we can interrupt start
+
+    // Launch the process with the given arguments, return true on success
+    bool start_ps_process(const std::vector<const char *> &args, bool on_console) noexcept;
+
+    // Restart the process (due to start failure or unexpected termination). Restarts will be
+    // rate-limited.
+    bool restart_ps_process() noexcept;
+
+    // Perform smooth recovery process
+    void do_smooth_recovery() noexcept;
+
+    // Start the process, return true on success
+    virtual bool bring_up() noexcept override;
+
+    virtual void bring_down() noexcept override;
+
+    // Called when the process exits. The exit_status is the status value yielded by
+    // the "wait" system call.
+    virtual void handle_exit_status(int exit_status) noexcept = 0;
+
+    // Called if an exec fails.
+    virtual void exec_failed(int errcode) noexcept = 0;
+
+    virtual bool can_interrupt_start() noexcept override
+    {
+        return waiting_restart_timer || start_is_interruptible || service_record::can_interrupt_start();
+    }
+
+    virtual bool can_proceed_to_start() noexcept override
+    {
+        return ! waiting_restart_timer;
+    }
+
+    virtual bool interrupt_start() noexcept override;
+
+    // Kill with SIGKILL
+    void kill_with_fire() noexcept;
+
+    // Signal the process group of the service process
+    void kill_pg(int signo) noexcept;
+
+    public:
+    base_process_service(service_set *sset, string name, service_type_t record_type_p, string &&command,
+            std::list<std::pair<unsigned,unsigned>> &command_offsets,
+            const std::list<prelim_dep> &deplist_p);
+
+    ~base_process_service() noexcept
+    {
+    }
+
+    // Set the stop command and arguments (may throw std::bad_alloc)
+    void set_stop_command(std::string command, std::list<std::pair<unsigned,unsigned>> &stop_command_offsets)
+    {
+        stop_command = command;
+        stop_arg_parts = separate_args(stop_command, stop_command_offsets);
+    }
+
+    void set_restart_interval(timespec interval, int max_restarts) noexcept
+    {
+        restart_interval = interval;
+        max_restart_interval_count = max_restarts;
+    }
+
+    void set_restart_delay(timespec delay) noexcept
+    {
+        restart_delay = delay;
+    }
+
+    void set_stop_timeout(timespec timeout) noexcept
+    {
+        stop_timeout = timeout;
+    }
+
+    void set_start_timeout(timespec timeout) noexcept
+    {
+        start_timeout = timeout;
+    }
+
+    void set_start_interruptible(bool value) noexcept
+    {
+        start_is_interruptible = value;
+    }
+};
+
+class process_service : public base_process_service
+{
+    virtual void handle_exit_status(int exit_status) noexcept override;
+    virtual void exec_failed(int errcode) noexcept override;
+    virtual void bring_down() noexcept override;
+
+    public:
+    process_service(service_set *sset, string name, string &&command,
+            std::list<std::pair<unsigned,unsigned>> &command_offsets,
+            std::list<prelim_dep> depends_p)
+         : base_process_service(sset, name, service_type_t::PROCESS, std::move(command), command_offsets,
+             depends_p)
+    {
+    }
+
+    ~process_service() noexcept
+    {
+    }
+};
+
+class bgproc_service : public base_process_service
+{
+    virtual void handle_exit_status(int exit_status) noexcept override;
+    virtual void exec_failed(int errcode) noexcept override;
+
+    enum class pid_result_t {
+        OK,
+        FAILED,      // failed to read pid or read invalid pid
+        TERMINATED   // read pid successfully, but the process already terminated
+    };
+
+    // Read the pid-file; returns OK on success, FAILED if no valid pid could be read, or TERMINATED if the process has already terminated
+    pid_result_t read_pid_file(int *exit_status) noexcept;
+
+    public:
+    bgproc_service(service_set *sset, string name, string &&command,
+            std::list<std::pair<unsigned,unsigned>> &command_offsets,
+            std::list<prelim_dep> depends_p)
+         : base_process_service(sset, name, service_type_t::BGPROCESS, std::move(command), command_offsets,
+             depends_p)
+    {
+    }
+
+    ~bgproc_service() noexcept
+    {
+    }
+};
+
+class scripted_service : public base_process_service
+{
+    virtual void handle_exit_status(int exit_status) noexcept override;
+    virtual void exec_failed(int errcode) noexcept override;
+    virtual void bring_down() noexcept override;
+
+    virtual bool interrupt_start() noexcept override
+    {
+        // if base::interrupt_start() returns false, then start hasn't been fully interrupted, but an
+        // interrupt has been issued:
+        interrupting_start = ! base_process_service::interrupt_start();
+        return ! interrupting_start;
+    }
+
+    bool interrupting_start : 1;  // running start script (true) or stop script (false)
+
+    public:
+    scripted_service(service_set *sset, string name, string &&command,
+            std::list<std::pair<unsigned,unsigned>> &command_offsets,
+            std::list<prelim_dep> depends_p)
+         : base_process_service(sset, name, service_type_t::SCRIPTED, std::move(command), command_offsets,
+             depends_p), interrupting_start(false)
+    {
+    }
+
+    ~scripted_service() noexcept
+    {
+    }
+};
index 3420b7da6f6f528dcdb96ccdb68f1c828ce9faa1..8e067199a92fe3eee8e5700a7b24064609b702a7 100644 (file)
@@ -27,7 +27,6 @@
 // from dinit.cc:
 void open_control_socket(bool report_ro_failure = true) noexcept;
 void setup_external_log() noexcept;
-extern eventloop_t event_loop;
 
 using clock_type = dasynq::clock_type;
 using rearm = dasynq::rearm;
@@ -117,36 +116,6 @@ void service_record::stopped() noexcept
     notify_listeners(service_event_t::STOPPED);
 }
 
-dasynq::rearm service_child_watcher::status_change(eventloop_t &loop, pid_t child, int status) noexcept
-{
-    base_process_service *sr = service;
-    
-    sr->pid = -1;
-    sr->exit_status = status;
-    
-    // Ok, for a process service, any process death which we didn't rig
-    // ourselves is a bit... unexpected. Probably, the child died because
-    // we asked it to (sr->service_state == STOPPING). But even if
-    // we didn't, there's not much we can do.
-    
-    if (sr->waiting_for_execstat) {
-        // We still don't have an exec() status from the forked child, wait for that
-        // before doing any further processing.
-        return dasynq::rearm::NOOP; // hold watch reservation
-    }
-    
-    // Must stop watch now since handle_exit_status might result in re-launch:
-    // (stop_watch instead of deregister, so that we hold watch reservation).
-    stop_watch(loop);
-    
-    if (sr->stop_timer_armed) {
-        sr->restart_timer.stop_timer(loop);
-        sr->stop_timer_armed = false;
-    }
-
-    sr->handle_exit_status(status);
-    return dasynq::rearm::NOOP;
-}
 
 bool service_record::do_auto_restart() noexcept
 {
@@ -167,314 +136,6 @@ void service_record::emergency_stop() noexcept
     stopped();
 }
 
-void base_process_service::do_smooth_recovery() noexcept
-{
-    if (! restart_ps_process()) {
-        emergency_stop();
-        services->process_queues();
-    }
-}
-
-void process_service::handle_exit_status(int exit_status) noexcept
-{
-    bool did_exit = WIFEXITED(exit_status);
-    bool was_signalled = WIFSIGNALED(exit_status);
-    restarting = false;
-    auto service_state = get_state();
-
-    if (exit_status != 0 && service_state != service_state_t::STOPPING) {
-        if (did_exit) {
-            log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
-                    WEXITSTATUS(exit_status));
-        }
-        else if (was_signalled) {
-            log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
-                    WTERMSIG(exit_status));
-        }
-    }
-
-    if (service_state == service_state_t::STARTING) {
-        if (did_exit && WEXITSTATUS(exit_status) == 0) {
-            started();
-        }
-        else {
-            failed_to_start();
-        }
-    }
-    else if (service_state == service_state_t::STOPPING) {
-        // We won't log a non-zero exit status or termination due to signal here -
-        // we assume that the process died because we signalled it.
-        stopped();
-    }
-    else if (smooth_recovery && service_state == service_state_t::STARTED
-            && get_target_state() == service_state_t::STARTED) {
-        do_smooth_recovery();
-        return;
-    }
-    else {
-        emergency_stop();
-    }
-    services->process_queues();
-}
-
-void process_service::exec_failed(int errcode) noexcept
-{
-    log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
-    if (get_state() == service_state_t::STARTING) {
-        failed_to_start();
-    }
-    else {
-        // Process service in smooth recovery:
-        emergency_stop();
-    }
-}
-
-void bgproc_service::handle_exit_status(int exit_status) noexcept
-{
-    begin:
-    bool did_exit = WIFEXITED(exit_status);
-    bool was_signalled = WIFSIGNALED(exit_status);
-    auto service_state = get_state();
-
-    if (exit_status != 0 && service_state != service_state_t::STOPPING) {
-        if (did_exit) {
-            log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
-                    WEXITSTATUS(exit_status));
-        }
-        else if (was_signalled) {
-            log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
-                    WTERMSIG(exit_status));
-        }
-    }
-
-    // This may be a "smooth recovery" where we are restarting the process while leaving the
-    // service in the STARTED state.
-    if (restarting && service_state == service_state_t::STARTED) {
-        restarting = false;
-        bool need_stop = false;
-        if ((did_exit && WEXITSTATUS(exit_status) != 0) || was_signalled) {
-            need_stop = true;
-        }
-        else {
-            // We need to re-read the PID, since it has now changed.
-            if (pid_file.length() != 0) {
-                auto pid_result = read_pid_file(&exit_status);
-                switch (pid_result) {
-                    case pid_result_t::FAILED:
-                        // Failed startup: no auto-restart.
-                        need_stop = true;
-                        break;
-                    case pid_result_t::TERMINATED:
-                        goto begin;
-                    case pid_result_t::OK:
-                        break;
-                }
-            }
-        }
-
-        if (need_stop) {
-            // Failed startup: no auto-restart.
-            emergency_stop();
-            services->process_queues();
-        }
-
-        return;
-    }
-
-    restarting = false;
-    if (service_state == service_state_t::STARTING) {
-        // POSIX requires that if the process exited clearly with a status code of 0,
-        // the exit status value will be 0:
-        if (exit_status == 0) {
-            auto pid_result = read_pid_file(&exit_status);
-            switch (pid_result) {
-                case pid_result_t::FAILED:
-                    // Failed startup: no auto-restart.
-                    failed_to_start();
-                    break;
-                case pid_result_t::TERMINATED:
-                    // started, but immediately terminated
-                    started();
-                    goto begin;
-                case pid_result_t::OK:
-                    started();
-                    break;
-            }
-        }
-        else {
-            failed_to_start();
-        }
-    }
-    else if (service_state == service_state_t::STOPPING) {
-        // We won't log a non-zero exit status or termination due to signal here -
-        // we assume that the process died because we signalled it.
-        stopped();
-    }
-    else {
-        // we must be STARTED
-        if (smooth_recovery && get_target_state() == service_state_t::STARTED) {
-            do_smooth_recovery();
-            return;
-        }
-        if (! do_auto_restart() && start_explicit) {
-            start_explicit = false;
-            release();
-        }
-        forced_stop();
-        stop_dependents();
-        stopped();
-    }
-    services->process_queues();
-}
-
-void bgproc_service::exec_failed(int errcode) noexcept
-{
-    log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
-    // Only time we execute is for startup:
-    failed_to_start();
-}
-
-void scripted_service::handle_exit_status(int exit_status) noexcept
-{
-    bool did_exit = WIFEXITED(exit_status);
-    bool was_signalled = WIFSIGNALED(exit_status);
-    auto service_state = get_state();
-
-    // For a scripted service, a termination occurs in one of three main cases:
-    // - the start script completed (or failed), when service was STARTING
-    // - the start script was interrupted to cancel startup; state is STOPPING
-    // - the stop script complete (or failed), state is STOPPING
-
-    if (service_state == service_state_t::STOPPING) {
-        // We might be running the stop script, or we might be running the start script and have issued
-        // a cancel order via SIGINT:
-        if (did_exit && WEXITSTATUS(exit_status) == 0) {
-            if (interrupting_start) {
-                interrupting_start = false;
-                // launch stop script:
-                bring_down();
-            }
-            else {
-                // We were running the stop script and finished successfully
-                stopped();
-            }
-        }
-        else {
-            if (interrupting_start) {
-                // We issued a start interrupt, so we expected this failure:
-                if (did_exit) {
-                    log(loglevel_t::INFO, "Service ", get_name(), " start cancelled; exit code ",
-                            WEXITSTATUS(exit_status));
-                }
-                else if (was_signalled) {
-                    log(loglevel_t::INFO, "Service ", get_name(), " start cancelled from signal ",
-                            WTERMSIG(exit_status));
-                }
-            }
-            else {
-                // ??? failed to stop! Let's log it as warning:
-                if (did_exit) {
-                    log(loglevel_t::WARN, "Service ", get_name(), " stop command failed with exit code ",
-                            WEXITSTATUS(exit_status));
-                }
-                else if (was_signalled) {
-                    log(loglevel_t::WARN, "Service ", get_name(), " stop command terminated due to signal ",
-                            WTERMSIG(exit_status));
-                }
-            }
-            // Even if the stop script failed, assume that service is now stopped, so that any dependencies
-            // can be stopped. There's not really any other useful course of action here.
-            interrupting_start = false;
-            stopped();
-        }
-        services->process_queues();
-    }
-    else { // STARTING
-        if (exit_status == 0) {
-            started();
-        }
-        else {
-            // failed to start
-            if (did_exit) {
-                log(loglevel_t::ERROR, "Service ", get_name(), " command failed with exit code ",
-                        WEXITSTATUS(exit_status));
-            }
-            else if (was_signalled) {
-                log(loglevel_t::ERROR, "Service ", get_name(), " command terminated due to signal ",
-                        WTERMSIG(exit_status));
-            }
-            failed_to_start();
-        }
-        services->process_queues();
-    }
-}
-
-void scripted_service::exec_failed(int errcode) noexcept
-{
-    log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
-    auto service_state = get_state();
-    if (service_state == service_state_t::STARTING) {
-        failed_to_start();
-    }
-    else if (service_state == service_state_t::STOPPING) {
-        // We've logged the failure, but it's probably better not to leave the service in
-        // STOPPING state:
-        stopped();
-    }
-}
-
-rearm exec_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) noexcept
-{
-    base_process_service *sr = service;
-    sr->waiting_for_execstat = false;
-    
-    int exec_status;
-    int r = read(get_watched_fd(), &exec_status, sizeof(int));
-    deregister(loop);
-    close(get_watched_fd());
-    
-    if (r > 0) {
-        // We read an errno code; exec() failed, and the service startup failed.
-        if (sr->pid != -1) {
-            sr->child_listener.deregister(event_loop, sr->pid);
-            sr->reserved_child_watch = false;
-            if (sr->stop_timer_armed) {
-                sr->restart_timer.stop_timer(loop);
-                sr->stop_timer_armed = false;
-            }
-        }
-        sr->pid = -1;
-        sr->exec_failed(exec_status);
-    }
-    else {
-        // exec() succeeded.
-        if (sr->get_type() == service_type_t::PROCESS) {
-            // This could be a smooth recovery (state already STARTED). Even more, the process
-            // might be stopped (and killed via a signal) during smooth recovery.  We don't to
-            // process startup again in either case, so we check for state STARTING:
-            if (sr->get_state() == service_state_t::STARTING) {
-                sr->started();
-            }
-            else if (sr->get_state() == service_state_t::STOPPING) {
-                // stopping, but smooth recovery was in process. That's now over so we can
-                // commence normal stop. Note that if pid == -1 the process already stopped(!),
-                // that's handled below.
-                if (sr->pid != -1 && sr->stop_check_dependents()) {
-                    sr->bring_down();
-                }
-            }
-        }
-        
-        if (sr->pid == -1) {
-            // Somehow the process managed to complete before we even saw the status.
-            sr->handle_exit_status(sr->exit_status);
-        }
-    }
-    
-    sr->services->process_queues();
-    
-    return rearm::REMOVED;
-}
 
 void service_record::require() noexcept
 {
@@ -776,74 +437,6 @@ void service_record::acquired_console() noexcept
     }
 }
 
-bgproc_service::pid_result_t
-bgproc_service::read_pid_file(int *exit_status) noexcept
-{
-    const char *pid_file_c = pid_file.c_str();
-    int fd = open(pid_file_c, O_CLOEXEC);
-    if (fd == -1) {
-        log(loglevel_t::ERROR, get_name(), ": read pid file: ", strerror(errno));
-        return pid_result_t::FAILED;
-    }
-
-    char pidbuf[21]; // just enough to hold any 64-bit integer
-    int r = ss_read(fd, pidbuf, 20);
-    if (r < 0) {
-        // Could not read from PID file
-        log(loglevel_t::ERROR, get_name(), ": could not read from pidfile; ", strerror(errno));
-        close(fd);
-        return pid_result_t::FAILED;
-    }
-
-    close(fd);
-    pidbuf[r] = 0; // store nul terminator
-
-    bool valid_pid = false;
-    try {
-        unsigned long long v = std::stoull(pidbuf, nullptr, 0);
-        if (v <= std::numeric_limits<pid_t>::max()) {
-            pid = (pid_t) v;
-            valid_pid = true;
-        }
-    }
-    catch (std::out_of_range &exc) {
-        // Too large?
-    }
-    catch (std::invalid_argument &exc) {
-        // Ok, so it doesn't look like a number: proceed...
-    }
-
-    if (valid_pid) {
-        pid_t wait_r = waitpid(pid, exit_status, WNOHANG);
-        if (wait_r == -1 && errno == ECHILD) {
-            // We can't track this child - check process exists:
-            if (kill(pid, 0) == 0 || errno != ESRCH) {
-                tracking_child = false;
-                return pid_result_t::OK;
-            }
-            else {
-                log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
-                pid = -1;
-                return pid_result_t::FAILED;
-            }
-        }
-        else if (wait_r == pid) {
-            pid = -1;
-            return pid_result_t::TERMINATED;
-        }
-        else if (wait_r == 0) {
-            // We can track the child
-            child_listener.add_reserved(event_loop, pid, dasynq::DEFAULT_PRIORITY - 10);
-            tracking_child = true;
-            reserved_child_watch = true;
-            return pid_result_t::OK;
-        }
-    }
-
-    log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
-    pid = -1;
-    return pid_result_t::FAILED;
-}
 
 void service_record::started() noexcept
 {
@@ -922,131 +515,6 @@ bool service_record::bring_up() noexcept
     return true;
 }
 
-bool base_process_service::bring_up() noexcept
-{
-    if (restarting) {
-        if (pid == -1) {
-            return restart_ps_process();
-        }
-        return true;
-    }
-    else {
-        event_loop.get_time(restart_interval_time, clock_type::MONOTONIC);
-        restart_interval_count = 0;
-        if (start_ps_process(exec_arg_parts, onstart_flags.starts_on_console)) {
-            if (start_timeout != time_val(0,0)) {
-                restart_timer.arm_timer_rel(event_loop, start_timeout);
-                stop_timer_armed = true;
-            }
-            else if (stop_timer_armed) {
-                restart_timer.stop_timer(event_loop);
-                stop_timer_armed = false;
-            }
-            return true;
-        }
-        return false;
-    }
-}
-
-bool base_process_service::start_ps_process(const std::vector<const char *> &cmd, bool on_console) noexcept
-{
-    // In general, you can't tell whether fork/exec is successful. We use a pipe to communicate
-    // success/failure from the child to the parent. The pipe is set CLOEXEC so a successful
-    // exec closes the pipe, and the parent sees EOF. If the exec is unsuccessful, the errno
-    // is written to the pipe, and the parent can read it.
-
-    event_loop.get_time(last_start_time, clock_type::MONOTONIC);
-
-    int pipefd[2];
-    if (dasynq::pipe2(pipefd, O_CLOEXEC)) {
-        log(loglevel_t::ERROR, get_name(), ": can't create status check pipe: ", strerror(errno));
-        return false;
-    }
-
-    const char * logfile = this->logfile.c_str();
-    if (*logfile == 0) {
-        logfile = "/dev/null";
-    }
-
-    bool child_status_registered = false;
-    control_conn_t *control_conn = nullptr;
-    
-    int control_socket[2] = {-1, -1};
-    if (onstart_flags.pass_cs_fd) {
-        if (dinit_socketpair(AF_UNIX, SOCK_STREAM, /* protocol */ 0, control_socket, SOCK_NONBLOCK)) {
-            log(loglevel_t::ERROR, get_name(), ": can't create control socket: ", strerror(errno));
-            goto out_p;
-        }
-        
-        // Make the server side socket close-on-exec:
-        int fdflags = fcntl(control_socket[0], F_GETFD);
-        fcntl(control_socket[0], F_SETFD, fdflags | FD_CLOEXEC);
-        
-        try {
-            control_conn = new control_conn_t(event_loop, services, control_socket[0]);
-        }
-        catch (std::exception &exc) {
-            log(loglevel_t::ERROR, get_name(), ": can't launch process; out of memory");
-            goto out_cs;
-        }
-    }
-    
-    // Set up complete, now fork and exec:
-    
-    pid_t forkpid;
-    
-    try {
-        child_status_listener.add_watch(event_loop, pipefd[0], dasynq::IN_EVENTS);
-        child_status_registered = true;
-        
-        // We specify a high priority (i.e. low priority value) so that process termination is
-        // handled early. This means we have always recorded that the process is terminated by the
-        // time that we handle events that might otherwise cause us to signal the process, so we
-        // avoid sending a signal to an invalid (and possibly recycled) process ID.
-        forkpid = child_listener.fork(event_loop, reserved_child_watch, dasynq::DEFAULT_PRIORITY - 10);
-        reserved_child_watch = true;
-    }
-    catch (std::exception &e) {
-        log(loglevel_t::ERROR, get_name(), ": Could not fork: ", e.what());
-        goto out_cs_h;
-    }
-
-    if (forkpid == 0) {
-        run_child_proc(cmd.data(), logfile, on_console, pipefd[1], control_socket[1]);
-    }
-    else {
-        // Parent process
-        close(pipefd[1]); // close the 'other end' fd
-        if (control_socket[1] != -1) {
-            close(control_socket[1]);
-        }
-        pid = forkpid;
-
-        waiting_for_execstat = true;
-        return true;
-    }
-
-    // Failure exit:
-    
-    out_cs_h:
-    if (child_status_registered) {
-        child_status_listener.deregister(event_loop);
-    }
-    
-    if (onstart_flags.pass_cs_fd) {
-        delete control_conn;
-    
-        out_cs:
-        close(control_socket[0]);
-        close(control_socket[1]);
-    }
-    
-    out_p:
-    close(pipefd[0]);
-    close(pipefd[1]);
-    
-    return false;
-}
 
 void service_record::run_child_proc(const char * const *args, const char *logfile, bool on_console,
         int wpipefd, int csfd) noexcept
@@ -1289,104 +757,6 @@ void service_record::bring_down() noexcept
     stopped();
 }
 
-void base_process_service::kill_pg(int signo) noexcept
-{
-    pid_t pgid = getpgid(pid);
-    if (pgid == -1) {
-        // only should happen if pid is invalid, which should never happen...
-        log(loglevel_t::ERROR, get_name(), ": can't signal process: ", strerror(errno));
-        return;
-    }
-    kill(-pgid, signo);
-}
-
-void base_process_service::bring_down() noexcept
-{
-    waiting_for_deps = false;
-    if (pid != -1) {
-        // The process is still kicking on - must actually kill it. We signal the process
-        // group (-pid) rather than just the process as there's less risk then of creating
-        // an orphaned process group:
-        if (! onstart_flags.no_sigterm) {
-            kill_pg(SIGTERM);
-        }
-        if (term_signal != -1) {
-            kill_pg(term_signal);
-        }
-
-        // In most cases, the rest is done in handle_exit_status.
-        // If we are a BGPROCESS and the process is not our immediate child, however, that
-        // won't work - check for this now:
-        if (get_type() == service_type_t::BGPROCESS && ! tracking_child) {
-            stopped();
-        }
-        else if (stop_timeout != time_val(0,0)) {
-            restart_timer.arm_timer_rel(event_loop, stop_timeout);
-            stop_timer_armed = true;
-        }
-    }
-    else {
-        // The process is already dead.
-        stopped();
-    }
-}
-
-void process_service::bring_down() noexcept
-{
-    waiting_for_deps = false;
-    if (waiting_for_execstat) {
-        // The process is still starting. This should be uncommon, but can occur during
-        // smooth recovery. We can't do much now; we have to wait until we get the
-        // status, and then act appropriately.
-        return;
-    }
-    else if (pid != -1) {
-        // The process is still kicking on - must actually kill it. We signal the process
-        // group (-pid) rather than just the process as there's less risk then of creating
-        // an orphaned process group:
-        if (! onstart_flags.no_sigterm) {
-            kill_pg(SIGTERM);
-        }
-        if (term_signal != -1) {
-            kill_pg(term_signal);
-        }
-
-        // In most cases, the rest is done in handle_exit_status.
-        // If we are a BGPROCESS and the process is not our immediate child, however, that
-        // won't work - check for this now:
-        if (get_type() == service_type_t::BGPROCESS && ! tracking_child) {
-            stopped();
-        }
-        else if (stop_timeout != time_val(0,0)) {
-            restart_timer.arm_timer_rel(event_loop, stop_timeout);
-            stop_timer_armed = true;
-        }
-    }
-    else {
-        // The process is already dead.
-        stopped();
-    }
-}
-
-void scripted_service::bring_down() noexcept
-{
-    waiting_for_deps = false;
-    if (stop_command.length() == 0) {
-        stopped();
-    }
-    else if (! start_ps_process(stop_arg_parts, false)) {
-        // Couldn't execute stop script, but there's not much we can do:
-        stopped();
-    }
-    else {
-        // successfully started stop script: start kill timer:
-        if (stop_timeout != time_val(0,0)) {
-            restart_timer.arm_timer_rel(event_loop, stop_timeout);
-            stop_timer_armed = true;
-        }
-    }
-}
-
 void service_record::unpin() noexcept
 {
     if (pinned_started) {
@@ -1430,156 +800,3 @@ void service_set::service_inactive(service_record *sr) noexcept
 {
     active_services--;
 }
-
-base_process_service::base_process_service(service_set *sset, string name,
-        service_type_t service_type_p, string &&command,
-        std::list<std::pair<unsigned,unsigned>> &command_offsets,
-        const std::list<prelim_dep> &deplist_p)
-     : service_record(sset, name, service_type_p, deplist_p), child_listener(this),
-       child_status_listener(this), restart_timer(this)
-{
-    program_name = std::move(command);
-    exec_arg_parts = separate_args(program_name, command_offsets);
-
-    restart_interval_count = 0;
-    restart_interval_time = {0, 0};
-    restart_timer.service = this;
-    restart_timer.add_timer(event_loop);
-
-    // By default, allow a maximum of 3 restarts within 10.0 seconds:
-    restart_interval.seconds() = 10;
-    restart_interval.nseconds() = 0;
-    max_restart_interval_count = 3;
-
-    waiting_restart_timer = false;
-    reserved_child_watch = false;
-    tracking_child = false;
-    stop_timer_armed = false;
-    start_is_interruptible = false;
-}
-
-void base_process_service::do_restart() noexcept
-{
-    waiting_restart_timer = false;
-    restart_interval_count++;
-    auto service_state = get_state();
-
-    // We may be STARTING (regular restart) or STARTED ("smooth recovery"). This affects whether
-    // the process should be granted access to the console:
-    bool on_console = service_state == service_state_t::STARTING
-            ? onstart_flags.starts_on_console : onstart_flags.runs_on_console;
-
-    if (service_state == service_state_t::STARTING) {
-        // for a smooth recovery, we want to check dependencies are available before actually
-        // starting:
-        if (! check_deps_started()) {
-            waiting_for_deps = true;
-            return;
-        }
-    }
-
-    if (! start_ps_process(exec_arg_parts, on_console)) {
-        restarting = false;
-        if (service_state == service_state_t::STARTING) {
-            failed_to_start();
-        }
-        else {
-            // desired_state = service_state_t::STOPPED;
-            forced_stop();
-        }
-        services->process_queues();
-    }
-}
-
-bool base_process_service::restart_ps_process() noexcept
-{
-    using time_val = dasynq::time_val;
-
-    time_val current_time;
-    event_loop.get_time(current_time, clock_type::MONOTONIC);
-
-    if (max_restart_interval_count != 0) {
-        // Check whether we're still in the most recent restart check interval:
-        time_val int_diff = current_time - restart_interval_time;
-        if (int_diff < restart_interval) {
-            if (restart_interval_count >= max_restart_interval_count) {
-                log(loglevel_t::ERROR, "Service ", get_name(), " restarting too quickly; stopping.");
-                return false;
-            }
-        }
-        else {
-            restart_interval_time = current_time;
-            restart_interval_count = 0;
-        }
-    }
-
-    // Check if enough time has lapsed since the prevous restart. If not, start a timer:
-    time_val tdiff = current_time - last_start_time;
-    if (restart_delay <= tdiff) {
-        // > restart delay (normally 200ms)
-        do_restart();
-    }
-    else {
-        time_val timeout = restart_delay - tdiff;
-        restart_timer.arm_timer_rel(event_loop, timeout);
-        waiting_restart_timer = true;
-    }
-    return true;
-}
-
-bool base_process_service::interrupt_start() noexcept
-{
-    if (waiting_restart_timer) {
-        restart_timer.stop_timer(event_loop);
-        waiting_restart_timer = false;
-        return service_record::interrupt_start();
-    }
-    else {
-        log(loglevel_t::WARN, "Interrupting start of service ", get_name(), " with pid ", pid, " (with SIGINT).");
-        kill_pg(SIGINT);
-        if (stop_timeout != time_val(0,0)) {
-            restart_timer.arm_timer_rel(event_loop, stop_timeout);
-            stop_timer_armed = true;
-        }
-        else if (stop_timer_armed) {
-            restart_timer.stop_timer(event_loop);
-            stop_timer_armed = false;
-        }
-        set_state(service_state_t::STOPPING);
-        notify_listeners(service_event_t::STARTCANCELLED);
-        return false;
-    }
-}
-
-void base_process_service::kill_with_fire() noexcept
-{
-    if (pid != -1) {
-        log(loglevel_t::WARN, "Service ", get_name(), " with pid ", pid, " exceeded allowed stop time; killing.");
-        kill_pg(SIGKILL);
-    }
-}
-
-dasynq::rearm process_restart_timer::timer_expiry(eventloop_t &, int expiry_count)
-{
-    service->stop_timer_armed = false;
-
-    // Timer expires if:
-    // We are stopping, including after having startup cancelled (stop timeout, state is STOPPING); We are
-    // starting (start timeout, state is STARTING); We are waiting for restart timer before restarting,
-    // including smooth recovery (restart timeout, state is STARTING or STARTED).
-    if (service->get_state() == service_state_t::STOPPING) {
-        service->kill_with_fire();
-    }
-    else if (service->pid != -1) {
-        // Starting, start timed out.
-        service->stop_dependents();
-        service->interrupt_start();
-    }
-    else {
-        // STARTING / STARTED, and we have a pid: must be restarting (smooth recovery if STARTED)
-        service->do_restart();
-    }
-
-    // Leave the timer disabled, or, if it has been reset by any processing above, leave it armed:
-    return dasynq::rearm::NOOP;
-}
index 2e77a792b3a81b584136336b27f98ee7dd3843c9..de2052182cc92f34fefbe5a2004e6ed956ee8ee8 100644 (file)
@@ -213,30 +213,6 @@ class prelim_dep
     }
 };
 
-// Given a string and a list of pairs of (start,end) indices for each argument in that string,
-// store a null terminator for the argument. Return a `char *` vector containing the beginning
-// of each argument and a trailing nullptr. (The returned array is invalidated if the string is later modified).
-static std::vector<const char *> separate_args(std::string &s, std::list<std::pair<unsigned,unsigned>> &arg_indices)
-{
-    std::vector<const char *> r;
-    r.reserve(arg_indices.size() + 1);
-
-    // First store nul terminator for each part:
-    for (auto index_pair : arg_indices) {
-        if (index_pair.second < s.length()) {
-            s[index_pair.second] = 0;
-        }
-    }
-
-    // Now we can get the C string (c_str) and store offsets into it:
-    const char * cstr = s.c_str();
-    for (auto index_pair : arg_indices) {
-        r.push_back(cstr + index_pair.first);
-    }
-    r.push_back(nullptr);
-    return r;
-}
-
 class service_child_watcher : public eventloop_t::child_proc_watcher_impl<service_child_watcher>
 {
     public:
@@ -602,228 +578,6 @@ class service_record
     }
 };
 
-class base_process_service;
-
-// A timer for process restarting. Used to ensure a minimum delay between process restarts (and
-// also for timing service stop before the SIGKILL hammer is used).
-class process_restart_timer : public eventloop_t::timer_impl<process_restart_timer>
-{
-    public:
-    base_process_service * service;
-
-    process_restart_timer(base_process_service *service_p)
-        : service(service_p)
-    {
-    }
-
-    dasynq::rearm timer_expiry(eventloop_t &, int expiry_count);
-};
-
-class base_process_service : public service_record
-{
-    friend class service_child_watcher;
-    friend class exec_status_pipe_watcher;
-    friend class process_restart_timer;
-
-    private:
-    // Re-launch process
-    void do_restart() noexcept;
-
-    protected:
-    string program_name;          // storage for program/script and arguments
-    std::vector<const char *> exec_arg_parts; // pointer to each argument/part of the program_name, and nullptr
-
-    string stop_command;          // storage for stop program/script and arguments
-    std::vector<const char *> stop_arg_parts; // pointer to each argument/part of the stop_command, and nullptr
-
-    service_child_watcher child_listener;
-    exec_status_pipe_watcher child_status_listener;
-    process_restart_timer restart_timer;
-    time_val last_start_time;
-
-    // Restart interval time and restart count are used to track the number of automatic restarts
-    // over an interval. Too many restarts over an interval will inhibit further restarts.
-    time_val restart_interval_time;  // current restart interval
-    int restart_interval_count;      // count of restarts within current interval
-
-    time_val restart_interval;       // maximum restart interval
-    int max_restart_interval_count;  // number of restarts allowed over maximum interval
-    time_val restart_delay;          // delay between restarts
-
-    // Time allowed for service stop, after which SIGKILL is sent. 0 to disable.
-    time_val stop_timeout = {10, 0}; // default of 10 seconds
-
-    // Time allowed for service start, after which SIGINT is sent (and then SIGKILL after
-    // <stop_timeout>). 0 to disable.
-    time_val start_timeout = {60, 0}; // default of 1 minute
-
-    bool waiting_restart_timer : 1;
-    bool stop_timer_armed : 1;
-    bool reserved_child_watch : 1;
-    bool tracking_child : 1;  // whether we expect to see child process status
-    bool start_is_interruptible : 1;  // whether we can interrupt start
-
-    // Launch the process with the given arguments, return true on success
-    bool start_ps_process(const std::vector<const char *> &args, bool on_console) noexcept;
-
-    // Restart the process (due to start failure or unexpected termination). Restarts will be
-    // rate-limited.
-    bool restart_ps_process() noexcept;
-
-    // Perform smooth recovery process
-    void do_smooth_recovery() noexcept;
-
-    // Start the process, return true on success
-    virtual bool bring_up() noexcept override;
-
-    virtual void bring_down() noexcept override;
-
-    // Called when the process exits. The exit_status is the status value yielded by
-    // the "wait" system call.
-    virtual void handle_exit_status(int exit_status) noexcept = 0;
-
-    // Called if an exec fails.
-    virtual void exec_failed(int errcode) noexcept = 0;
-
-    virtual bool can_interrupt_start() noexcept override
-    {
-        return waiting_restart_timer || start_is_interruptible || service_record::can_interrupt_start();
-    }
-
-    virtual bool can_proceed_to_start() noexcept override
-    {
-        return ! waiting_restart_timer;
-    }
-
-    virtual bool interrupt_start() noexcept override;
-
-    // Kill with SIGKILL
-    void kill_with_fire() noexcept;
-
-    // Signal the process group of the service process
-    void kill_pg(int signo) noexcept;
-
-    public:
-    base_process_service(service_set *sset, string name, service_type_t record_type_p, string &&command,
-            std::list<std::pair<unsigned,unsigned>> &command_offsets,
-            const std::list<prelim_dep> &deplist_p);
-
-    ~base_process_service() noexcept
-    {
-    }
-
-    // Set the stop command and arguments (may throw std::bad_alloc)
-    void set_stop_command(std::string command, std::list<std::pair<unsigned,unsigned>> &stop_command_offsets)
-    {
-        stop_command = command;
-        stop_arg_parts = separate_args(stop_command, stop_command_offsets);
-    }
-
-    void set_restart_interval(timespec interval, int max_restarts) noexcept
-    {
-        restart_interval = interval;
-        max_restart_interval_count = max_restarts;
-    }
-
-    void set_restart_delay(timespec delay) noexcept
-    {
-        restart_delay = delay;
-    }
-
-    void set_stop_timeout(timespec timeout) noexcept
-    {
-        stop_timeout = timeout;
-    }
-
-    void set_start_timeout(timespec timeout) noexcept
-    {
-        start_timeout = timeout;
-    }
-
-    void set_start_interruptible(bool value) noexcept
-    {
-        start_is_interruptible = value;
-    }
-};
-
-class process_service : public base_process_service
-{
-    virtual void handle_exit_status(int exit_status) noexcept override;
-    virtual void exec_failed(int errcode) noexcept override;
-    virtual void bring_down() noexcept override;
-
-    public:
-    process_service(service_set *sset, string name, string &&command,
-            std::list<std::pair<unsigned,unsigned>> &command_offsets,
-            std::list<prelim_dep> depends_p)
-         : base_process_service(sset, name, service_type_t::PROCESS, std::move(command), command_offsets,
-             depends_p)
-    {
-    }
-
-    ~process_service() noexcept
-    {
-    }
-};
-
-class bgproc_service : public base_process_service
-{
-    virtual void handle_exit_status(int exit_status) noexcept override;
-    virtual void exec_failed(int errcode) noexcept override;
-
-    enum class pid_result_t {
-        OK,
-        FAILED,      // failed to read pid or read invalid pid
-        TERMINATED   // read pid successfully, but the process already terminated
-    };
-
-    // Read the pid-file, return false on failure
-    pid_result_t read_pid_file(int *exit_status) noexcept;
-
-    public:
-    bgproc_service(service_set *sset, string name, string &&command,
-            std::list<std::pair<unsigned,unsigned>> &command_offsets,
-            std::list<prelim_dep> depends_p)
-         : base_process_service(sset, name, service_type_t::BGPROCESS, std::move(command), command_offsets,
-             depends_p)
-    {
-    }
-
-    ~bgproc_service() noexcept
-    {
-    }
-};
-
-class scripted_service : public base_process_service
-{
-    virtual void handle_exit_status(int exit_status) noexcept override;
-    virtual void exec_failed(int errcode) noexcept override;
-    virtual void bring_down() noexcept override;
-
-    virtual bool interrupt_start() noexcept override
-    {
-        // if base::interrupt_start() returns false, then start hasn't been fully interrupted, but an
-        // interrupt has been issued:
-        interrupting_start = ! base_process_service::interrupt_start();
-        return ! interrupting_start;
-    }
-
-    bool interrupting_start : 1;  // running start script (true) or stop script (false)
-
-    public:
-    scripted_service(service_set *sset, string name, string &&command,
-            std::list<std::pair<unsigned,unsigned>> &command_offsets,
-            std::list<prelim_dep> depends_p)
-         : base_process_service(sset, name, service_type_t::SCRIPTED, std::move(command), command_offsets,
-             depends_p), interrupting_start(false)
-    {
-    }
-
-    ~scripted_service() noexcept
-    {
-    }
-};
-
 inline auto extract_prop_queue(service_record *sr) -> decltype(sr->prop_queue_node) &
 {
     return sr->prop_queue_node;