service.cc becomes service.cc + proc-service.cc + baseproc-service.cc.
The header service.h becomes service.h + proc-service.h.
This refactoring should make testing easier.
SHUTDOWN=shutdown
endif
-objects = dinit.o load_service.o service.o control.o dinit-log.o dinit-main.o dinitctl.o shutdown.o
+#objects = dinit.o load_service.o service.o proc-service.o baseproc-service.o control.o dinit-log.o dinit-main.o dinitctl.o shutdown.o
-dinit_objects = dinit.o load_service.o service.o control.o dinit-log.o dinit-main.o
+dinit_objects = dinit.o load_service.o service.o proc-service.o baseproc-service.o control.o dinit-log.o dinit-main.o
+
+objects = $(dinit_objects) dinitctl.o shutdown.o
all: dinit dinitctl $(SHUTDOWN)
--- /dev/null
+#include "dinit-socket.h"
+#include "proc-service.h"
+
+/*
+ * Base process implementation (base_process_service).
+ *
+ * See proc-service.h for interface documentation.
+ */
+
+extern eventloop_t event_loop;
+
+using clock_type = dasynq::clock_type;
+using rearm = dasynq::rearm;
+using time_val = dasynq::time_val;
+
+// Try to relaunch the process via the rate-limited restart path. If the restart is refused,
+// perform an emergency stop and process the service queues.
+void base_process_service::do_smooth_recovery() noexcept
+{
+    if (restart_ps_process()) {
+        return;
+    }
+
+    emergency_stop();
+    services->process_queues();
+}
+
+// Start the service process. Returns true if the process was launched (or a restart is already
+// in hand), false if it could not be launched.
+bool base_process_service::bring_up() noexcept
+{
+    if (restarting) {
+        // Already restarting: only go through the restart path if no process is running.
+        return (pid == -1) ? restart_ps_process() : true;
+    }
+
+    // Fresh start: begin a new restart-counting interval.
+    event_loop.get_time(restart_interval_time, clock_type::MONOTONIC);
+    restart_interval_count = 0;
+
+    if (! start_ps_process(exec_arg_parts, onstart_flags.starts_on_console)) {
+        return false;
+    }
+
+    // Arm the start timeout if one is configured; otherwise make sure no earlier timeout
+    // remains armed.
+    const bool have_start_timeout = (start_timeout != time_val(0,0));
+    if (have_start_timeout) {
+        restart_timer.arm_timer_rel(event_loop, start_timeout);
+        stop_timer_armed = true;
+    }
+    else if (stop_timer_armed) {
+        restart_timer.stop_timer(event_loop);
+        stop_timer_armed = false;
+    }
+    return true;
+}
+
+bool base_process_service::start_ps_process(const std::vector<const char *> &cmd, bool on_console) noexcept
+{ // Fork a child to run "cmd"; returns true once the fork has succeeded, false if any setup step failed.
+ // In general, you can't tell whether fork/exec is successful. We use a pipe to communicate
+ // success/failure from the child to the parent. The pipe is set CLOEXEC so a successful
+ // exec closes the pipe, and the parent sees EOF. If the exec is unsuccessful, the errno
+ // is written to the pipe, and the parent can read it.
+
+ event_loop.get_time(last_start_time, clock_type::MONOTONIC); // used by restart_ps_process() to enforce the restart delay
+
+ int pipefd[2];
+ if (dasynq::pipe2(pipefd, O_CLOEXEC)) {
+ log(loglevel_t::ERROR, get_name(), ": can't create status check pipe: ", strerror(errno));
+ return false;
+ }
+
+ const char * logfile = this->logfile.c_str();
+ if (*logfile == 0) { // no log file configured: fall back to /dev/null
+ logfile = "/dev/null";
+ }
+
+ bool child_status_registered = false;
+ control_conn_t *control_conn = nullptr;
+
+ int control_socket[2] = {-1, -1}; // both stay -1 unless a control socket is passed to the child
+ if (onstart_flags.pass_cs_fd) {
+ if (dinit_socketpair(AF_UNIX, SOCK_STREAM, /* protocol */ 0, control_socket, SOCK_NONBLOCK)) {
+ log(loglevel_t::ERROR, get_name(), ": can't create control socket: ", strerror(errno));
+ goto out_p;
+ }
+
+ // Make the server side socket close-on-exec:
+ int fdflags = fcntl(control_socket[0], F_GETFD);
+ fcntl(control_socket[0], F_SETFD, fdflags | FD_CLOEXEC);
+
+ try {
+ control_conn = new control_conn_t(event_loop, services, control_socket[0]);
+ }
+ catch (std::exception &exc) {
+ log(loglevel_t::ERROR, get_name(), ": can't launch process; out of memory");
+ goto out_cs;
+ }
+ }
+
+ // Set up complete, now fork and exec:
+
+ pid_t forkpid;
+
+ try {
+ child_status_listener.add_watch(event_loop, pipefd[0], dasynq::IN_EVENTS);
+ child_status_registered = true;
+
+ // We specify a high priority (i.e. low priority value) so that process termination is
+ // handled early. This means we have always recorded that the process is terminated by the
+ // time that we handle events that might otherwise cause us to signal the process, so we
+ // avoid sending a signal to an invalid (and possibly recycled) process ID.
+ forkpid = child_listener.fork(event_loop, reserved_child_watch, dasynq::DEFAULT_PRIORITY - 10);
+ reserved_child_watch = true;
+ }
+ catch (std::exception &e) {
+ log(loglevel_t::ERROR, get_name(), ": Could not fork: ", e.what());
+ goto out_cs_h;
+ }
+
+ if (forkpid == 0) {
+ run_child_proc(cmd.data(), logfile, on_console, pipefd[1], control_socket[1]); // NOTE(review): presumably never returns (exec or exit in the child) - confirm
+ }
+ else {
+ // Parent process
+ close(pipefd[1]); // close the 'other end' fd
+ if (control_socket[1] != -1) {
+ close(control_socket[1]);
+ }
+ pid = forkpid;
+
+ waiting_for_execstat = true; // cleared again in exec_status_pipe_watcher::fd_event
+ return true;
+ }
+
+ // Failure exit:
+
+ out_cs_h: // reached when fork (or registering the status watcher) failed
+ if (child_status_registered) {
+ child_status_listener.deregister(event_loop);
+ }
+
+ if (onstart_flags.pass_cs_fd) {
+ delete control_conn;
+
+ out_cs: // reached directly when the control connection could not be allocated
+ close(control_socket[0]);
+ close(control_socket[1]);
+ }
+
+ out_p: // every failure path ends by closing both ends of the status pipe
+ close(pipefd[0]);
+ close(pipefd[1]);
+
+ return false;
+}
+
+// Stop the service process. If a process is running its process group is signalled; the stop
+// is then normally completed in handle_exit_status. If no process is running the service is
+// marked stopped immediately.
+void base_process_service::bring_down() noexcept
+{
+    waiting_for_deps = false;
+
+    if (pid == -1) {
+        // The process is already dead.
+        stopped();
+        return;
+    }
+
+    // The process is still running and must be signalled. kill_pg() signals the whole process
+    // group rather than just the process, as there's less risk then of creating an orphaned
+    // process group:
+    if (! onstart_flags.no_sigterm) {
+        kill_pg(SIGTERM);
+    }
+    if (term_signal != -1) {
+        kill_pg(term_signal);
+    }
+
+    // A BGPROCESS whose process is not our immediate child will never be reported via
+    // handle_exit_status, so it has to be treated as stopped right away:
+    const bool untracked_bgprocess = (get_type() == service_type_t::BGPROCESS) && ! tracking_child;
+    if (untracked_bgprocess) {
+        stopped();
+        return;
+    }
+
+    // Otherwise wait for the exit status, bounded by the stop timeout (if any):
+    if (stop_timeout != time_val(0,0)) {
+        restart_timer.arm_timer_rel(event_loop, stop_timeout);
+        stop_timer_armed = true;
+    }
+}
+
+base_process_service::base_process_service(service_set *sset, string name,
+ service_type_t service_type_p, string &&command,
+ std::list<std::pair<unsigned,unsigned>> &command_offsets,
+ const std::list<prelim_dep> &deplist_p)
+ : service_record(sset, name, service_type_p, deplist_p), child_listener(this),
+ child_status_listener(this), restart_timer(this)
+{
+ program_name = std::move(command); // take over the command string; exec_arg_parts will point into it
+ exec_arg_parts = separate_args(program_name, command_offsets); // nul-terminates each argument in place
+
+ restart_interval_count = 0;
+ restart_interval_time = {0, 0};
+ restart_timer.service = this; // (the restart_timer(this) initializer above has already set this)
+ restart_timer.add_timer(event_loop);
+
+ // By default, allow a maximum of 3 restarts within 10.0 seconds:
+ restart_interval.seconds() = 10;
+ restart_interval.nseconds() = 0;
+ max_restart_interval_count = 3;
+
+ waiting_restart_timer = false; // bit-field flags: all start out cleared
+ reserved_child_watch = false;
+ tracking_child = false;
+ stop_timer_armed = false;
+ start_is_interruptible = false;
+}
+
+// Re-launch the process. Called either directly from restart_ps_process() or from the restart
+// timer once the restart delay has elapsed.
+void base_process_service::do_restart() noexcept
+{
+    waiting_restart_timer = false;
+    restart_interval_count++;
+
+    const auto current_state = get_state();
+    const bool is_starting = (current_state == service_state_t::STARTING);
+
+    // We may be STARTING (regular restart) or STARTED ("smooth recovery"). This affects whether
+    // the process should be granted access to the console:
+    bool on_console;
+    if (is_starting) {
+        on_console = onstart_flags.starts_on_console;
+    }
+    else {
+        on_console = onstart_flags.runs_on_console;
+    }
+
+    // While in the STARTING state, the dependencies must have started before we launch;
+    // if they have not, wait for them:
+    if (is_starting && ! check_deps_started()) {
+        waiting_for_deps = true;
+        return;
+    }
+
+    if (start_ps_process(exec_arg_parts, on_console)) {
+        return;
+    }
+
+    // The launch failed: give up on restarting.
+    restarting = false;
+    if (is_starting) {
+        failed_to_start();
+    }
+    else {
+        forced_stop();
+    }
+    services->process_queues();
+}
+
+// Restart the process, subject to rate limiting. Returns false if the service has been
+// restarting too quickly (no restart is performed); returns true if a restart was attempted
+// immediately or has been scheduled via the restart timer.
+bool base_process_service::restart_ps_process() noexcept
+{
+    time_val now;
+    event_loop.get_time(now, clock_type::MONOTONIC);
+
+    if (max_restart_interval_count != 0) {
+        // Are we still inside the most recent restart check interval?
+        const time_val interval_elapsed = now - restart_interval_time;
+        if (! (interval_elapsed < restart_interval)) {
+            // No: begin a new interval.
+            restart_interval_time = now;
+            restart_interval_count = 0;
+        }
+        else if (restart_interval_count >= max_restart_interval_count) {
+            log(loglevel_t::ERROR, "Service ", get_name(), " restarting too quickly; stopping.");
+            return false;
+        }
+    }
+
+    // Enforce the minimum delay since the previous start:
+    const time_val since_last_start = now - last_start_time;
+    if (! (restart_delay <= since_last_start)) {
+        // Too soon: let the timer trigger the restart after the remaining time.
+        const time_val remaining = restart_delay - since_last_start;
+        restart_timer.arm_timer_rel(event_loop, remaining);
+        waiting_restart_timer = true;
+        return true;
+    }
+
+    // The restart delay has already passed:
+    do_restart();
+    return true;
+}
+
+// Interrupt an in-progress start. If we are only waiting on the restart timer, the timer is
+// cancelled and the result of service_record::interrupt_start() is returned. Otherwise the
+// process is sent SIGINT, the service moves to STOPPING, and false is returned.
+bool base_process_service::interrupt_start() noexcept
+{
+    if (waiting_restart_timer) {
+        restart_timer.stop_timer(event_loop);
+        waiting_restart_timer = false;
+        return service_record::interrupt_start();
+    }
+
+    log(loglevel_t::WARN, "Interrupting start of service ", get_name(), " with pid ", pid, " (with SIGINT).");
+    kill_pg(SIGINT);
+
+    // Allow stop_timeout for the process to exit; with no stop timeout configured, make sure
+    // that no previously armed timeout remains active:
+    const bool have_stop_timeout = (stop_timeout != time_val(0,0));
+    if (have_stop_timeout) {
+        restart_timer.arm_timer_rel(event_loop, stop_timeout);
+        stop_timer_armed = true;
+    }
+    else if (stop_timer_armed) {
+        restart_timer.stop_timer(event_loop);
+        stop_timer_armed = false;
+    }
+
+    set_state(service_state_t::STOPPING);
+    notify_listeners(service_event_t::STARTCANCELLED);
+    return false;
+}
+
+// Send SIGKILL to the process group of the service process, if there is a process.
+void base_process_service::kill_with_fire() noexcept
+{
+    if (pid == -1) {
+        return;
+    }
+
+    log(loglevel_t::WARN, "Service ", get_name(), " with pid ", pid, " exceeded allowed stop time; killing.");
+    kill_pg(SIGKILL);
+}
+
+// Send the given signal to the process group that the service process belongs to.
+void base_process_service::kill_pg(int signo) noexcept
+{
+    const pid_t pgid = getpgid(pid);
+    if (pgid != -1) {
+        kill(-pgid, signo);
+        return;
+    }
+
+    // only should happen if pid is invalid, which should never happen...
+    log(loglevel_t::ERROR, get_name(), ": can't signal process: ", strerror(errno));
+}
#ifndef DINIT_UTIL_H_INCLUDED
#define DINIT_UTIL_H_INCLUDED 1
+#include <cstddef>
#include <cerrno>
+#include <sys/types.h>
+#include <unistd.h>
+
// Signal-safe read. Read and re-try if interrupted by signal (EINTR).
// *May* affect errno even on a successful read (when the return is less than n).
inline ssize_t ss_read(int fd, void * buf, size_t n)
#include <pwd.h>
#include <grp.h>
-#include "service.h"
+#include "proc-service.h"
using string = std::string;
using string_iterator = std::string::iterator;
--- /dev/null
+#include <sys/un.h>
+#include <sys/socket.h>
+
+#include "dinit-socket.h"
+#include "dinit-util.h"
+#include "proc-service.h"
+
+extern eventloop_t event_loop;
+
+using clock_type = dasynq::clock_type;
+using rearm = dasynq::rearm;
+using time_val = dasynq::time_val;
+
+rearm exec_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) noexcept
+{ // Handles activity on the exec status pipe: data read means exec() failed, otherwise it is treated as success.
+ base_process_service *sr = service;
+ sr->waiting_for_execstat = false;
+
+ int exec_status;
+ int r = read(get_watched_fd(), &exec_status, sizeof(int));
+ deregister(loop);
+ close(get_watched_fd());
+
+ if (r > 0) {
+ // We read an errno code; exec() failed, and the service startup failed.
+ if (sr->pid != -1) {
+ sr->child_listener.deregister(event_loop, sr->pid);
+ sr->reserved_child_watch = false;
+ if (sr->stop_timer_armed) {
+ sr->restart_timer.stop_timer(loop);
+ sr->stop_timer_armed = false;
+ }
+ }
+ sr->pid = -1;
+ sr->exec_failed(exec_status);
+ }
+ else {
+ // exec() succeeded.
+ if (sr->get_type() == service_type_t::PROCESS) {
+ // This could be a smooth recovery (state already STARTED). Even more, the process
+ // might be stopped (and killed via a signal) during smooth recovery. We don't want to
+ // process startup again in either case, so we check for state STARTING:
+ if (sr->get_state() == service_state_t::STARTING) {
+ sr->started();
+ }
+ else if (sr->get_state() == service_state_t::STOPPING) {
+ // stopping, but smooth recovery was in process. That's now over so we can
+ // commence normal stop. Note that if pid == -1 the process already stopped(!),
+ // that's handled below.
+ if (sr->pid != -1 && sr->stop_check_dependents()) {
+ sr->bring_down();
+ }
+ }
+ }
+
+ if (sr->pid == -1) {
+ // Somehow the process managed to complete before we even saw the status.
+ sr->handle_exit_status(sr->exit_status); // exit_status was saved by service_child_watcher::status_change
+ }
+ }
+
+ sr->services->process_queues();
+
+ return rearm::REMOVED; // the watch was deregistered above
+}
+
+dasynq::rearm service_child_watcher::status_change(eventloop_t &loop, pid_t child, int status) noexcept
+{ // Child watch callback: records the child's wait status and passes it to handle_exit_status.
+ base_process_service *sr = service;
+
+ sr->pid = -1; // the process no longer exists
+ sr->exit_status = status; // kept in case processing must be deferred (see below)
+
+ // Ok, for a process service, any process death which we didn't rig
+ // ourselves is a bit... unexpected. Probably, the child died because
+ // we asked it to (sr->service_state == STOPPING). But even if
+ // we didn't, there's not much we can do.
+
+ if (sr->waiting_for_execstat) {
+ // We still don't have an exec() status from the forked child, wait for that
+ // before doing any further processing.
+ return dasynq::rearm::NOOP; // hold watch reservation
+ }
+
+ // Must stop watch now since handle_exit_status might result in re-launch:
+ // (stop_watch instead of deregister, so that we hold watch reservation).
+ stop_watch(loop);
+
+ if (sr->stop_timer_armed) { // the process has ended, so a pending start/stop timeout no longer applies
+ sr->restart_timer.stop_timer(loop);
+ sr->stop_timer_armed = false;
+ }
+
+ sr->handle_exit_status(status);
+ return dasynq::rearm::NOOP;
+}
+
+void process_service::handle_exit_status(int exit_status) noexcept
+{ // Reacts to termination of the service process, according to the current service state.
+ bool did_exit = WIFEXITED(exit_status);
+ bool was_signalled = WIFSIGNALED(exit_status);
+ restarting = false;
+ auto service_state = get_state();
+
+ if (exit_status != 0 && service_state != service_state_t::STOPPING) { // unexpected failure: log how the process ended
+ if (did_exit) {
+ log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
+ WEXITSTATUS(exit_status));
+ }
+ else if (was_signalled) {
+ log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
+ WTERMSIG(exit_status));
+ }
+ }
+
+ if (service_state == service_state_t::STARTING) {
+ if (did_exit && WEXITSTATUS(exit_status) == 0) { // a clean exit while STARTING is treated as a successful start
+ started();
+ }
+ else {
+ failed_to_start();
+ }
+ }
+ else if (service_state == service_state_t::STOPPING) {
+ // We won't log a non-zero exit status or termination due to signal here -
+ // we assume that the process died because we signalled it.
+ stopped();
+ }
+ else if (smooth_recovery && service_state == service_state_t::STARTED
+ && get_target_state() == service_state_t::STARTED) {
+ do_smooth_recovery(); // processes the queues itself if the restart is refused
+ return;
+ }
+ else {
+ emergency_stop();
+ }
+ services->process_queues();
+}
+
+// Called when exec() of the service process failed; errcode is the errno value reported by
+// the child.
+void process_service::exec_failed(int errcode) noexcept
+{
+    log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
+
+    if (get_state() != service_state_t::STARTING) {
+        // Process service in smooth recovery:
+        emergency_stop();
+        return;
+    }
+
+    failed_to_start();
+}
+
+void bgproc_service::handle_exit_status(int exit_status) noexcept
+{ // Reacts to termination of the launched process, or of the daemon process named by the pid file.
+ begin: // re-entered via goto when read_pid_file reports that the daemon has already terminated (exit_status then holds its status)
+ bool did_exit = WIFEXITED(exit_status);
+ bool was_signalled = WIFSIGNALED(exit_status);
+ auto service_state = get_state();
+
+ if (exit_status != 0 && service_state != service_state_t::STOPPING) {
+ if (did_exit) {
+ log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
+ WEXITSTATUS(exit_status));
+ }
+ else if (was_signalled) {
+ log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
+ WTERMSIG(exit_status));
+ }
+ }
+
+ // This may be a "smooth recovery" where we are restarting the process while leaving the
+ // service in the STARTED state.
+ if (restarting && service_state == service_state_t::STARTED) {
+ restarting = false;
+ bool need_stop = false;
+ if ((did_exit && WEXITSTATUS(exit_status) != 0) || was_signalled) {
+ need_stop = true;
+ }
+ else {
+ // We need to re-read the PID, since it has now changed.
+ if (pid_file.length() != 0) {
+ auto pid_result = read_pid_file(&exit_status);
+ switch (pid_result) {
+ case pid_result_t::FAILED:
+ // Failed startup: no auto-restart.
+ need_stop = true;
+ break;
+ case pid_result_t::TERMINATED:
+ goto begin;
+ case pid_result_t::OK:
+ break;
+ }
+ }
+ }
+
+ if (need_stop) {
+ // Failed startup: no auto-restart.
+ emergency_stop();
+ services->process_queues();
+ }
+
+ return;
+ }
+
+ restarting = false;
+ if (service_state == service_state_t::STARTING) {
+ // POSIX requires that if the process exited cleanly with a status code of 0,
+ // the exit status value will be 0:
+ if (exit_status == 0) {
+ auto pid_result = read_pid_file(&exit_status);
+ switch (pid_result) {
+ case pid_result_t::FAILED:
+ // Failed startup: no auto-restart.
+ failed_to_start();
+ break;
+ case pid_result_t::TERMINATED:
+ // started, but immediately terminated
+ started();
+ goto begin;
+ case pid_result_t::OK:
+ started();
+ break;
+ }
+ }
+ else {
+ failed_to_start();
+ }
+ }
+ else if (service_state == service_state_t::STOPPING) {
+ // We won't log a non-zero exit status or termination due to signal here -
+ // we assume that the process died because we signalled it.
+ stopped();
+ }
+ else {
+ // we must be STARTED
+ if (smooth_recovery && get_target_state() == service_state_t::STARTED) {
+ do_smooth_recovery(); // processes the queues itself if the restart is refused
+ return;
+ }
+ if (! do_auto_restart() && start_explicit) {
+ start_explicit = false;
+ release();
+ }
+ forced_stop();
+ stop_dependents();
+ stopped();
+ }
+ services->process_queues();
+}
+
+void bgproc_service::exec_failed(int errcode) noexcept
+{ // Called when exec() of the service command failed; errcode is the errno value reported by the child.
+ log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
+ // The only time we execute is for startup, so an exec failure is always a failure to start:
+ failed_to_start();
+}
+
+void scripted_service::handle_exit_status(int exit_status) noexcept
+{ // Reacts to termination of the start or stop script.
+ bool did_exit = WIFEXITED(exit_status);
+ bool was_signalled = WIFSIGNALED(exit_status);
+ auto service_state = get_state();
+
+ // For a scripted service, a termination occurs in one of three main cases:
+ // - the start script completed (or failed), when service was STARTING
+ // - the start script was interrupted to cancel startup; state is STOPPING
+ // - the stop script completed (or failed), state is STOPPING
+
+ if (service_state == service_state_t::STOPPING) {
+ // We might be running the stop script, or we might be running the start script and have issued
+ // a cancel order via SIGINT:
+ if (did_exit && WEXITSTATUS(exit_status) == 0) {
+ if (interrupting_start) { // the start script still exited successfully after being interrupted
+ interrupting_start = false;
+ // launch stop script:
+ bring_down();
+ }
+ else {
+ // We were running the stop script and finished successfully
+ stopped();
+ }
+ }
+ else {
+ if (interrupting_start) {
+ // We issued a start interrupt, so we expected this failure:
+ if (did_exit) {
+ log(loglevel_t::INFO, "Service ", get_name(), " start cancelled; exit code ",
+ WEXITSTATUS(exit_status));
+ }
+ else if (was_signalled) {
+ log(loglevel_t::INFO, "Service ", get_name(), " start cancelled from signal ",
+ WTERMSIG(exit_status));
+ }
+ }
+ else {
+ // ??? failed to stop! Let's log it as warning:
+ if (did_exit) {
+ log(loglevel_t::WARN, "Service ", get_name(), " stop command failed with exit code ",
+ WEXITSTATUS(exit_status));
+ }
+ else if (was_signalled) {
+ log(loglevel_t::WARN, "Service ", get_name(), " stop command terminated due to signal ",
+ WTERMSIG(exit_status));
+ }
+ }
+ // Even if the stop script failed, assume that service is now stopped, so that any dependencies
+ // can be stopped. There's not really any other useful course of action here.
+ interrupting_start = false;
+ stopped();
+ }
+ services->process_queues();
+ }
+ else { // STARTING
+ if (exit_status == 0) {
+ started();
+ }
+ else {
+ // failed to start
+ if (did_exit) {
+ log(loglevel_t::ERROR, "Service ", get_name(), " command failed with exit code ",
+ WEXITSTATUS(exit_status));
+ }
+ else if (was_signalled) {
+ log(loglevel_t::ERROR, "Service ", get_name(), " command terminated due to signal ",
+ WTERMSIG(exit_status));
+ }
+ failed_to_start();
+ }
+ services->process_queues();
+ }
+}
+
+// Called when exec() of the start or stop script failed; errcode is the errno value reported
+// by the child.
+void scripted_service::exec_failed(int errcode) noexcept
+{
+    log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
+
+    switch (get_state()) {
+        case service_state_t::STARTING:
+            failed_to_start();
+            break;
+        case service_state_t::STOPPING:
+            // We've logged the failure, but it's probably better not to leave the service in
+            // STOPPING state:
+            stopped();
+            break;
+        default:
+            // Any other state: nothing further to do.
+            break;
+    }
+}
+
+// Read the daemon's process ID from the pid file and determine whether that process can be
+// tracked.
+//
+// Returns:
+//   pid_result_t::OK         - a pid was read and stored in "pid"; "tracking_child" records
+//                              whether a child watch could be set up for it
+//   pid_result_t::TERMINATED - a pid was read, but that process has already terminated; its
+//                              wait status is stored in *exit_status and "pid" is reset to -1
+//   pid_result_t::FAILED     - the file could not be opened/read, or did not yield a usable pid
+bgproc_service::pid_result_t
+bgproc_service::read_pid_file(int *exit_status) noexcept
+{
+    const char *pid_file_c = pid_file.c_str();
+    // POSIX requires exactly one access mode to be given to open(); state it explicitly
+    // rather than relying on O_RDONLY happening to be zero:
+    int fd = open(pid_file_c, O_RDONLY | O_CLOEXEC);
+    if (fd == -1) {
+        log(loglevel_t::ERROR, get_name(), ": read pid file: ", strerror(errno));
+        return pid_result_t::FAILED;
+    }
+
+    char pidbuf[21]; // just enough to hold any 64-bit integer
+    int r = ss_read(fd, pidbuf, 20);
+    if (r < 0) {
+        // Could not read from PID file
+        log(loglevel_t::ERROR, get_name(), ": could not read from pidfile; ", strerror(errno));
+        close(fd);
+        return pid_result_t::FAILED;
+    }
+
+    close(fd);
+    pidbuf[r] = 0; // store nul terminator
+
+    bool valid_pid = false;
+    try {
+        unsigned long long v = std::stoull(pidbuf, nullptr, 0);
+        // A value of zero must be rejected: waitpid(0, ...) and kill(0, ...) refer to our own
+        // process group rather than to a single process, so treating 0 as the daemon's pid
+        // could reap unrelated children or later signal our own process group.
+        if (v != 0 && v <= static_cast<unsigned long long>(std::numeric_limits<pid_t>::max())) {
+            pid = (pid_t) v;
+            valid_pid = true;
+        }
+    }
+    catch (std::out_of_range &exc) {
+        // Too large?
+    }
+    catch (std::invalid_argument &exc) {
+        // Ok, so it doesn't look like a number: proceed...
+    }
+
+    if (valid_pid) {
+        pid_t wait_r = waitpid(pid, exit_status, WNOHANG);
+        if (wait_r == -1 && errno == ECHILD) {
+            // We can't track this child - check process exists:
+            if (kill(pid, 0) == 0 || errno != ESRCH) {
+                tracking_child = false;
+                return pid_result_t::OK;
+            }
+            else {
+                log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
+                pid = -1;
+                return pid_result_t::FAILED;
+            }
+        }
+        else if (wait_r == pid) {
+            // The process has already terminated; *exit_status now holds its status.
+            pid = -1;
+            return pid_result_t::TERMINATED;
+        }
+        else if (wait_r == 0) {
+            // We can track the child
+            child_listener.add_reserved(event_loop, pid, dasynq::DEFAULT_PRIORITY - 10);
+            tracking_child = true;
+            reserved_child_watch = true;
+            return pid_result_t::OK;
+        }
+    }
+
+    log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
+    pid = -1;
+    return pid_result_t::FAILED;
+}
+
+// Stop the service process. Does nothing yet if the exec status of a freshly forked process
+// is still outstanding; otherwise signals the process group (if a process is running) or marks
+// the service stopped (if not).
+void process_service::bring_down() noexcept
+{
+    waiting_for_deps = false;
+
+    if (waiting_for_execstat) {
+        // The process is still starting. This should be uncommon, but can occur during
+        // smooth recovery. We can't do much now; we have to wait until we get the
+        // status, and then act appropriately.
+        return;
+    }
+
+    if (pid == -1) {
+        // The process is already dead.
+        stopped();
+        return;
+    }
+
+    // The process is still running and must be signalled. kill_pg() signals the whole process
+    // group rather than just the process, as there's less risk then of creating an orphaned
+    // process group:
+    if (! onstart_flags.no_sigterm) {
+        kill_pg(SIGTERM);
+    }
+    if (term_signal != -1) {
+        kill_pg(term_signal);
+    }
+
+    // In most cases, the rest is done in handle_exit_status. That cannot work for a BGPROCESS
+    // whose process is not our immediate child, so that case is finished here:
+    const bool untracked_bgprocess = (get_type() == service_type_t::BGPROCESS) && ! tracking_child;
+    if (untracked_bgprocess) {
+        stopped();
+        return;
+    }
+
+    if (stop_timeout != time_val(0,0)) {
+        restart_timer.arm_timer_rel(event_loop, stop_timeout);
+        stop_timer_armed = true;
+    }
+}
+
+// Stop the service by running its stop command, if it has one. With no stop command, or if
+// the stop command cannot be launched, the service is marked stopped immediately.
+void scripted_service::bring_down() noexcept
+{
+    waiting_for_deps = false;
+
+    const bool have_stop_command = (stop_command.length() != 0);
+
+    // Note the short-circuit: a launch is only attempted when there is a stop command.
+    if (! have_stop_command || ! start_ps_process(stop_arg_parts, false)) {
+        // Nothing to run, or the stop script couldn't be executed; there's not much else
+        // we can do:
+        stopped();
+        return;
+    }
+
+    // Stop script is running: start the kill timer, if a stop timeout is configured.
+    if (stop_timeout != time_val(0,0)) {
+        restart_timer.arm_timer_rel(event_loop, stop_timeout);
+        stop_timer_armed = true;
+    }
+}
+
+dasynq::rearm process_restart_timer::timer_expiry(eventloop_t &, int expiry_count)
+{ // Shared expiry handler for the stop timeout, the start timeout and the restart delay.
+ service->stop_timer_armed = false;
+
+ // Timer expires if:
+ // We are stopping, including after having startup cancelled (stop timeout, state is STOPPING); We are
+ // starting (start timeout, state is STARTING); We are waiting for restart timer before restarting,
+ // including smooth recovery (restart timeout, state is STARTING or STARTED).
+ if (service->get_state() == service_state_t::STOPPING) {
+ service->kill_with_fire();
+ }
+ else if (service->pid != -1) {
+ // Starting, start timed out.
+ service->stop_dependents();
+ service->interrupt_start();
+ }
+ else {
+ // STARTING / STARTED, and we have no pid: must be restarting (smooth recovery if STARTED)
+ service->do_restart();
+ }
+
+ // Leave the timer disabled, or, if it has been reset by any processing above, leave it armed:
+ return dasynq::rearm::NOOP;
+}
--- /dev/null
+#include "service.h"
+
+// Split a command string into arguments, in place. "arg_indices" holds a (start,end) index pair
+// for each argument within "s"; a nul terminator is written at each end index that lies inside
+// the string. Returns a vector with a pointer to the start of each argument followed by a
+// trailing nullptr. (The returned pointers are invalidated if the string is later modified).
+static std::vector<const char *> separate_args(std::string &s, std::list<std::pair<unsigned,unsigned>> &arg_indices)
+{
+    // Pass 1: terminate every argument. An end index at (or beyond) the end of the string
+    // needs no write.
+    const auto str_len = s.length();
+    for (const auto &bounds : arg_indices) {
+        if (bounds.second < str_len) {
+            s[bounds.second] = 0;
+        }
+    }
+
+    // Pass 2: the string is no longer modified, so pointers into its buffer can be collected.
+    const char * const base = s.c_str();
+    std::vector<const char *> arg_ptrs;
+    arg_ptrs.reserve(arg_indices.size() + 1);
+    for (const auto &bounds : arg_indices) {
+        arg_ptrs.push_back(base + bounds.first);
+    }
+    arg_ptrs.push_back(nullptr);
+    return arg_ptrs;
+}
+
+class base_process_service;
+
+// A timer for process restarting. Used to ensure a minimum delay between process restarts (and
+// also for timing service start and service stop before the SIGINT / SIGKILL hammer is used).
+class process_restart_timer : public eventloop_t::timer_impl<process_restart_timer>
+{
+ public:
+ base_process_service * service; // the service this timer acts on (not owned)
+
+ process_restart_timer(base_process_service *service_p)
+ : service(service_p)
+ {
+ }
+
+ dasynq::rearm timer_expiry(eventloop_t &, int expiry_count); // expiry callback; acts according to the service's state and pid
+};
+
+// Common base class for services backed by a process that is forked and supervised by this
+// class. Derived classes supply handle_exit_status() and exec_failed().
+class base_process_service : public service_record
+{
+    friend class service_child_watcher;
+    friend class exec_status_pipe_watcher;
+    friend class process_restart_timer;
+
+    private:
+    // Re-launch process
+    void do_restart() noexcept;
+
+    protected:
+    string program_name;          // storage for program/script and arguments
+    std::vector<const char *> exec_arg_parts; // pointer to each argument/part of the program_name, and nullptr
+
+    string stop_command;          // storage for stop program/script and arguments
+    std::vector<const char *> stop_arg_parts; // pointer to each argument/part of the stop_command, and nullptr
+
+    service_child_watcher child_listener;
+    exec_status_pipe_watcher child_status_listener;
+    process_restart_timer restart_timer;
+    time_val last_start_time;
+
+    // Restart interval time and restart count are used to track the number of automatic restarts
+    // over an interval. Too many restarts over an interval will inhibit further restarts.
+    time_val restart_interval_time;  // current restart interval
+    int restart_interval_count;      // count of restarts within current interval
+
+    time_val restart_interval;       // maximum restart interval
+    int max_restart_interval_count;  // number of restarts allowed over maximum interval
+    time_val restart_delay;          // delay between restarts
+
+    // Time allowed for service stop, after which SIGKILL is sent. 0 to disable.
+    time_val stop_timeout = {10, 0}; // default of 10 seconds
+
+    // Time allowed for service start, after which SIGINT is sent (and then SIGKILL after
+    // <stop_timeout>). 0 to disable.
+    time_val start_timeout = {60, 0}; // default of 1 minute
+
+    bool waiting_restart_timer : 1;  // whether the restart timer is armed to delay a restart
+    bool stop_timer_armed : 1;       // whether the timer is armed as a start/stop timeout
+    bool reserved_child_watch : 1;
+    bool tracking_child : 1;         // whether we expect to see child process status
+    bool start_is_interruptible : 1; // whether we can interrupt start
+
+    // Launch the process with the given arguments, return true on success
+    bool start_ps_process(const std::vector<const char *> &args, bool on_console) noexcept;
+
+    // Restart the process (due to start failure or unexpected termination). Restarts will be
+    // rate-limited.
+    bool restart_ps_process() noexcept;
+
+    // Perform smooth recovery process
+    void do_smooth_recovery() noexcept;
+
+    // Start the process, return true on success
+    virtual bool bring_up() noexcept override;
+
+    virtual void bring_down() noexcept override;
+
+    // Called when the process exits. The exit_status is the status value yielded by
+    // the "wait" system call.
+    virtual void handle_exit_status(int exit_status) noexcept = 0;
+
+    // Called if an exec fails.
+    virtual void exec_failed(int errcode) noexcept = 0;
+
+    virtual bool can_interrupt_start() noexcept override
+    {
+        return waiting_restart_timer || start_is_interruptible || service_record::can_interrupt_start();
+    }
+
+    virtual bool can_proceed_to_start() noexcept override
+    {
+        return ! waiting_restart_timer;
+    }
+
+    virtual bool interrupt_start() noexcept override;
+
+    // Kill with SIGKILL
+    void kill_with_fire() noexcept;
+
+    // Signal the process group of the service process
+    void kill_pg(int signo) noexcept;
+
+    public:
+    base_process_service(service_set *sset, string name, service_type_t record_type_p, string &&command,
+            std::list<std::pair<unsigned,unsigned>> &command_offsets,
+            const std::list<prelim_dep> &deplist_p);
+
+    ~base_process_service() noexcept
+    {
+    }
+
+    // Set the stop command and arguments (may throw std::bad_alloc)
+    void set_stop_command(std::string command, std::list<std::pair<unsigned,unsigned>> &stop_command_offsets)
+    {
+        // "command" is our own by-value copy, so move it into place rather than copying again.
+        stop_command = std::move(command);
+        stop_arg_parts = separate_args(stop_command, stop_command_offsets);
+    }
+
+    // Set the restart rate limit: at most max_restarts restarts within the given interval.
+    // (restart_ps_process() skips the check when max_restarts is 0.)
+    void set_restart_interval(timespec interval, int max_restarts) noexcept
+    {
+        restart_interval = interval;
+        max_restart_interval_count = max_restarts;
+    }
+
+    // Set the minimum delay between successive process starts.
+    void set_restart_delay(timespec delay) noexcept
+    {
+        restart_delay = delay;
+    }
+
+    // Set the time allowed for service stop (0 to disable).
+    void set_stop_timeout(timespec timeout) noexcept
+    {
+        stop_timeout = timeout;
+    }
+
+    // Set the time allowed for service start (0 to disable).
+    void set_start_timeout(timespec timeout) noexcept
+    {
+        start_timeout = timeout;
+    }
+
+    // Set whether start can be interrupted.
+    void set_start_interruptible(bool value) noexcept
+    {
+        start_is_interruptible = value;
+    }
+};
+
+class process_service : public base_process_service // service of type service_type_t::PROCESS
+{
+ virtual void handle_exit_status(int exit_status) noexcept override; // called with the wait status when the process terminates
+ virtual void exec_failed(int errcode) noexcept override; // called with the errno value when exec() fails
+ virtual void bring_down() noexcept override; // as the base version, but defers while the exec status is still outstanding
+
+ public:
+ process_service(service_set *sset, string name, string &&command,
+ std::list<std::pair<unsigned,unsigned>> &command_offsets,
+ std::list<prelim_dep> depends_p)
+ : base_process_service(sset, name, service_type_t::PROCESS, std::move(command), command_offsets,
+ depends_p)
+ {
+ }
+
+ ~process_service() noexcept
+ {
+ }
+};
+
+class bgproc_service : public base_process_service // service of type service_type_t::BGPROCESS
+{
+ virtual void handle_exit_status(int exit_status) noexcept override; // called with the wait status when the process terminates
+ virtual void exec_failed(int errcode) noexcept override; // called with the errno value when exec() fails
+
+ enum class pid_result_t {
+ OK,
+ FAILED, // failed to read pid or read invalid pid
+ TERMINATED // read pid successfully, but the process already terminated
+ };
+
+ // Read the pid-file and check the process it names; see pid_result_t for the possible outcomes. If the process has already terminated, its wait status is stored in *exit_status.
+ pid_result_t read_pid_file(int *exit_status) noexcept;
+
+ public:
+ bgproc_service(service_set *sset, string name, string &&command,
+ std::list<std::pair<unsigned,unsigned>> &command_offsets,
+ std::list<prelim_dep> depends_p)
+ : base_process_service(sset, name, service_type_t::BGPROCESS, std::move(command), command_offsets,
+ depends_p)
+ {
+ }
+
+ ~bgproc_service() noexcept
+ {
+ }
+};
+
+class scripted_service : public base_process_service // service of type service_type_t::SCRIPTED
+{
+ virtual void handle_exit_status(int exit_status) noexcept override; // called with the wait status when the start/stop script terminates
+ virtual void exec_failed(int errcode) noexcept override; // called with the errno value when exec() fails
+ virtual void bring_down() noexcept override; // runs the stop command, if any
+
+ virtual bool interrupt_start() noexcept override
+ {
+ // if base::interrupt_start() returns false, then start hasn't been fully interrupted, but an
+ // interrupt has been issued:
+ interrupting_start = ! base_process_service::interrupt_start();
+ return ! interrupting_start;
+ }
+
+ bool interrupting_start : 1; // true while an interrupt of the start script has been issued but the script has not yet terminated
+
+ public:
+ scripted_service(service_set *sset, string name, string &&command,
+ std::list<std::pair<unsigned,unsigned>> &command_offsets,
+ std::list<prelim_dep> depends_p)
+ : base_process_service(sset, name, service_type_t::SCRIPTED, std::move(command), command_offsets,
+ depends_p), interrupting_start(false)
+ {
+ }
+
+ ~scripted_service() noexcept
+ {
+ }
+};
// from dinit.cc:
void open_control_socket(bool report_ro_failure = true) noexcept;
void setup_external_log() noexcept;
-extern eventloop_t event_loop;
using clock_type = dasynq::clock_type;
using rearm = dasynq::rearm;
notify_listeners(service_event_t::STOPPED);
}
-dasynq::rearm service_child_watcher::status_change(eventloop_t &loop, pid_t child, int status) noexcept
-{
- base_process_service *sr = service;
-
- sr->pid = -1;
- sr->exit_status = status;
-
- // Ok, for a process service, any process death which we didn't rig
- // ourselves is a bit... unexpected. Probably, the child died because
- // we asked it to (sr->service_state == STOPPING). But even if
- // we didn't, there's not much we can do.
-
- if (sr->waiting_for_execstat) {
- // We still don't have an exec() status from the forked child, wait for that
- // before doing any further processing.
- return dasynq::rearm::NOOP; // hold watch reservation
- }
-
- // Must stop watch now since handle_exit_status might result in re-launch:
- // (stop_watch instead of deregister, so that we hold watch reservation).
- stop_watch(loop);
-
- if (sr->stop_timer_armed) {
- sr->restart_timer.stop_timer(loop);
- sr->stop_timer_armed = false;
- }
-
- sr->handle_exit_status(status);
- return dasynq::rearm::NOOP;
-}
bool service_record::do_auto_restart() noexcept
{
stopped();
}
-void base_process_service::do_smooth_recovery() noexcept
-{
- if (! restart_ps_process()) {
- emergency_stop();
- services->process_queues();
- }
-}
-
-void process_service::handle_exit_status(int exit_status) noexcept
-{
- bool did_exit = WIFEXITED(exit_status);
- bool was_signalled = WIFSIGNALED(exit_status);
- restarting = false;
- auto service_state = get_state();
-
- if (exit_status != 0 && service_state != service_state_t::STOPPING) {
- if (did_exit) {
- log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
- WEXITSTATUS(exit_status));
- }
- else if (was_signalled) {
- log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
- WTERMSIG(exit_status));
- }
- }
-
- if (service_state == service_state_t::STARTING) {
- if (did_exit && WEXITSTATUS(exit_status) == 0) {
- started();
- }
- else {
- failed_to_start();
- }
- }
- else if (service_state == service_state_t::STOPPING) {
- // We won't log a non-zero exit status or termination due to signal here -
- // we assume that the process died because we signalled it.
- stopped();
- }
- else if (smooth_recovery && service_state == service_state_t::STARTED
- && get_target_state() == service_state_t::STARTED) {
- do_smooth_recovery();
- return;
- }
- else {
- emergency_stop();
- }
- services->process_queues();
-}
-
-void process_service::exec_failed(int errcode) noexcept
-{
- log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
- if (get_state() == service_state_t::STARTING) {
- failed_to_start();
- }
- else {
- // Process service in smooth recovery:
- emergency_stop();
- }
-}
-
-void bgproc_service::handle_exit_status(int exit_status) noexcept
-{
- begin:
- bool did_exit = WIFEXITED(exit_status);
- bool was_signalled = WIFSIGNALED(exit_status);
- auto service_state = get_state();
-
- if (exit_status != 0 && service_state != service_state_t::STOPPING) {
- if (did_exit) {
- log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
- WEXITSTATUS(exit_status));
- }
- else if (was_signalled) {
- log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
- WTERMSIG(exit_status));
- }
- }
-
- // This may be a "smooth recovery" where we are restarting the process while leaving the
- // service in the STARTED state.
- if (restarting && service_state == service_state_t::STARTED) {
- restarting = false;
- bool need_stop = false;
- if ((did_exit && WEXITSTATUS(exit_status) != 0) || was_signalled) {
- need_stop = true;
- }
- else {
- // We need to re-read the PID, since it has now changed.
- if (pid_file.length() != 0) {
- auto pid_result = read_pid_file(&exit_status);
- switch (pid_result) {
- case pid_result_t::FAILED:
- // Failed startup: no auto-restart.
- need_stop = true;
- break;
- case pid_result_t::TERMINATED:
- goto begin;
- case pid_result_t::OK:
- break;
- }
- }
- }
-
- if (need_stop) {
- // Failed startup: no auto-restart.
- emergency_stop();
- services->process_queues();
- }
-
- return;
- }
-
- restarting = false;
- if (service_state == service_state_t::STARTING) {
- // POSIX requires that if the process exited clearly with a status code of 0,
- // the exit status value will be 0:
- if (exit_status == 0) {
- auto pid_result = read_pid_file(&exit_status);
- switch (pid_result) {
- case pid_result_t::FAILED:
- // Failed startup: no auto-restart.
- failed_to_start();
- break;
- case pid_result_t::TERMINATED:
- // started, but immediately terminated
- started();
- goto begin;
- case pid_result_t::OK:
- started();
- break;
- }
- }
- else {
- failed_to_start();
- }
- }
- else if (service_state == service_state_t::STOPPING) {
- // We won't log a non-zero exit status or termination due to signal here -
- // we assume that the process died because we signalled it.
- stopped();
- }
- else {
- // we must be STARTED
- if (smooth_recovery && get_target_state() == service_state_t::STARTED) {
- do_smooth_recovery();
- return;
- }
- if (! do_auto_restart() && start_explicit) {
- start_explicit = false;
- release();
- }
- forced_stop();
- stop_dependents();
- stopped();
- }
- services->process_queues();
-}
-
-void bgproc_service::exec_failed(int errcode) noexcept
-{
- log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
- // Only time we execute is for startup:
- failed_to_start();
-}
-
-void scripted_service::handle_exit_status(int exit_status) noexcept
-{
- bool did_exit = WIFEXITED(exit_status);
- bool was_signalled = WIFSIGNALED(exit_status);
- auto service_state = get_state();
-
- // For a scripted service, a termination occurs in one of three main cases:
- // - the start script completed (or failed), when service was STARTING
- // - the start script was interrupted to cancel startup; state is STOPPING
- // - the stop script complete (or failed), state is STOPPING
-
- if (service_state == service_state_t::STOPPING) {
- // We might be running the stop script, or we might be running the start script and have issued
- // a cancel order via SIGINT:
- if (did_exit && WEXITSTATUS(exit_status) == 0) {
- if (interrupting_start) {
- interrupting_start = false;
- // launch stop script:
- bring_down();
- }
- else {
- // We were running the stop script and finished successfully
- stopped();
- }
- }
- else {
- if (interrupting_start) {
- // We issued a start interrupt, so we expected this failure:
- if (did_exit) {
- log(loglevel_t::INFO, "Service ", get_name(), " start cancelled; exit code ",
- WEXITSTATUS(exit_status));
- }
- else if (was_signalled) {
- log(loglevel_t::INFO, "Service ", get_name(), " start cancelled from signal ",
- WTERMSIG(exit_status));
- }
- }
- else {
- // ??? failed to stop! Let's log it as warning:
- if (did_exit) {
- log(loglevel_t::WARN, "Service ", get_name(), " stop command failed with exit code ",
- WEXITSTATUS(exit_status));
- }
- else if (was_signalled) {
- log(loglevel_t::WARN, "Service ", get_name(), " stop command terminated due to signal ",
- WTERMSIG(exit_status));
- }
- }
- // Even if the stop script failed, assume that service is now stopped, so that any dependencies
- // can be stopped. There's not really any other useful course of action here.
- interrupting_start = false;
- stopped();
- }
- services->process_queues();
- }
- else { // STARTING
- if (exit_status == 0) {
- started();
- }
- else {
- // failed to start
- if (did_exit) {
- log(loglevel_t::ERROR, "Service ", get_name(), " command failed with exit code ",
- WEXITSTATUS(exit_status));
- }
- else if (was_signalled) {
- log(loglevel_t::ERROR, "Service ", get_name(), " command terminated due to signal ",
- WTERMSIG(exit_status));
- }
- failed_to_start();
- }
- services->process_queues();
- }
-}
-
-void scripted_service::exec_failed(int errcode) noexcept
-{
- log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
- auto service_state = get_state();
- if (service_state == service_state_t::STARTING) {
- failed_to_start();
- }
- else if (service_state == service_state_t::STOPPING) {
- // We've logged the failure, but it's probably better not to leave the service in
- // STOPPING state:
- stopped();
- }
-}
-
-rearm exec_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) noexcept
-{
- base_process_service *sr = service;
- sr->waiting_for_execstat = false;
-
- int exec_status;
- int r = read(get_watched_fd(), &exec_status, sizeof(int));
- deregister(loop);
- close(get_watched_fd());
-
- if (r > 0) {
- // We read an errno code; exec() failed, and the service startup failed.
- if (sr->pid != -1) {
- sr->child_listener.deregister(event_loop, sr->pid);
- sr->reserved_child_watch = false;
- if (sr->stop_timer_armed) {
- sr->restart_timer.stop_timer(loop);
- sr->stop_timer_armed = false;
- }
- }
- sr->pid = -1;
- sr->exec_failed(exec_status);
- }
- else {
- // exec() succeeded.
- if (sr->get_type() == service_type_t::PROCESS) {
- // This could be a smooth recovery (state already STARTED). Even more, the process
- // might be stopped (and killed via a signal) during smooth recovery. We don't to
- // process startup again in either case, so we check for state STARTING:
- if (sr->get_state() == service_state_t::STARTING) {
- sr->started();
- }
- else if (sr->get_state() == service_state_t::STOPPING) {
- // stopping, but smooth recovery was in process. That's now over so we can
- // commence normal stop. Note that if pid == -1 the process already stopped(!),
- // that's handled below.
- if (sr->pid != -1 && sr->stop_check_dependents()) {
- sr->bring_down();
- }
- }
- }
-
- if (sr->pid == -1) {
- // Somehow the process managed to complete before we even saw the status.
- sr->handle_exit_status(sr->exit_status);
- }
- }
-
- sr->services->process_queues();
-
- return rearm::REMOVED;
-}
void service_record::require() noexcept
{
}
}
-bgproc_service::pid_result_t
-bgproc_service::read_pid_file(int *exit_status) noexcept
-{
- const char *pid_file_c = pid_file.c_str();
- int fd = open(pid_file_c, O_CLOEXEC);
- if (fd == -1) {
- log(loglevel_t::ERROR, get_name(), ": read pid file: ", strerror(errno));
- return pid_result_t::FAILED;
- }
-
- char pidbuf[21]; // just enough to hold any 64-bit integer
- int r = ss_read(fd, pidbuf, 20);
- if (r < 0) {
- // Could not read from PID file
- log(loglevel_t::ERROR, get_name(), ": could not read from pidfile; ", strerror(errno));
- close(fd);
- return pid_result_t::FAILED;
- }
-
- close(fd);
- pidbuf[r] = 0; // store nul terminator
-
- bool valid_pid = false;
- try {
- unsigned long long v = std::stoull(pidbuf, nullptr, 0);
- if (v <= std::numeric_limits<pid_t>::max()) {
- pid = (pid_t) v;
- valid_pid = true;
- }
- }
- catch (std::out_of_range &exc) {
- // Too large?
- }
- catch (std::invalid_argument &exc) {
- // Ok, so it doesn't look like a number: proceed...
- }
-
- if (valid_pid) {
- pid_t wait_r = waitpid(pid, exit_status, WNOHANG);
- if (wait_r == -1 && errno == ECHILD) {
- // We can't track this child - check process exists:
- if (kill(pid, 0) == 0 || errno != ESRCH) {
- tracking_child = false;
- return pid_result_t::OK;
- }
- else {
- log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
- pid = -1;
- return pid_result_t::FAILED;
- }
- }
- else if (wait_r == pid) {
- pid = -1;
- return pid_result_t::TERMINATED;
- }
- else if (wait_r == 0) {
- // We can track the child
- child_listener.add_reserved(event_loop, pid, dasynq::DEFAULT_PRIORITY - 10);
- tracking_child = true;
- reserved_child_watch = true;
- return pid_result_t::OK;
- }
- }
-
- log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
- pid = -1;
- return pid_result_t::FAILED;
-}
void service_record::started() noexcept
{
return true;
}
-bool base_process_service::bring_up() noexcept
-{
- if (restarting) {
- if (pid == -1) {
- return restart_ps_process();
- }
- return true;
- }
- else {
- event_loop.get_time(restart_interval_time, clock_type::MONOTONIC);
- restart_interval_count = 0;
- if (start_ps_process(exec_arg_parts, onstart_flags.starts_on_console)) {
- if (start_timeout != time_val(0,0)) {
- restart_timer.arm_timer_rel(event_loop, start_timeout);
- stop_timer_armed = true;
- }
- else if (stop_timer_armed) {
- restart_timer.stop_timer(event_loop);
- stop_timer_armed = false;
- }
- return true;
- }
- return false;
- }
-}
-
-bool base_process_service::start_ps_process(const std::vector<const char *> &cmd, bool on_console) noexcept
-{
- // In general, you can't tell whether fork/exec is successful. We use a pipe to communicate
- // success/failure from the child to the parent. The pipe is set CLOEXEC so a successful
- // exec closes the pipe, and the parent sees EOF. If the exec is unsuccessful, the errno
- // is written to the pipe, and the parent can read it.
-
- event_loop.get_time(last_start_time, clock_type::MONOTONIC);
-
- int pipefd[2];
- if (dasynq::pipe2(pipefd, O_CLOEXEC)) {
- log(loglevel_t::ERROR, get_name(), ": can't create status check pipe: ", strerror(errno));
- return false;
- }
-
- const char * logfile = this->logfile.c_str();
- if (*logfile == 0) {
- logfile = "/dev/null";
- }
-
- bool child_status_registered = false;
- control_conn_t *control_conn = nullptr;
-
- int control_socket[2] = {-1, -1};
- if (onstart_flags.pass_cs_fd) {
- if (dinit_socketpair(AF_UNIX, SOCK_STREAM, /* protocol */ 0, control_socket, SOCK_NONBLOCK)) {
- log(loglevel_t::ERROR, get_name(), ": can't create control socket: ", strerror(errno));
- goto out_p;
- }
-
- // Make the server side socket close-on-exec:
- int fdflags = fcntl(control_socket[0], F_GETFD);
- fcntl(control_socket[0], F_SETFD, fdflags | FD_CLOEXEC);
-
- try {
- control_conn = new control_conn_t(event_loop, services, control_socket[0]);
- }
- catch (std::exception &exc) {
- log(loglevel_t::ERROR, get_name(), ": can't launch process; out of memory");
- goto out_cs;
- }
- }
-
- // Set up complete, now fork and exec:
-
- pid_t forkpid;
-
- try {
- child_status_listener.add_watch(event_loop, pipefd[0], dasynq::IN_EVENTS);
- child_status_registered = true;
-
- // We specify a high priority (i.e. low priority value) so that process termination is
- // handled early. This means we have always recorded that the process is terminated by the
- // time that we handle events that might otherwise cause us to signal the process, so we
- // avoid sending a signal to an invalid (and possibly recycled) process ID.
- forkpid = child_listener.fork(event_loop, reserved_child_watch, dasynq::DEFAULT_PRIORITY - 10);
- reserved_child_watch = true;
- }
- catch (std::exception &e) {
- log(loglevel_t::ERROR, get_name(), ": Could not fork: ", e.what());
- goto out_cs_h;
- }
-
- if (forkpid == 0) {
- run_child_proc(cmd.data(), logfile, on_console, pipefd[1], control_socket[1]);
- }
- else {
- // Parent process
- close(pipefd[1]); // close the 'other end' fd
- if (control_socket[1] != -1) {
- close(control_socket[1]);
- }
- pid = forkpid;
-
- waiting_for_execstat = true;
- return true;
- }
-
- // Failure exit:
-
- out_cs_h:
- if (child_status_registered) {
- child_status_listener.deregister(event_loop);
- }
-
- if (onstart_flags.pass_cs_fd) {
- delete control_conn;
-
- out_cs:
- close(control_socket[0]);
- close(control_socket[1]);
- }
-
- out_p:
- close(pipefd[0]);
- close(pipefd[1]);
-
- return false;
-}
void service_record::run_child_proc(const char * const *args, const char *logfile, bool on_console,
int wpipefd, int csfd) noexcept
stopped();
}
-void base_process_service::kill_pg(int signo) noexcept
-{
- pid_t pgid = getpgid(pid);
- if (pgid == -1) {
- // only should happen if pid is invalid, which should never happen...
- log(loglevel_t::ERROR, get_name(), ": can't signal process: ", strerror(errno));
- return;
- }
- kill(-pgid, signo);
-}
-
-void base_process_service::bring_down() noexcept
-{
- waiting_for_deps = false;
- if (pid != -1) {
- // The process is still kicking on - must actually kill it. We signal the process
- // group (-pid) rather than just the process as there's less risk then of creating
- // an orphaned process group:
- if (! onstart_flags.no_sigterm) {
- kill_pg(SIGTERM);
- }
- if (term_signal != -1) {
- kill_pg(term_signal);
- }
-
- // In most cases, the rest is done in handle_exit_status.
- // If we are a BGPROCESS and the process is not our immediate child, however, that
- // won't work - check for this now:
- if (get_type() == service_type_t::BGPROCESS && ! tracking_child) {
- stopped();
- }
- else if (stop_timeout != time_val(0,0)) {
- restart_timer.arm_timer_rel(event_loop, stop_timeout);
- stop_timer_armed = true;
- }
- }
- else {
- // The process is already dead.
- stopped();
- }
-}
-
-void process_service::bring_down() noexcept
-{
- waiting_for_deps = false;
- if (waiting_for_execstat) {
- // The process is still starting. This should be uncommon, but can occur during
- // smooth recovery. We can't do much now; we have to wait until we get the
- // status, and then act appropriately.
- return;
- }
- else if (pid != -1) {
- // The process is still kicking on - must actually kill it. We signal the process
- // group (-pid) rather than just the process as there's less risk then of creating
- // an orphaned process group:
- if (! onstart_flags.no_sigterm) {
- kill_pg(SIGTERM);
- }
- if (term_signal != -1) {
- kill_pg(term_signal);
- }
-
- // In most cases, the rest is done in handle_exit_status.
- // If we are a BGPROCESS and the process is not our immediate child, however, that
- // won't work - check for this now:
- if (get_type() == service_type_t::BGPROCESS && ! tracking_child) {
- stopped();
- }
- else if (stop_timeout != time_val(0,0)) {
- restart_timer.arm_timer_rel(event_loop, stop_timeout);
- stop_timer_armed = true;
- }
- }
- else {
- // The process is already dead.
- stopped();
- }
-}
-
-void scripted_service::bring_down() noexcept
-{
- waiting_for_deps = false;
- if (stop_command.length() == 0) {
- stopped();
- }
- else if (! start_ps_process(stop_arg_parts, false)) {
- // Couldn't execute stop script, but there's not much we can do:
- stopped();
- }
- else {
- // successfully started stop script: start kill timer:
- if (stop_timeout != time_val(0,0)) {
- restart_timer.arm_timer_rel(event_loop, stop_timeout);
- stop_timer_armed = true;
- }
- }
-}
-
void service_record::unpin() noexcept
{
if (pinned_started) {
{
active_services--;
}
-
-base_process_service::base_process_service(service_set *sset, string name,
- service_type_t service_type_p, string &&command,
- std::list<std::pair<unsigned,unsigned>> &command_offsets,
- const std::list<prelim_dep> &deplist_p)
- : service_record(sset, name, service_type_p, deplist_p), child_listener(this),
- child_status_listener(this), restart_timer(this)
-{
- program_name = std::move(command);
- exec_arg_parts = separate_args(program_name, command_offsets);
-
- restart_interval_count = 0;
- restart_interval_time = {0, 0};
- restart_timer.service = this;
- restart_timer.add_timer(event_loop);
-
- // By default, allow a maximum of 3 restarts within 10.0 seconds:
- restart_interval.seconds() = 10;
- restart_interval.nseconds() = 0;
- max_restart_interval_count = 3;
-
- waiting_restart_timer = false;
- reserved_child_watch = false;
- tracking_child = false;
- stop_timer_armed = false;
- start_is_interruptible = false;
-}
-
-void base_process_service::do_restart() noexcept
-{
- waiting_restart_timer = false;
- restart_interval_count++;
- auto service_state = get_state();
-
- // We may be STARTING (regular restart) or STARTED ("smooth recovery"). This affects whether
- // the process should be granted access to the console:
- bool on_console = service_state == service_state_t::STARTING
- ? onstart_flags.starts_on_console : onstart_flags.runs_on_console;
-
- if (service_state == service_state_t::STARTING) {
- // for a smooth recovery, we want to check dependencies are available before actually
- // starting:
- if (! check_deps_started()) {
- waiting_for_deps = true;
- return;
- }
- }
-
- if (! start_ps_process(exec_arg_parts, on_console)) {
- restarting = false;
- if (service_state == service_state_t::STARTING) {
- failed_to_start();
- }
- else {
- // desired_state = service_state_t::STOPPED;
- forced_stop();
- }
- services->process_queues();
- }
-}
-
-bool base_process_service::restart_ps_process() noexcept
-{
- using time_val = dasynq::time_val;
-
- time_val current_time;
- event_loop.get_time(current_time, clock_type::MONOTONIC);
-
- if (max_restart_interval_count != 0) {
- // Check whether we're still in the most recent restart check interval:
- time_val int_diff = current_time - restart_interval_time;
- if (int_diff < restart_interval) {
- if (restart_interval_count >= max_restart_interval_count) {
- log(loglevel_t::ERROR, "Service ", get_name(), " restarting too quickly; stopping.");
- return false;
- }
- }
- else {
- restart_interval_time = current_time;
- restart_interval_count = 0;
- }
- }
-
- // Check if enough time has lapsed since the prevous restart. If not, start a timer:
- time_val tdiff = current_time - last_start_time;
- if (restart_delay <= tdiff) {
- // > restart delay (normally 200ms)
- do_restart();
- }
- else {
- time_val timeout = restart_delay - tdiff;
- restart_timer.arm_timer_rel(event_loop, timeout);
- waiting_restart_timer = true;
- }
- return true;
-}
-
-bool base_process_service::interrupt_start() noexcept
-{
- if (waiting_restart_timer) {
- restart_timer.stop_timer(event_loop);
- waiting_restart_timer = false;
- return service_record::interrupt_start();
- }
- else {
- log(loglevel_t::WARN, "Interrupting start of service ", get_name(), " with pid ", pid, " (with SIGINT).");
- kill_pg(SIGINT);
- if (stop_timeout != time_val(0,0)) {
- restart_timer.arm_timer_rel(event_loop, stop_timeout);
- stop_timer_armed = true;
- }
- else if (stop_timer_armed) {
- restart_timer.stop_timer(event_loop);
- stop_timer_armed = false;
- }
- set_state(service_state_t::STOPPING);
- notify_listeners(service_event_t::STARTCANCELLED);
- return false;
- }
-}
-
-void base_process_service::kill_with_fire() noexcept
-{
- if (pid != -1) {
- log(loglevel_t::WARN, "Service ", get_name(), " with pid ", pid, " exceeded allowed stop time; killing.");
- kill_pg(SIGKILL);
- }
-}
-
-dasynq::rearm process_restart_timer::timer_expiry(eventloop_t &, int expiry_count)
-{
- service->stop_timer_armed = false;
-
- // Timer expires if:
- // We are stopping, including after having startup cancelled (stop timeout, state is STOPPING); We are
- // starting (start timeout, state is STARTING); We are waiting for restart timer before restarting,
- // including smooth recovery (restart timeout, state is STARTING or STARTED).
- if (service->get_state() == service_state_t::STOPPING) {
- service->kill_with_fire();
- }
- else if (service->pid != -1) {
- // Starting, start timed out.
- service->stop_dependents();
- service->interrupt_start();
- }
- else {
- // STARTING / STARTED, and we have a pid: must be restarting (smooth recovery if STARTED)
- service->do_restart();
- }
-
- // Leave the timer disabled, or, if it has been reset by any processing above, leave it armed:
- return dasynq::rearm::NOOP;
-}
}
};
-// Given a string and a list of pairs of (start,end) indices for each argument in that string,
-// store a null terminator for the argument. Return a `char *` vector containing the beginning
-// of each argument and a trailing nullptr. (The returned array is invalidated if the string is later modified).
-static std::vector<const char *> separate_args(std::string &s, std::list<std::pair<unsigned,unsigned>> &arg_indices)
-{
- std::vector<const char *> r;
- r.reserve(arg_indices.size() + 1);
-
- // First store nul terminator for each part:
- for (auto index_pair : arg_indices) {
- if (index_pair.second < s.length()) {
- s[index_pair.second] = 0;
- }
- }
-
- // Now we can get the C string (c_str) and store offsets into it:
- const char * cstr = s.c_str();
- for (auto index_pair : arg_indices) {
- r.push_back(cstr + index_pair.first);
- }
- r.push_back(nullptr);
- return r;
-}
-
class service_child_watcher : public eventloop_t::child_proc_watcher_impl<service_child_watcher>
{
public:
}
};
-class base_process_service;
-
-// A timer for process restarting. Used to ensure a minimum delay between process restarts (and
-// also for timing service stop before the SIGKILL hammer is used).
-class process_restart_timer : public eventloop_t::timer_impl<process_restart_timer>
-{
- public:
- base_process_service * service;
-
- process_restart_timer(base_process_service *service_p)
- : service(service_p)
- {
- }
-
- dasynq::rearm timer_expiry(eventloop_t &, int expiry_count);
-};
-
-class base_process_service : public service_record
-{
- friend class service_child_watcher;
- friend class exec_status_pipe_watcher;
- friend class process_restart_timer;
-
- private:
- // Re-launch process
- void do_restart() noexcept;
-
- protected:
- string program_name; // storage for program/script and arguments
- std::vector<const char *> exec_arg_parts; // pointer to each argument/part of the program_name, and nullptr
-
- string stop_command; // storage for stop program/script and arguments
- std::vector<const char *> stop_arg_parts; // pointer to each argument/part of the stop_command, and nullptr
-
- service_child_watcher child_listener;
- exec_status_pipe_watcher child_status_listener;
- process_restart_timer restart_timer;
- time_val last_start_time;
-
- // Restart interval time and restart count are used to track the number of automatic restarts
- // over an interval. Too many restarts over an interval will inhibit further restarts.
- time_val restart_interval_time; // current restart interval
- int restart_interval_count; // count of restarts within current interval
-
- time_val restart_interval; // maximum restart interval
- int max_restart_interval_count; // number of restarts allowed over maximum interval
- time_val restart_delay; // delay between restarts
-
- // Time allowed for service stop, after which SIGKILL is sent. 0 to disable.
- time_val stop_timeout = {10, 0}; // default of 10 seconds
-
- // Time allowed for service start, after which SIGINT is sent (and then SIGKILL after
- // <stop_timeout>). 0 to disable.
- time_val start_timeout = {60, 0}; // default of 1 minute
-
- bool waiting_restart_timer : 1;
- bool stop_timer_armed : 1;
- bool reserved_child_watch : 1;
- bool tracking_child : 1; // whether we expect to see child process status
- bool start_is_interruptible : 1; // whether we can interrupt start
-
- // Launch the process with the given arguments, return true on success
- bool start_ps_process(const std::vector<const char *> &args, bool on_console) noexcept;
-
- // Restart the process (due to start failure or unexpected termination). Restarts will be
- // rate-limited.
- bool restart_ps_process() noexcept;
-
- // Perform smooth recovery process
- void do_smooth_recovery() noexcept;
-
- // Start the process, return true on success
- virtual bool bring_up() noexcept override;
-
- virtual void bring_down() noexcept override;
-
- // Called when the process exits. The exit_status is the status value yielded by
- // the "wait" system call.
- virtual void handle_exit_status(int exit_status) noexcept = 0;
-
- // Called if an exec fails.
- virtual void exec_failed(int errcode) noexcept = 0;
-
- virtual bool can_interrupt_start() noexcept override
- {
- return waiting_restart_timer || start_is_interruptible || service_record::can_interrupt_start();
- }
-
- virtual bool can_proceed_to_start() noexcept override
- {
- return ! waiting_restart_timer;
- }
-
- virtual bool interrupt_start() noexcept override;
-
- // Kill with SIGKILL
- void kill_with_fire() noexcept;
-
- // Signal the process group of the service process
- void kill_pg(int signo) noexcept;
-
- public:
- base_process_service(service_set *sset, string name, service_type_t record_type_p, string &&command,
- std::list<std::pair<unsigned,unsigned>> &command_offsets,
- const std::list<prelim_dep> &deplist_p);
-
- ~base_process_service() noexcept
- {
- }
-
- // Set the stop command and arguments (may throw std::bad_alloc)
- void set_stop_command(std::string command, std::list<std::pair<unsigned,unsigned>> &stop_command_offsets)
- {
- stop_command = command;
- stop_arg_parts = separate_args(stop_command, stop_command_offsets);
- }
-
- void set_restart_interval(timespec interval, int max_restarts) noexcept
- {
- restart_interval = interval;
- max_restart_interval_count = max_restarts;
- }
-
- void set_restart_delay(timespec delay) noexcept
- {
- restart_delay = delay;
- }
-
- void set_stop_timeout(timespec timeout) noexcept
- {
- stop_timeout = timeout;
- }
-
- void set_start_timeout(timespec timeout) noexcept
- {
- start_timeout = timeout;
- }
-
- void set_start_interruptible(bool value) noexcept
- {
- start_is_interruptible = value;
- }
-};
-
-class process_service : public base_process_service
-{
- virtual void handle_exit_status(int exit_status) noexcept override;
- virtual void exec_failed(int errcode) noexcept override;
- virtual void bring_down() noexcept override;
-
- public:
- process_service(service_set *sset, string name, string &&command,
- std::list<std::pair<unsigned,unsigned>> &command_offsets,
- std::list<prelim_dep> depends_p)
- : base_process_service(sset, name, service_type_t::PROCESS, std::move(command), command_offsets,
- depends_p)
- {
- }
-
- ~process_service() noexcept
- {
- }
-};
-
-class bgproc_service : public base_process_service
-{
- virtual void handle_exit_status(int exit_status) noexcept override;
- virtual void exec_failed(int errcode) noexcept override;
-
- enum class pid_result_t {
- OK,
- FAILED, // failed to read pid or read invalid pid
- TERMINATED // read pid successfully, but the process already terminated
- };
-
- // Read the pid-file, return false on failure
- pid_result_t read_pid_file(int *exit_status) noexcept;
-
- public:
- bgproc_service(service_set *sset, string name, string &&command,
- std::list<std::pair<unsigned,unsigned>> &command_offsets,
- std::list<prelim_dep> depends_p)
- : base_process_service(sset, name, service_type_t::BGPROCESS, std::move(command), command_offsets,
- depends_p)
- {
- }
-
- ~bgproc_service() noexcept
- {
- }
-};
-
-class scripted_service : public base_process_service
-{
- virtual void handle_exit_status(int exit_status) noexcept override;
- virtual void exec_failed(int errcode) noexcept override;
- virtual void bring_down() noexcept override;
-
- virtual bool interrupt_start() noexcept override
- {
- // if base::interrupt_start() returns false, then start hasn't been fully interrupted, but an
- // interrupt has been issued:
- interrupting_start = ! base_process_service::interrupt_start();
- return ! interrupting_start;
- }
-
- bool interrupting_start : 1; // running start script (true) or stop script (false)
-
- public:
- scripted_service(service_set *sset, string name, string &&command,
- std::list<std::pair<unsigned,unsigned>> &command_offsets,
- std::list<prelim_dep> depends_p)
- : base_process_service(sset, name, service_type_t::SCRIPTED, std::move(command), command_offsets,
- depends_p), interrupting_start(false)
- {
- }
-
- ~scripted_service() noexcept
- {
- }
-};
-
inline auto extract_prop_queue(service_record *sr) -> decltype(sr->prop_queue_node) &
{
return sr->prop_queue_node;