+#include <cstring>
+#include <type_traits>
+
#include <sys/un.h>
#include <sys/socket.h>
* See proc-service.h header for interface details.
*/
+// Human-readable text for each execution stage (i.e. each point at which launching a process
+// can fail). One entry per exec_stage value, up to and including DO_EXEC, in enum order.
+const char * const exec_stage_descriptions[1 + static_cast<int>(exec_stage::DO_EXEC)] = {
+    /* ARRANGE_FDS             */ "arranging file descriptors",
+    /* READ_ENV_FILE           */ "reading environment file",
+    /* SET_NOTIFYFD_VAR        */ "setting environment variable",
+    /* SETUP_ACTIVATION_SOCKET */ "setting up activation socket",
+    /* SETUP_CONTROL_SOCKET    */ "setting up control socket",
+    /* CHDIR                   */ "changing directory",
+    /* SETUP_STDINOUTERR       */ "setting up standard input/output descriptors",
+    /* SET_RLIMITS             */ "setting resource limits",
+    /* SET_UIDGID              */ "setting user/group ID",
+    /* DO_EXEC                 */ "executing command"
+};
+
// Given a string and a list of pairs of (start,end) indices for each argument in that string,
// store a null terminator for the argument. Return a `char *` vector containing the beginning
-// of each argument and a trailing nullptr. (The returned array is invalidated if the string is later modified).
-std::vector<const char *> separate_args(std::string &s, std::list<std::pair<unsigned,unsigned>> &arg_indices)
+// of each argument and a trailing nullptr. (The returned array is invalidated if the string is
+// later modified).
+std::vector<const char *> separate_args(std::string &s,
+ const std::list<std::pair<unsigned,unsigned>> &arg_indices)
{
std::vector<const char *> r;
r.reserve(arg_indices.size() + 1);
// might be stopped (and killed via a signal) during smooth recovery. We don't to
// process startup again in either case, so we check for state STARTING:
if (get_state() == service_state_t::STARTING) {
- started();
+ if (force_notification_fd != -1 || !notification_var.empty()) {
+ // Wait for readiness notification:
+ readiness_watcher.set_enabled(event_loop, true);
+ }
+ else {
+ started();
+ }
}
else if (get_state() == service_state_t::STOPPING) {
// stopping, but smooth recovery was in process. That's now over so we can
}
}
+void scripted_service::exec_succeeded() noexcept
+{
+ // Deliberately empty: for a scripted service, a successful exec means nothing other than
+ // that the start/stop script will now begin, so there is no state to update here.
+}
+
rearm exec_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) noexcept
{
base_process_service *sr = service;
sr->waiting_for_execstat = false;
- int exec_status;
- int r = read(get_watched_fd(), &exec_status, sizeof(int));
+ run_proc_err exec_status;
+ int r = read(get_watched_fd(), &exec_status, sizeof(exec_status));
deregister(loop);
close(get_watched_fd());
return rearm::REMOVED;
}
+// Handle activity on the service's readiness-notification descriptor.
+//
+// While the service is STARTING, any data read marks the service as started; end-of-file, or a
+// read error other than EAGAIN, is treated as a failed start and the service is brought down.
+// In any other state the data is read and discarded, and on end-of-file the descriptor is
+// closed and the watcher is disarmed.
+rearm ready_notify_watcher::fd_event(eventloop_t &, int fd, int flags) noexcept
+{
+    char scratch[128];
+    const bool awaiting_ready = (service->get_state() == service_state_t::STARTING);
+
+    // A single read per event; what the result means depends on the state sampled above.
+    int nread = bp_sys::read(fd, scratch, sizeof(scratch));
+
+    if (! awaiting_ready) {
+        // Not waiting for readiness: just keep draining the pipe.
+        if (nread == 0) {
+            // Process closed write end or terminated
+            close(fd);
+            service->notification_fd = -1;
+            return rearm::DISARM;
+        }
+    }
+    else if (nread > 0) {
+        // Something was written to the notification pipe: the service is ready.
+        service->started();
+    }
+    else if (nread == 0 || errno != EAGAIN) {
+        // End-of-file or a real read error before readiness was signalled.
+        service->failed_to_start(false, false);
+        service->set_state(service_state_t::STOPPING);
+        service->bring_down();
+    }
+
+    service->services->process_queues();
+    return rearm::REARM;
+}
+
dasynq::rearm service_child_watcher::status_change(eventloop_t &loop, pid_t child, int status) noexcept
{
base_process_service *sr = service;
sr->pid = -1;
- sr->exit_status = status;
+ sr->exit_status = bp_sys::exit_status(status);
- // Ok, for a process service, any process death which we didn't rig
- // ourselves is a bit... unexpected. Probably, the child died because
- // we asked it to (sr->service_state == STOPPING). But even if
- // we didn't, there's not much we can do.
+ // Ok, for a process service, any process death which we didn't rig ourselves is a bit... unexpected.
+ // Probably, the child died because we asked it to (sr->service_state == STOPPING). But even if we
+ // didn't, there's not much we can do.
if (sr->waiting_for_execstat) {
// We still don't have an exec() status from the forked child, wait for that
sr->stop_timer_armed = false;
}
- sr->handle_exit_status(status);
+ sr->handle_exit_status(bp_sys::exit_status(status));
return dasynq::rearm::NOOP;
}
-void process_service::handle_exit_status(int exit_status) noexcept
+void process_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
{
- bool did_exit = WIFEXITED(exit_status);
- bool was_signalled = WIFSIGNALED(exit_status);
- restarting = false;
+ bool did_exit = exit_status.did_exit();
+ bool was_signalled = exit_status.was_signalled();
auto service_state = get_state();
- if (exit_status != 0 && service_state != service_state_t::STOPPING) {
+ if (notification_fd != -1) {
+ readiness_watcher.deregister(event_loop);
+ bp_sys::close(notification_fd);
+ notification_fd = -1;
+ }
+
+ if (!exit_status.did_exit_clean() && service_state != service_state_t::STOPPING) {
if (did_exit) {
log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
- WEXITSTATUS(exit_status));
+ exit_status.get_exit_status());
}
else if (was_signalled) {
log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
- WTERMSIG(exit_status));
+ exit_status.get_term_sig());
}
}
+#if USE_UTMPX
+ if (*inittab_id || *inittab_line) {
+ clear_utmp_entry(inittab_id, inittab_line);
+ }
+#endif
+
if (service_state == service_state_t::STARTING) {
- if (did_exit && WEXITSTATUS(exit_status) == 0) {
- started();
- }
- else {
- failed_to_start();
- }
+ // If state is STARTING, we must be waiting for readiness notification; the process has
+ // terminated before becoming ready.
+ stop_reason = stopped_reason_t::FAILED;
+ failed_to_start();
}
else if (service_state == service_state_t::STOPPING) {
// We won't log a non-zero exit status or termination due to signal here -
// we assume that the process died because we signalled it.
+ if (stop_timer_armed) {
+ restart_timer.stop_timer(event_loop);
+ }
stopped();
}
else if (smooth_recovery && service_state == service_state_t::STARTED
return;
}
else {
+ stop_reason = stopped_reason_t::TERMINATED;
emergency_stop();
}
services->process_queues();
}
-void process_service::exec_failed(int errcode) noexcept
+// Handle a failure to execute the service process. Logs which execution stage failed and the
+// corresponding system error text, releases the readiness-notification descriptor if one is
+// open, and then either fails the start (state STARTING) or performs an emergency stop.
+//   errcode - the stage that failed (errcode.stage) and the errno value (errcode.st_errno)
+void process_service::exec_failed(run_proc_err errcode) noexcept
{
- log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
+ // The ": " keeps the stage description and the strerror() text from running together.
+ log(loglevel_t::ERROR, get_name(), ": execution failed - ",
+ exec_stage_descriptions[static_cast<int>(errcode.stage)], ": ", strerror(errcode.st_errno));
+
+ if (notification_fd != -1) {
+ readiness_watcher.deregister(event_loop);
+ bp_sys::close(notification_fd);
+ notification_fd = -1;
+ }
+
if (get_state() == service_state_t::STARTING) {
+ stop_reason = stopped_reason_t::EXECFAILED;
failed_to_start();
}
else {
// Process service in smooth recovery:
+ stop_reason = stopped_reason_t::TERMINATED;
emergency_stop();
}
}
-void bgproc_service::handle_exit_status(int exit_status) noexcept
+void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
{
begin:
- bool did_exit = WIFEXITED(exit_status);
- bool was_signalled = WIFSIGNALED(exit_status);
+ bool did_exit = exit_status.did_exit();
+ bool was_signalled = exit_status.was_signalled();
auto service_state = get_state();
- if (exit_status != 0 && service_state != service_state_t::STOPPING) {
+ if (!exit_status.did_exit_clean() && service_state != service_state_t::STOPPING) {
if (did_exit) {
log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
- WEXITSTATUS(exit_status));
+ exit_status.get_exit_status());
}
else if (was_signalled) {
log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
- WTERMSIG(exit_status));
+ exit_status.get_term_sig());
}
}
// This may be a "smooth recovery" where we are restarting the process while leaving the
// service in the STARTED state.
if (restarting && service_state == service_state_t::STARTED) {
- restarting = false;
+ //restarting = false;
bool need_stop = false;
- if ((did_exit && WEXITSTATUS(exit_status) != 0) || was_signalled) {
+ if ((did_exit && exit_status.get_exit_status() != 0) || was_signalled) {
need_stop = true;
}
else {
if (need_stop) {
// Failed startup: no auto-restart.
+ stop_reason = stopped_reason_t::TERMINATED;
emergency_stop();
services->process_queues();
}
return;
}
- restarting = false;
+ //restarting = false;
if (service_state == service_state_t::STARTING) {
// POSIX requires that if the process exited clearly with a status code of 0,
// the exit status value will be 0:
- if (exit_status == 0) {
+ if (exit_status.did_exit_clean()) {
auto pid_result = read_pid_file(&exit_status);
switch (pid_result) {
case pid_result_t::FAILED:
// Failed startup: no auto-restart.
+ stop_reason = stopped_reason_t::FAILED;
failed_to_start();
break;
case pid_result_t::TERMINATED:
}
}
else {
+ stop_reason = stopped_reason_t::FAILED;
failed_to_start();
}
}
do_smooth_recovery();
return;
}
- if (! do_auto_restart() && start_explicit) {
- start_explicit = false;
- release();
- }
+ stop_reason = stopped_reason_t::TERMINATED;
forced_stop();
stop_dependents();
stopped();
services->process_queues();
}
-void bgproc_service::exec_failed(int errcode) noexcept
+// Handle a failure to execute the service process. Logs which execution stage failed and the
+// corresponding system error text, then marks the service as having failed to start.
+//   errcode - the stage that failed (errcode.stage) and the errno value (errcode.st_errno)
+void bgproc_service::exec_failed(run_proc_err errcode) noexcept
{
- log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
+ // The ": " keeps the stage description and the strerror() text from running together.
+ log(loglevel_t::ERROR, get_name(), ": execution failed - ",
+ exec_stage_descriptions[static_cast<int>(errcode.stage)], ": ", strerror(errcode.st_errno));
+
// Only time we execute is for startup:
+ stop_reason = stopped_reason_t::EXECFAILED;
failed_to_start();
}
-void scripted_service::handle_exit_status(int exit_status) noexcept
+void scripted_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
{
- bool did_exit = WIFEXITED(exit_status);
- bool was_signalled = WIFSIGNALED(exit_status);
+ bool did_exit = exit_status.did_exit();
+ bool was_signalled = exit_status.was_signalled();
auto service_state = get_state();
// For a scripted service, a termination occurs in one of three main cases:
if (service_state == service_state_t::STOPPING) {
// We might be running the stop script, or we might be running the start script and have issued
// a cancel order via SIGINT:
- if (did_exit && WEXITSTATUS(exit_status) == 0) {
- if (interrupting_start) {
- interrupting_start = false;
- // launch stop script:
- bring_down();
+ if (interrupting_start) {
+ if (stop_timer_armed) {
+ restart_timer.stop_timer(event_loop);
+ stop_timer_armed = false;
}
- else {
- // We were running the stop script and finished successfully
+ // We issued a start interrupt, so we expected this failure:
+ if (did_exit && exit_status.get_exit_status() != 0) {
+ log(loglevel_t::INFO, "Service ", get_name(), " start cancelled; exit code ",
+ exit_status.get_exit_status());
+ // Assume that a command terminating normally (with failure status) requires no cleanup:
stopped();
}
- }
- else {
- if (interrupting_start) {
- // We issued a start interrupt, so we expected this failure:
- if (did_exit) {
- log(loglevel_t::INFO, "Service ", get_name(), " start cancelled; exit code ",
- WEXITSTATUS(exit_status));
- }
- else if (was_signalled) {
+ else {
+ if (was_signalled) {
log(loglevel_t::INFO, "Service ", get_name(), " start cancelled from signal ",
- WTERMSIG(exit_status));
+ exit_status.get_term_sig());
}
+ // If the start script completed successfully, or was interrupted via our signal,
+ // we want to run the stop script to clean up:
+ bring_down();
}
- else {
- // ??? failed to stop! Let's log it as warning:
- if (did_exit) {
- log(loglevel_t::WARN, "Service ", get_name(), " stop command failed with exit code ",
- WEXITSTATUS(exit_status));
- }
- else if (was_signalled) {
- log(loglevel_t::WARN, "Service ", get_name(), " stop command terminated due to signal ",
- WTERMSIG(exit_status));
- }
+ interrupting_start = false;
+ }
+ else if (exit_status.did_exit_clean()) {
+ // We were running the stop script and finished successfully
+ stopped();
+ }
+ else {
+ // ??? failed to stop! Let's log it as warning:
+ if (did_exit) {
+ log(loglevel_t::WARN, "Service ", get_name(), " stop command failed with exit code ",
+ exit_status.get_exit_status());
+ }
+ else if (was_signalled) {
+ log(loglevel_t::WARN, "Service ", get_name(), " stop command terminated due to signal ",
+ exit_status.get_term_sig());
}
// Even if the stop script failed, assume that service is now stopped, so that any dependencies
// can be stopped. There's not really any other useful course of action here.
- interrupting_start = false;
stopped();
}
services->process_queues();
}
else { // STARTING
- if (exit_status == 0) {
+ if (exit_status.did_exit_clean()) {
+ started();
+ }
+ else if (was_signalled && exit_status.get_term_sig() == SIGINT && onstart_flags.skippable) {
+ // A skippable service can be skipped by interrupting (eg by ^C if the service
+ // starts on the console).
+ start_skipped = true;
started();
}
else {
// failed to start
if (did_exit) {
log(loglevel_t::ERROR, "Service ", get_name(), " command failed with exit code ",
- WEXITSTATUS(exit_status));
+ exit_status.get_exit_status());
}
else if (was_signalled) {
log(loglevel_t::ERROR, "Service ", get_name(), " command terminated due to signal ",
- WTERMSIG(exit_status));
+ exit_status.get_term_sig());
}
+ stop_reason = stopped_reason_t::FAILED;
failed_to_start();
}
services->process_queues();
}
}
-void scripted_service::exec_failed(int errcode) noexcept
+// Handle a failure to execute the service's script. Logs which execution stage failed and the
+// corresponding system error text; if the service is STARTING it is marked as failed to start.
+//   errcode - the stage that failed (errcode.stage) and the errno value (errcode.st_errno)
+void scripted_service::exec_failed(run_proc_err errcode) noexcept
{
- log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
+ // The ": " keeps the stage description and the strerror() text from running together.
+ log(loglevel_t::ERROR, get_name(), ": execution failed - ",
+ exec_stage_descriptions[static_cast<int>(errcode.stage)], ": ", strerror(errcode.st_errno));
auto service_state = get_state();
if (service_state == service_state_t::STARTING) {
+ stop_reason = stopped_reason_t::EXECFAILED;
failed_to_start();
}
else if (service_state == service_state_t::STOPPING) {
}
}
+// Convert a value to the unsigned counterpart of its own type (std::make_unsigned<T>).
+template <typename T>
+typename std::make_unsigned<T>::type make_unsigned_val(T val)
+{
+    using unsigned_type = typename std::make_unsigned<T>::type;
+    return static_cast<unsigned_type>(val);
+}
+
bgproc_service::pid_result_t
-bgproc_service::read_pid_file(int *exit_status) noexcept
+bgproc_service::read_pid_file(bp_sys::exit_status *exit_status) noexcept
{
const char *pid_file_c = pid_file.c_str();
int fd = open(pid_file_c, O_CLOEXEC);
}
char pidbuf[21]; // just enough to hold any 64-bit integer
- int r = ss_read(fd, pidbuf, 20);
+ int r = complete_read(fd, pidbuf, 20);
if (r < 0) {
// Could not read from PID file
log(loglevel_t::ERROR, get_name(), ": could not read from pidfile; ", strerror(errno));
bool valid_pid = false;
try {
unsigned long long v = std::stoull(pidbuf, nullptr, 0);
- if (v <= std::numeric_limits<pid_t>::max()) {
+ if (v <= make_unsigned_val(std::numeric_limits<pid_t>::max())) {
pid = (pid_t) v;
valid_pid = true;
}
void process_service::bring_down() noexcept
{
- waiting_for_deps = false;
if (waiting_for_execstat) {
// The process is still starting. This should be uncommon, but can occur during
// smooth recovery. We can't do much now; we have to wait until we get the
kill_pg(term_signal);
}
+ // If there's a stop timeout, arm the timer now:
+ if (stop_timeout != time_val(0,0)) {
+ restart_timer.arm_timer_rel(event_loop, stop_timeout);
+ stop_timer_armed = true;
+ }
+
+ // The rest is done in handle_exit_status.
+ }
+ else {
+ // The process is already dead.
+ stopped();
+ }
+}
+
+void bgproc_service::bring_down() noexcept
+{
+ if (pid != -1) {
+ // The process is still kicking on - must actually kill it. We signal the process
+ // group (-pid) rather than just the process as there's less risk then of creating
+ // an orphaned process group:
+ if (! onstart_flags.no_sigterm) {
+ kill_pg(SIGTERM);
+ }
+ if (term_signal != -1) {
+ kill_pg(term_signal);
+ }
+
// In most cases, the rest is done in handle_exit_status.
// If we are a BGPROCESS and the process is not our immediate child, however, that
// won't work - check for this now:
- if (get_type() == service_type_t::BGPROCESS && ! tracking_child) {
+ if (! tracking_child) {
stopped();
}
else if (stop_timeout != time_val(0,0)) {
void scripted_service::bring_down() noexcept
{
- waiting_for_deps = false;
+ if (pid != -1) {
+ // We're already running the stop script; nothing to do.
+ return;
+ }
+
if (stop_command.length() == 0) {
stopped();
}
dasynq::rearm process_restart_timer::timer_expiry(eventloop_t &, int expiry_count)
{
- service->stop_timer_armed = false;
-
- // Timer expires if:
- // We are stopping, including after having startup cancelled (stop timeout, state is STOPPING); We are
- // starting (start timeout, state is STARTING); We are waiting for restart timer before restarting,
- // including smooth recovery (restart timeout, state is STARTING or STARTED).
- if (service->get_state() == service_state_t::STOPPING) {
- service->kill_with_fire();
- }
- else if (service->pid != -1) {
- // Starting, start timed out.
- service->stop_dependents();
- service->interrupt_start();
- }
- else {
- // STARTING / STARTED, and we have a pid: must be restarting (smooth recovery if STARTED)
- service->do_restart();
- }
+ service->timer_expired();
// Leave the timer disabled, or, if it has been reset by any processing above, leave it armed:
return dasynq::rearm::NOOP;