2 #include <sys/socket.h>
5 #include "dinit-socket.h"
6 #include "dinit-util.h"
8 #include "proc-service.h"
11 * Most of the implementation for process-based services (process, scripted, bgprocess) is here.
13 * See proc-service.h header for interface details.
16 // Given a string and a list of pairs of (start,end) indices for each argument in that string,
17 // store a nul terminator at the end index of each argument. Return a `char *` vector containing the beginning
18 // of each argument and a trailing nullptr. (The returned array is invalidated if the string is later modified).
//
// Parameters:
//   s           - the string holding all arguments; it is modified in place (nul bytes are written into it).
//   arg_indices - one (start,end) pair per argument, as offsets into `s`.
19 std::vector<const char *> separate_args(std::string &s, std::list<std::pair<unsigned,unsigned>> &arg_indices)
21 std::vector<const char *> r;
// Reserve one slot per argument plus one extra slot (for the trailing nullptr described above).
22 r.reserve(arg_indices.size() + 1);
24 // First store nul terminator for each part:
25 for (auto index_pair : arg_indices) {
// An end index at or beyond s.length() is skipped: nothing is written for that argument.
26 if (index_pair.second < s.length()) {
27 s[index_pair.second] = 0;
31 // Now we can get the C string (c_str) and store offsets into it:
// (c_str() is only taken after all writes to `s` above are done.)
32 const char * cstr = s.c_str();
33 for (auto index_pair : arg_indices) {
34 r.push_back(cstr + index_pair.first);
// Handler for a successful exec() of the service process. What happens next depends on
// the current service state: STARTING and STOPPING are each handled separately; any other
// state falls through both tests below.
// NOTE(review): the statements inside each branch are not visible in this excerpt.
40 void process_service::exec_succeeded() noexcept
42 // This could be a smooth recovery (state already STARTED). Even more, the process
43 // might be stopped (and killed via a signal) during smooth recovery. We don't want to
44 // process startup again in either case, so we check for state STARTING:
45 if (get_state() == service_state_t::STARTING) {
48 else if (get_state() == service_state_t::STOPPING) {
49 // stopping, but smooth recovery was in progress. That's now over so we can
50 // commence normal stop. Note that if pid == -1 the process already stopped(!),
51 // that's handled below.
52 if (pid != -1 && stop_check_dependents()) {
// Event callback for the exec status pipe of a service process.
// Clears the service's waiting_for_execstat flag, reads one int from the watched
// descriptor into exec_status, closes the descriptor, and then hands off to the service
// (exec_failed() or handle_exit_status()) before processing the service set's queues.
// NOTE(review): the conditions selecting between the branches below are not visible in
// this excerpt; the existing comments indicate "read an errno code" vs. "process already
// completed".
58 rearm exec_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) noexcept
60 base_process_service *sr = service;
// The exec status is being collected now, so the service is no longer waiting for it.
61 sr->waiting_for_execstat = false;
// Read sizeof(int) bytes; `r` holds read()'s return value.
64 int r = read(get_watched_fd(), &exec_status, sizeof(int));
// The pipe descriptor is closed straight after the single read.
66 close(get_watched_fd());
69 // We read an errno code; exec() failed, and the service startup failed.
// Drop the child watch for the failed process and release the watch reservation flag.
71 sr->child_listener.deregister(event_loop, sr->pid);
72 sr->reserved_child_watch = false;
// Cancel the restart/stop timer if it was armed, and record that it no longer is.
73 if (sr->stop_timer_armed) {
74 sr->restart_timer.stop_timer(loop);
75 sr->stop_timer_armed = false;
// Pass the value read from the pipe to the service as the failure code.
79 sr->exec_failed(exec_status);
85 // Somehow the process managed to complete before we even saw the exec() status.
// Use the exit status previously stored on the service.
86 sr->handle_exit_status(sr->exit_status);
90 sr->services->process_queues();
// This watcher reports itself as removed to the event loop.
92 return rearm::REMOVED;
// Child-watch callback invoked with the wait status of a service's child process.
// Stores the exit status on the service. If the exec() status for the child has not yet
// been received, processing is deferred; otherwise any armed stop timer is cancelled and
// the status is passed to the service's handle_exit_status().
// Both visible return paths yield dasynq::rearm::NOOP.
95 dasynq::rearm service_child_watcher::status_change(eventloop_t &loop, pid_t child, int status) noexcept
97 base_process_service *sr = service;
// Record the status first, so it is available even if handling is deferred below.
100 sr->exit_status = bp_sys::exit_status(status);
102 // Ok, for a process service, any process death which we didn't rig
103 // ourselves is a bit... unexpected. Probably, the child died because
104 // we asked it to (sr->service_state == STOPPING). But even if
105 // we didn't, there's not much we can do.
107 if (sr->waiting_for_execstat) {
108 // We still don't have an exec() status from the forked child, wait for that
109 // before doing any further processing.
110 return dasynq::rearm::NOOP; // hold watch reservation
113 // Must stop watch now since handle_exit_status might result in re-launch:
114 // (stop_watch instead of deregister, so that we hold watch reservation).
// The process has exited, so a pending stop timeout is no longer needed.
117 if (sr->stop_timer_armed) {
118 sr->restart_timer.stop_timer(loop);
119 sr->stop_timer_armed = false;
122 sr->handle_exit_status(bp_sys::exit_status(status));
123 return dasynq::rearm::NOOP;
// Handle termination of the process of a (plain) process service.
// Logs abnormal termination, then acts according to the service state at the time:
// STARTING, STOPPING, or STARTED with smooth recovery enabled; finally processes the
// service set's queues.
//   exit_status - the wrapped exit status of the terminated process.
126 void process_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
128 bool did_exit = exit_status.did_exit();
129 bool was_signalled = exit_status.was_signalled();
131 auto service_state = get_state();
// NOTE(review): this test has no negation, whereas the parallel test in
// bgproc_service::handle_exit_status uses `!exit_status.did_exit_clean()`. As written the
// ERROR log below would be emitted for a clean exit, and the `was_signalled` branch looks
// unreachable (a clean exit is presumably not a signal death) - likely an inverted
// condition; confirm against did_exit_clean()'s definition.
133 if (exit_status.did_exit_clean() && service_state != service_state_t::STOPPING) {
135 log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
136 exit_status.get_exit_status());
138 else if (was_signalled) {
139 log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
140 exit_status.get_term_sig());
// Process ended while the service was still starting; the outcome depends on whether
// the exit was clean.
144 if (service_state == service_state_t::STARTING) {
145 if (exit_status.did_exit_clean()) {
152 else if (service_state == service_state_t::STOPPING) {
153 // We won't log a non-zero exit status or termination due to signal here -
154 // we assume that the process died because we signalled it.
// The stop timeout is no longer relevant once the process has gone.
155 if (stop_timer_armed) {
156 restart_timer.stop_timer(event_loop);
// Smooth recovery applies only if the service is STARTED and is also meant to stay STARTED.
160 else if (smooth_recovery && service_state == service_state_t::STARTED
161 && get_target_state() == service_state_t::STARTED) {
162 do_smooth_recovery();
168 services->process_queues();
// Handle failure to exec() the service process.
//   errcode - errno-style code; it is rendered with strerror() for the log message.
// The failure is always logged; further handling differs between a service that is
// STARTING and one in smooth recovery (those statements are not visible in this excerpt).
171 void process_service::exec_failed(int errcode) noexcept
173 log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
174 if (get_state() == service_state_t::STARTING) {
178 // Process service in smooth recovery:
// Handle termination of a process belonging to a bgprocess service.
// Logs abnormal termination (unless the service is STOPPING), then handles, in order:
//  - a smooth-recovery restart in progress (restarting && STARTED), re-reading the PID file;
//  - termination while STARTING, where a clean exit leads to reading the PID file;
//  - termination while STOPPING;
//  - otherwise (STARTED): smooth recovery or auto-restart.
//   exit_status - the wrapped exit status of the terminated process; its address is also
//                 passed to read_pid_file(), which may overwrite it.
183 void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
186 bool did_exit = exit_status.did_exit();
187 bool was_signalled = exit_status.was_signalled();
188 auto service_state = get_state();
// Only log unclean termination that we did not (presumably) cause ourselves by stopping.
190 if (!exit_status.did_exit_clean() && service_state != service_state_t::STOPPING) {
192 log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
193 exit_status.get_exit_status());
195 else if (was_signalled) {
196 log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
197 exit_status.get_term_sig());
201 // This may be a "smooth recovery" where we are restarting the process while leaving the
202 // service in the STARTED state.
203 if (restarting && service_state == service_state_t::STARTED) {
205 bool need_stop = false;
// A non-zero exit code or death by signal counts as a failed relaunch.
206 if ((did_exit && exit_status.get_exit_status() != 0) || was_signalled) {
210 // We need to re-read the PID, since it has now changed.
// An empty pid_file setting means there is nothing to read.
211 if (pid_file.length() != 0) {
212 auto pid_result = read_pid_file(&exit_status);
213 switch (pid_result) {
214 case pid_result_t::FAILED:
215 // Failed startup: no auto-restart.
218 case pid_result_t::TERMINATED:
220 case pid_result_t::OK:
227 // Failed startup: no auto-restart.
229 services->process_queues();
236 if (service_state == service_state_t::STARTING) {
237 // POSIX requires that if the process exited cleanly with a status code of 0,
238 // the exit status value will be 0:
239 if (exit_status.did_exit_clean()) {
240 auto pid_result = read_pid_file(&exit_status);
241 switch (pid_result) {
242 case pid_result_t::FAILED:
243 // Failed startup: no auto-restart.
246 case pid_result_t::TERMINATED:
247 // started, but immediately terminated
250 case pid_result_t::OK:
259 else if (service_state == service_state_t::STOPPING) {
260 // We won't log a non-zero exit status or termination due to signal here -
261 // we assume that the process died because we signalled it.
265 // we must be STARTED
266 if (smooth_recovery && get_target_state() == service_state_t::STARTED) {
267 do_smooth_recovery();
// If no auto-restart takes place, an explicit start marking is cleared.
270 if (! do_auto_restart() && start_explicit) {
271 start_explicit = false;
278 services->process_queues();
// Handle failure to exec() the bgprocess service's command.
//   errcode - errno-style code; it is rendered with strerror() for the log message.
// NOTE(review): the statements following the log call are not visible in this excerpt.
281 void bgproc_service::exec_failed(int errcode) noexcept
283 log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
284 // Only time we execute is for startup:
// Handle termination of a script (start or stop command) run for a scripted service.
// While STOPPING this distinguishes an interrupted start script from a finished stop
// script; otherwise the exit is treated as the outcome of the (start) command.
//   exit_status - the wrapped exit status of the terminated script process.
288 void scripted_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
290 bool did_exit = exit_status.did_exit();
291 bool was_signalled = exit_status.was_signalled();
292 auto service_state = get_state();
294 // For a scripted service, a termination occurs in one of three main cases:
295 // - the start script completed (or failed), when service was STARTING
296 // - the start script was interrupted to cancel startup; state is STOPPING
297 // - the stop script completed (or failed), state is STOPPING
299 if (service_state == service_state_t::STOPPING) {
300 // We might be running the stop script, or we might be running the start script and have issued
301 // a cancel order via SIGINT:
302 if (interrupting_start) {
303 // We issued a start interrupt, so we expected this failure:
// These cancellations are logged at INFO level since they were requested.
304 if (did_exit && exit_status.get_exit_status() != 0) {
305 log(loglevel_t::INFO, "Service ", get_name(), " start cancelled; exit code ",
306 exit_status.get_exit_status());
307 // Assume that a command terminating normally requires no cleanup:
312 log(loglevel_t::INFO, "Service ", get_name(), " start cancelled from signal ",
313 exit_status.get_term_sig());
315 // If the start script completed successfully, or was interrupted via our signal,
316 // we want to run the stop script to clean up:
// The interrupt has now been dealt with; clear the flag.
319 interrupting_start = false;
321 else if (exit_status.did_exit_clean()) {
322 // We were running the stop script and finished successfully
326 // ??? failed to stop! Let's log it as warning:
328 log(loglevel_t::WARN, "Service ", get_name(), " stop command failed with exit code ",
329 exit_status.get_exit_status());
331 else if (was_signalled) {
332 log(loglevel_t::WARN, "Service ", get_name(), " stop command terminated due to signal ",
333 exit_status.get_term_sig());
335 // Even if the stop script failed, assume that service is now stopped, so that any dependencies
336 // can be stopped. There's not really any other useful course of action here.
339 services->process_queues();
// Not STOPPING: a clean exit is treated separately from a failed command, which is
// logged at ERROR level below.
342 if (exit_status.did_exit_clean()) {
348 log(loglevel_t::ERROR, "Service ", get_name(), " command failed with exit code ",
349 exit_status.get_exit_status());
351 else if (was_signalled) {
352 log(loglevel_t::ERROR, "Service ", get_name(), " command terminated due to signal ",
353 exit_status.get_term_sig());
357 services->process_queues();
// Handle failure to exec() a scripted service's command.
//   errcode - errno-style code; it is rendered with strerror() for the log message.
// The failure is always logged; handling then differs for STARTING and STOPPING
// (the statements inside each branch are not visible in this excerpt).
361 void scripted_service::exec_failed(int errcode) noexcept
363 log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
364 auto service_state = get_state();
365 if (service_state == service_state_t::STARTING) {
368 else if (service_state == service_state_t::STOPPING) {
369 // We've logged the failure, but it's probably better not to leave the service in
// Read the daemon's process ID from the configured PID file and try to start tracking
// that process.
//   exit_status - passed to waitpid(); receives the process status if the process is an
//                 already-terminated child of ours.
// Returns:
//   pid_result_t::OK         - the process exists; tracking_child records whether it could
//                              be watched as our own child.
//   pid_result_t::TERMINATED - waitpid() reported the process as already terminated.
//   pid_result_t::FAILED     - the file could not be opened/read, or the PID was not valid.
375 bgproc_service::pid_result_t
376 bgproc_service::read_pid_file(bp_sys::exit_status *exit_status) noexcept
378 const char *pid_file_c = pid_file.c_str();
// NOTE(review): no access mode is given explicitly; this relies on O_RDONLY being 0 -
// consider `O_RDONLY | O_CLOEXEC` for clarity.
379 int fd = open(pid_file_c, O_CLOEXEC);
381 log(loglevel_t::ERROR, get_name(), ": read pid file: ", strerror(errno));
382 return pid_result_t::FAILED;
385 char pidbuf[21]; // just enough to hold any 64-bit integer
// Read at most 20 bytes so that one byte always remains for the nul terminator below.
386 int r = ss_read(fd, pidbuf, 20);
388 // Could not read from PID file
389 log(loglevel_t::ERROR, get_name(), ": could not read from pidfile; ", strerror(errno));
391 return pid_result_t::FAILED;
395 pidbuf[r] = 0; // store nul terminator
397 bool valid_pid = false;
// NOTE(review): base 0 makes stoull auto-detect the base, so a value with a leading "0"
// is parsed as octal and "0x" as hexadecimal - confirm this is intended for PID files.
399 unsigned long long v = std::stoull(pidbuf, nullptr, 0);
// Reject values that cannot be represented as a pid_t.
400 if (v <= std::numeric_limits<pid_t>::max()) {
405 catch (std::out_of_range &exc) {
408 catch (std::invalid_argument &exc) {
409 // Ok, so it doesn't look like a number: proceed...
// Non-blocking check: determines whether `pid` is a child of ours and whether it has
// already terminated.
413 pid_t wait_r = waitpid(pid, exit_status, WNOHANG);
414 if (wait_r == -1 && errno == ECHILD) {
415 // We can't track this child - check process exists:
// Signal 0 only probes for existence; any error other than ESRCH is treated as
// "the process exists".
416 if (kill(pid, 0) == 0 || errno != ESRCH) {
417 tracking_child = false;
418 return pid_result_t::OK;
421 log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
423 return pid_result_t::FAILED;
426 else if (wait_r == pid) {
428 return pid_result_t::TERMINATED;
430 else if (wait_r == 0) {
431 // We can track the child
432 child_listener.add_reserved(event_loop, pid, dasynq::DEFAULT_PRIORITY - 10);
433 tracking_child = true;
434 reserved_child_watch = true;
435 return pid_result_t::OK;
439 log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
441 return pid_result_t::FAILED;
// Begin stopping a process service.
// Three situations are distinguished: the exec() status is still pending, the process is
// running (pid != -1), or the process is already dead.
// NOTE(review): some statements in each branch are not visible in this excerpt.
444 void process_service::bring_down() noexcept
446 if (waiting_for_execstat) {
447 // The process is still starting. This should be uncommon, but can occur during
448 // smooth recovery. We can't do much now; we have to wait until we get the
449 // status, and then act appropriately.
452 else if (pid != -1) {
453 // The process is still kicking on - must actually kill it. We signal the process
454 // group (-pid) rather than just the process as there's less risk then of creating
455 // an orphaned process group:
// The no_sigterm start flag suppresses the action taken in this branch.
456 if (! onstart_flags.no_sigterm) {
// A term_signal of -1 means no configured termination signal is sent here.
459 if (term_signal != -1) {
460 kill_pg(term_signal);
463 // If there's a stop timeout, arm the timer now:
// (A zero time_val means no stop timeout.)
464 if (stop_timeout != time_val(0,0)) {
465 restart_timer.arm_timer_rel(event_loop, stop_timeout);
466 stop_timer_armed = true;
469 // The rest is done in handle_exit_status.
472 // The process is already dead.
// Begin stopping a bgprocess service.
// Signals the process group of the running daemon and, if the daemon is tracked as our
// child, arms the stop timeout; an untracked daemon is handled immediately instead.
// NOTE(review): the condition guarding the "still running" branch and some statements are
// not visible in this excerpt.
477 void bgproc_service::bring_down() noexcept
480 // The process is still kicking on - must actually kill it. We signal the process
481 // group (-pid) rather than just the process as there's less risk then of creating
482 // an orphaned process group:
// The no_sigterm start flag suppresses the action taken in this branch.
483 if (! onstart_flags.no_sigterm) {
// A term_signal of -1 means no configured termination signal is sent here.
486 if (term_signal != -1) {
487 kill_pg(term_signal);
490 // In most cases, the rest is done in handle_exit_status.
491 // If we are a BGPROCESS and the process is not our immediate child, however, that
492 // won't work - check for this now:
493 if (! tracking_child) {
// Tracked child: arm the stop timer only when a non-zero stop timeout is configured.
496 else if (stop_timeout != time_val(0,0)) {
497 restart_timer.arm_timer_rel(event_loop, stop_timeout);
498 stop_timer_armed = true;
502 // The process is already dead.
// Begin stopping a scripted service by running its stop command, if it has one.
// NOTE(review): the statements for the "no stop command" and "failed to start the stop
// script" branches are not visible in this excerpt.
507 void scripted_service::bring_down() noexcept
// An empty stop command means there is no stop script to run.
509 if (stop_command.length() == 0) {
// Otherwise launch the stop script; a false return means it could not be started.
512 else if (! start_ps_process(stop_arg_parts, false)) {
513 // Couldn't execute stop script, but there's not much we can do:
517 // successfully started stop script: start kill timer:
// (A zero time_val means no stop timeout.)
518 if (stop_timeout != time_val(0,0)) {
519 restart_timer.arm_timer_rel(event_loop, stop_timeout);
520 stop_timer_armed = true;
// Timer callback: forwards the expiry to the owning service's timer_expired().
// The event loop argument is unnamed and unused, and expiry_count is not used by the
// visible code.
// Returns dasynq::rearm::NOOP.
525 dasynq::rearm process_restart_timer::timer_expiry(eventloop_t &, int expiry_count)
527 service->timer_expired();
529 // Leave the timer disabled, or, if it has been reset by any processing above, leave it armed:
530 return dasynq::rearm::NOOP;