4 #include <sys/socket.h>
7 #include "dinit-socket.h"
8 #include "dinit-util.h"
10 #include "proc-service.h"
13 * Most of the implementation for process-based services (process, scripted, bgprocess) is here.
15 * See proc-service.h header for interface details.
18 // Given a string and a list of pairs of (start,end) indices for each argument in that string,
19 // store a nul terminator at the end of each argument. Return a `char *` vector containing the
20 // beginning of each argument and a trailing nullptr. (The returned array is invalidated if the string is later modified).
// Note that the string `s` is modified in place: the character at each in-range end index is
// overwritten with a nul byte, and the returned pointers point into the storage of `s`.
21 std::vector<const char *> separate_args(std::string &s,
22 const std::list<std::pair<unsigned,unsigned>> &arg_indices)
24 std::vector<const char *> r;
// Reserve one slot per argument plus one extra slot.
25 r.reserve(arg_indices.size() + 1);
27 // First store nul terminator for each part:
28 for (auto index_pair : arg_indices) {
// Only write the terminator when the end index lies inside the string; an end index equal to
// (or beyond) the string length is skipped.
29 if (index_pair.second < s.length()) {
30 s[index_pair.second] = 0;
34 // Now we can get the C string (c_str) and store offsets into it:
35 const char * cstr = s.c_str();
36 for (auto index_pair : arg_indices) {
// The start index of each pair gives the beginning of the argument within the C string.
37 r.push_back(cstr + index_pair.first);
// Reacts to a successful exec() of the service process; what happens depends on the current
// service state (STARTING or STOPPING are handled explicitly).
43 void process_service::exec_succeeded() noexcept
45 // This could be a smooth recovery (state already STARTED). Even more, the process
46 // might be stopped (and killed via a signal) during smooth recovery. We don't want to
47 // process startup again in either case, so we check for state STARTING:
48 if (get_state() == service_state_t::STARTING) {
51 else if (get_state() == service_state_t::STOPPING) {
52 // stopping, but smooth recovery was in progress. That's now over so we can
53 // commence normal stop. Note that if pid == -1 the process already stopped(!),
54 // that's handled below.
55 if (pid != -1 && stop_check_dependents()) {
// Successful exec() notification for a scripted service.
61 void scripted_service::exec_succeeded() noexcept
63 // For a scripted service, this means nothing other than that the start/stop
64 // script will now begin.
// Event handler for the pipe over which the forked child reports its exec() status.
// Reads an int-sized status from the watched descriptor, closes the descriptor, and then
// dispatches to the owning service depending on what was read. The service's queues are
// processed before returning rearm::REMOVED.
67 rearm exec_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) noexcept
69 base_process_service *sr = service;
// The exec status is being handled now, so the service is no longer waiting for it.
70 sr->waiting_for_execstat = false;
73 int r = read(get_watched_fd(), &exec_status, sizeof(int));
// The pipe is not needed again after this single read.
75 close(get_watched_fd());
78 // We read an errno code; exec() failed, and the service startup failed.
// Drop the child watch (and its reservation flag) and cancel any armed stop timer before
// reporting the failure to the service.
80 sr->child_listener.deregister(event_loop, sr->pid);
81 sr->reserved_child_watch = false;
82 if (sr->stop_timer_armed) {
83 sr->restart_timer.stop_timer(loop);
84 sr->stop_timer_armed = false;
88 sr->exec_failed(exec_status);
94 // Somehow the process managed to complete before we even saw the exec() status.
// Use the exit status that was stored on the service.
95 sr->handle_exit_status(sr->exit_status);
99 sr->services->process_queues();
101 return rearm::REMOVED;
// Child-process watcher callback: invoked with the wait status of the watched child.
// Records the exit status on the service, and (unless the exec() status is still pending)
// cancels any armed stop timer and passes the exit status on to the service for handling.
104 dasynq::rearm service_child_watcher::status_change(eventloop_t &loop, pid_t child, int status) noexcept
106 base_process_service *sr = service;
// Remember the exit status on the service so that it is available later.
109 sr->exit_status = bp_sys::exit_status(status);
111 // Ok, for a process service, any process death which we didn't rig
112 // ourselves is a bit... unexpected. Probably, the child died because
113 // we asked it to (sr->service_state == STOPPING). But even if
114 // we didn't, there's not much we can do.
116 if (sr->waiting_for_execstat) {
117 // We still don't have an exec() status from the forked child, wait for that
118 // before doing any further processing.
119 return dasynq::rearm::NOOP; // hold watch reservation
122 // Must stop watch now since handle_exit_status might result in re-launch:
123 // (stop_watch instead of deregister, so that we hold watch reservation).
// The process has terminated, so an armed stop timer is no longer needed.
126 if (sr->stop_timer_armed) {
127 sr->restart_timer.stop_timer(loop);
128 sr->stop_timer_armed = false;
131 sr->handle_exit_status(bp_sys::exit_status(status));
132 return dasynq::rearm::NOOP;
// Handle termination of the service process.
//
// exit_status: the exit status of the terminated process.
//
// An abnormal termination (non-clean exit or death by signal) is logged as an error, unless the
// service is STOPPING (in which case the termination is assumed to be our own doing). After
// that, the action taken depends on the service state: STARTING, STOPPING, or STARTED with
// smooth recovery enabled and a target state of STARTED; otherwise the stop reason is recorded
// as TERMINATED. The service queues are processed at the end.
135 void process_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
137 bool did_exit = exit_status.did_exit();
138 bool was_signalled = exit_status.was_signalled();
140 auto service_state = get_state();
// Only unclean terminations are reported. (The condition must be negated: testing for a clean
// exit here would log successful exits as errors and would make the signal branch unreachable.)
142 if (! exit_status.did_exit_clean() && service_state != service_state_t::STOPPING) {
144 log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
145 exit_status.get_exit_status());
147 else if (was_signalled) {
148 log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
149 exit_status.get_term_sig());
// Termination while still STARTING: a clean exit and an unclean exit are treated differently;
// the failure path records FAILED as the stop reason.
153 if (service_state == service_state_t::STARTING) {
154 if (exit_status.did_exit_clean()) {
158 stop_reason = stopped_reason_t::FAILED;
162 else if (service_state == service_state_t::STOPPING) {
163 // We won't log a non-zero exit status or termination due to signal here -
164 // we assume that the process died because we signalled it.
// The process is gone, so a pending stop timeout is no longer relevant.
165 if (stop_timer_armed) {
166 restart_timer.stop_timer(event_loop);
// Unexpected termination while STARTED: attempt smooth recovery if it is enabled and the
// service is still meant to be running.
170 else if (smooth_recovery && service_state == service_state_t::STARTED
171 && get_target_state() == service_state_t::STARTED) {
172 do_smooth_recovery();
176 stop_reason = stopped_reason_t::TERMINATED;
179 services->process_queues();
// Handle failure to exec() the service process.
//
// errcode: the errno value describing the failure; it is rendered via strerror() in the log.
//
// The stop reason recorded depends on whether the service was STARTING (EXECFAILED) or not
// (TERMINATED).
182 void process_service::exec_failed(int errcode) noexcept
184 log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
185 if (get_state() == service_state_t::STARTING) {
186 stop_reason = stopped_reason_t::EXECFAILED;
190 // Process service in smooth recovery:
191 stop_reason = stopped_reason_t::TERMINATED;
// Handle termination of the process launched for a "bgprocess" service.
//
// exit_status: the exit status of the terminated process. Its address is also passed to
//              read_pid_file() when the PID file is (re-)read.
//
// An unclean termination is logged as an error unless the service is STOPPING. Processing then
// depends on whether this is a restart while STARTED, or on the service state (STARTING,
// STOPPING, or otherwise STARTED).
196 void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
199 bool did_exit = exit_status.did_exit();
200 bool was_signalled = exit_status.was_signalled();
201 auto service_state = get_state();
// Report abnormal terminations, except while stopping.
203 if (!exit_status.did_exit_clean() && service_state != service_state_t::STOPPING) {
205 log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
206 exit_status.get_exit_status());
208 else if (was_signalled) {
209 log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
210 exit_status.get_term_sig());
214 // This may be a "smooth recovery" where we are restarting the process while leaving the
215 // service in the STARTED state.
216 if (restarting && service_state == service_state_t::STARTED) {
218 bool need_stop = false;
// A non-zero exit code or termination by signal is distinguished from a successful launch.
219 if ((did_exit && exit_status.get_exit_status() != 0) || was_signalled) {
223 // We need to re-read the PID, since it has now changed.
// The PID file is only consulted if one has been configured (non-empty path).
224 if (pid_file.length() != 0) {
225 auto pid_result = read_pid_file(&exit_status);
226 switch (pid_result) {
227 case pid_result_t::FAILED:
228 // Failed startup: no auto-restart.
231 case pid_result_t::TERMINATED:
233 case pid_result_t::OK:
240 // Failed startup: no auto-restart.
241 stop_reason = stopped_reason_t::TERMINATED;
243 services->process_queues();
250 if (service_state == service_state_t::STARTING) {
251 // POSIX requires that if the process exited cleanly with a status code of 0,
252 // the exit status value will be 0:
253 if (exit_status.did_exit_clean()) {
// The launcher exited cleanly; the PID file read determines the outcome.
254 auto pid_result = read_pid_file(&exit_status);
255 switch (pid_result) {
256 case pid_result_t::FAILED:
257 // Failed startup: no auto-restart.
258 stop_reason = stopped_reason_t::FAILED;
261 case pid_result_t::TERMINATED:
262 // started, but immediately terminated
265 case pid_result_t::OK:
271 stop_reason = stopped_reason_t::FAILED;
275 else if (service_state == service_state_t::STOPPING) {
276 // We won't log a non-zero exit status or termination due to signal here -
277 // we assume that the process died because we signalled it.
281 // we must be STARTED
282 if (smooth_recovery && get_target_state() == service_state_t::STARTED) {
283 do_smooth_recovery();
// If no automatic restart takes place, the explicit-start mark is cleared.
286 if (! do_auto_restart() && start_explicit) {
287 start_explicit = false;
290 stop_reason = stopped_reason_t::TERMINATED;
295 services->process_queues();
// Handle failure to exec() the process for a "bgprocess" service.
//
// errcode: the errno value describing the failure; it is rendered via strerror() in the log.
298 void bgproc_service::exec_failed(int errcode) noexcept
300 log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
301 // Only time we execute is for startup:
302 stop_reason = stopped_reason_t::EXECFAILED;
// Handle termination of a start or stop script belonging to a scripted service.
//
// exit_status: the exit status of the terminated script process.
//
// The STOPPING case covers both an interrupted start script and a completed stop script; the
// remaining case deals with the outcome of the start script. Service queues are processed
// afterwards in both cases.
306 void scripted_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
308 bool did_exit = exit_status.did_exit();
309 bool was_signalled = exit_status.was_signalled();
310 auto service_state = get_state();
312 // For a scripted service, a termination occurs in one of three main cases:
313 // - the start script completed (or failed), when service was STARTING
314 // - the start script was interrupted to cancel startup; state is STOPPING
315 // - the stop script completed (or failed), state is STOPPING
317 if (service_state == service_state_t::STOPPING) {
318 // We might be running the stop script, or we might be running the start script and have issued
319 // a cancel order via SIGINT:
320 if (interrupting_start) {
321 // We issued a start interrupt, so we expected this failure:
322 if (did_exit && exit_status.get_exit_status() != 0) {
323 log(loglevel_t::INFO, "Service ", get_name(), " start cancelled; exit code ",
324 exit_status.get_exit_status());
325 // Assume that a command terminating normally requires no cleanup:
330 log(loglevel_t::INFO, "Service ", get_name(), " start cancelled from signal ",
331 exit_status.get_term_sig());
333 // If the start script completed successfully, or was interrupted via our signal,
334 // we want to run the stop script to clean up:
// The interrupt has now been dealt with.
337 interrupting_start = false;
339 else if (exit_status.did_exit_clean()) {
340 // We were running the stop script and finished successfully
344 // ??? failed to stop! Let's log it as warning:
346 log(loglevel_t::WARN, "Service ", get_name(), " stop command failed with exit code ",
347 exit_status.get_exit_status());
349 else if (was_signalled) {
350 log(loglevel_t::WARN, "Service ", get_name(), " stop command terminated due to signal ",
351 exit_status.get_term_sig());
353 // Even if the stop script failed, assume that service is now stopped, so that any dependencies
354 // can be stopped. There's not really any other useful course of action here.
357 services->process_queues();
// Not STOPPING: evaluate the result of the start script.
360 if (exit_status.did_exit_clean()) {
363 else if (was_signalled && exit_status.get_term_sig() == SIGINT && onstart_flags.skippable) {
364 // A skippable service can be skipped by interrupting (eg by ^C if the service
365 // starts on the console).
366 start_skipped = true;
372 log(loglevel_t::ERROR, "Service ", get_name(), " command failed with exit code ",
373 exit_status.get_exit_status());
375 else if (was_signalled) {
376 log(loglevel_t::ERROR, "Service ", get_name(), " command terminated due to signal ",
377 exit_status.get_term_sig());
379 stop_reason = stopped_reason_t::FAILED;
382 services->process_queues();
// Handle failure to exec() a script for a scripted service.
//
// errcode: the errno value describing the failure; it is rendered via strerror() in the log.
//
// The failure is always logged; further handling depends on whether the service is STARTING
// (stop reason EXECFAILED) or STOPPING.
386 void scripted_service::exec_failed(int errcode) noexcept
388 log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
389 auto service_state = get_state();
390 if (service_state == service_state_t::STARTING) {
391 stop_reason = stopped_reason_t::EXECFAILED;
394 else if (service_state == service_state_t::STOPPING) {
395 // We've logged the failure, but it's probably better not to leave the service in
// Read the daemon's process ID from the configured PID file and check the state of that process.
//
// exit_status: receives the process status from waitpid(), if the process turns out to be a
//              child that has already terminated.
//
// Returns:
//   pid_result_t::OK         - the PID refers to an existing process; `tracking_child` records
//                              whether it can be watched as a child of this process
//   pid_result_t::TERMINATED - the process is a child which has already terminated
//   pid_result_t::FAILED     - the file could not be opened or read, or the PID is not valid
401 bgproc_service::pid_result_t
402 bgproc_service::read_pid_file(bp_sys::exit_status *exit_status) noexcept
404 const char *pid_file_c = pid_file.c_str();
// POSIX requires exactly one access mode in the flags passed to open(); the file is only read,
// so request O_RDONLY explicitly rather than relying on its value being 0.
405 int fd = open(pid_file_c, O_RDONLY | O_CLOEXEC);
407 log(loglevel_t::ERROR, get_name(), ": read pid file: ", strerror(errno));
408 return pid_result_t::FAILED;
411 char pidbuf[21]; // just enough to hold any 64-bit integer
// Read at most 20 bytes so that there is always room for the terminator stored below.
412 int r = complete_read(fd, pidbuf, 20);
414 // Could not read from PID file
415 log(loglevel_t::ERROR, get_name(), ": could not read from pidfile; ", strerror(errno));
417 return pid_result_t::FAILED;
421 pidbuf[r] = 0; // store nul terminator
423 bool valid_pid = false;
// Base 0 lets stoull auto-detect the radix; the parsed value must also fit in a pid_t.
425 unsigned long long v = std::stoull(pidbuf, nullptr, 0);
426 if (v <= std::numeric_limits<pid_t>::max()) {
431 catch (std::out_of_range &exc) {
434 catch (std::invalid_argument &exc) {
435 // Ok, so it doesn't look like a number: proceed...
// Non-blocking wait: the result tells us whether the process is a child of ours and, if so,
// whether it is still running.
439 pid_t wait_r = waitpid(pid, exit_status, WNOHANG);
440 if (wait_r == -1 && errno == ECHILD) {
441 // We can't track this child - check process exists:
// Signal 0 performs only the existence/permission check; any error other than ESRCH still
// means that the process exists.
442 if (kill(pid, 0) == 0 || errno != ESRCH) {
443 tracking_child = false;
444 return pid_result_t::OK;
447 log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
449 return pid_result_t::FAILED;
452 else if (wait_r == pid) {
// The process was our child and has already terminated.
454 return pid_result_t::TERMINATED;
456 else if (wait_r == 0) {
457 // We can track the child
458 child_listener.add_reserved(event_loop, pid, dasynq::DEFAULT_PRIORITY - 10);
459 tracking_child = true;
460 reserved_child_watch = true;
461 return pid_result_t::OK;
465 log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
467 return pid_result_t::FAILED;
// Begin stopping the service process. Three situations are distinguished: the exec() status is
// still pending, the process is running (pid != -1), or the process has already terminated.
470 void process_service::bring_down() noexcept
472 if (waiting_for_execstat) {
473 // The process is still starting. This should be uncommon, but can occur during
474 // smooth recovery. We can't do much now; we have to wait until we get the
475 // status, and then act appropriately.
478 else if (pid != -1) {
479 // The process is still kicking on - must actually kill it. We signal the process
480 // group (-pid) rather than just the process as there's less risk then of creating
481 // an orphaned process group:
482 if (! onstart_flags.no_sigterm) {
// A configured termination signal (anything other than -1) is sent to the process group.
485 if (term_signal != -1) {
486 kill_pg(term_signal);
489 // If there's a stop timeout, arm the timer now:
490 if (stop_timeout != time_val(0,0)) {
491 restart_timer.arm_timer_rel(event_loop, stop_timeout);
492 stop_timer_armed = true;
495 // The rest is done in handle_exit_status.
498 // The process is already dead.
// Begin stopping the process of a "bgprocess" service, signalling it if it is still running.
// A stop timeout is only armed when the process is being tracked as a child.
503 void bgproc_service::bring_down() noexcept
506 // The process is still kicking on - must actually kill it. We signal the process
507 // group (-pid) rather than just the process as there's less risk then of creating
508 // an orphaned process group:
509 if (! onstart_flags.no_sigterm) {
// A configured termination signal (anything other than -1) is sent to the process group.
512 if (term_signal != -1) {
513 kill_pg(term_signal);
516 // In most cases, the rest is done in handle_exit_status.
517 // If we are a BGPROCESS and the process is not our immediate child, however, that
518 // won't work - check for this now:
519 if (! tracking_child) {
522 else if (stop_timeout != time_val(0,0)) {
523 restart_timer.arm_timer_rel(event_loop, stop_timeout);
524 stop_timer_armed = true;
528 // The process is already dead.
// Begin stopping a scripted service by launching its stop command, if there is one.
// When the stop script is started successfully and a stop timeout is configured, the timer is
// armed so that the script's run time is bounded.
533 void scripted_service::bring_down() noexcept
536 // We're already running the stop script; nothing to do.
// An empty stop command means that there is no stop script to run.
540 if (stop_command.length() == 0) {
543 else if (! start_ps_process(stop_arg_parts, false)) {
544 // Couldn't execute stop script, but there's not much we can do:
548 // successfully started stop script: start kill timer:
549 if (stop_timeout != time_val(0,0)) {
550 restart_timer.arm_timer_rel(event_loop, stop_timeout);
551 stop_timer_armed = true;
// Timer callback: forwards the expiry to the owning service via timer_expired().
// The event loop argument and expiry_count are not used in the lines shown here.
556 dasynq::rearm process_restart_timer::timer_expiry(eventloop_t &, int expiry_count)
558 service->timer_expired();
560 // Leave the timer disabled, or, if it has been reset by any processing above, leave it armed:
561 return dasynq::rearm::NOOP;