4 #include <sys/socket.h>
7 #include "dinit-socket.h"
8 #include "dinit-util.h"
10 #include "proc-service.h"
13 * Most of the implementation for process-based services (process, scripted, bgprocess) is here.
15 * See proc-service.h header for interface details.
18 // Given a string and a list of pairs of (start,end) indices for each argument in that string,
19 // store a null terminator for the argument. Return a `char *` vector containing the beginning
20 // of each argument and a trailing nullptr. (The returned array is invalidated if the string is
22 std::vector<const char *> separate_args(std::string &s,
23 const std::list<std::pair<unsigned,unsigned>> &arg_indices)
25 std::vector<const char *> r;
26 r.reserve(arg_indices.size() + 1);
28 // First store nul terminator for each part:
29 for (auto index_pair : arg_indices) {
30 if (index_pair.second < s.length()) {
31 s[index_pair.second] = 0;
35 // Now we can get the C string (c_str) and store offsets into it:
36 const char * cstr = s.c_str();
37 for (auto index_pair : arg_indices) {
38 r.push_back(cstr + index_pair.first);
44 void process_service::exec_succeeded() noexcept
46 // This could be a smooth recovery (state already STARTED). Even more, the process
47 // might be stopped (and killed via a signal) during smooth recovery. We don't want to
48 // process startup again in either case, so we check for state STARTING:
49 if (get_state() == service_state_t::STARTING) {
50 if (force_notification_fd != -1 || !notification_var.empty()) {
51 // Wait for readiness notification:
52 readiness_watcher.set_enabled(event_loop, true);
58 else if (get_state() == service_state_t::STOPPING) {
59 // stopping, but smooth recovery was in process. That's now over so we can
60 // commence normal stop. Note that if pid == -1 the process already stopped(!),
61 // that's handled below.
62 if (pid != -1 && stop_check_dependents()) {
68 void scripted_service::exec_succeeded() noexcept
70 // For a scripted service, this means nothing other than that the start/stop
71 // script will now begin.
74 rearm exec_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) noexcept
76 base_process_service *sr = service;
77 sr->waiting_for_execstat = false;
80 int r = read(get_watched_fd(), &exec_status, sizeof(int));
82 close(get_watched_fd());
85 // We read an errno code; exec() failed, and the service startup failed.
87 sr->child_listener.deregister(event_loop, sr->pid);
88 sr->reserved_child_watch = false;
89 if (sr->stop_timer_armed) {
90 sr->restart_timer.stop_timer(loop);
91 sr->stop_timer_armed = false;
95 sr->exec_failed(exec_status);
101 // Somehow the process managed to complete before we even saw the exec() status.
102 sr->handle_exit_status(sr->exit_status);
106 sr->services->process_queues();
108 return rearm::REMOVED;
111 rearm ready_notify_watcher::fd_event(eventloop_t &, int fd, int flags) noexcept
114 if (service->get_state() == service_state_t::STARTING) {
115 // can we actually read anything from the notification pipe?
116 int r = bp_sys::read(fd, buf, sizeof(buf));
120 else if (r == 0 || errno != EAGAIN) {
121 service->failed_to_start(false, false);
122 service->set_state(service_state_t::STOPPING);
123 service->bring_down();
127 // Just keep consuming data from the pipe:
128 int r = bp_sys::read(fd, buf, sizeof(buf));
130 // Process closed write end or terminated
132 service->notification_fd = -1;
133 return rearm::DISARM;
139 dasynq::rearm service_child_watcher::status_change(eventloop_t &loop, pid_t child, int status) noexcept
141 base_process_service *sr = service;
144 sr->exit_status = bp_sys::exit_status(status);
146 // Ok, for a process service, any process death which we didn't rig ourselves is a bit... unexpected.
147 // Probably, the child died because we asked it to (sr->service_state == STOPPING). But even if we
148 // didn't, there's not much we can do.
150 if (sr->waiting_for_execstat) {
151 // We still don't have an exec() status from the forked child, wait for that
152 // before doing any further processing.
153 return dasynq::rearm::NOOP; // hold watch reservation
156 // Must stop watch now since handle_exit_status might result in re-launch:
157 // (stop_watch instead of deregister, so that we hold watch reservation).
160 if (sr->stop_timer_armed) {
161 sr->restart_timer.stop_timer(loop);
162 sr->stop_timer_armed = false;
165 sr->handle_exit_status(bp_sys::exit_status(status));
166 return dasynq::rearm::NOOP;
169 void process_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
171 bool did_exit = exit_status.did_exit();
172 bool was_signalled = exit_status.was_signalled();
174 auto service_state = get_state();
176 if (notification_fd != -1) {
177 readiness_watcher.deregister(event_loop);
178 bp_sys::close(notification_fd);
179 notification_fd = -1;
182 if (!exit_status.did_exit_clean() && service_state != service_state_t::STOPPING) {
184 log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
185 exit_status.get_exit_status());
187 else if (was_signalled) {
188 log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
189 exit_status.get_term_sig());
193 if (service_state == service_state_t::STARTING) {
194 // If state is STARTING, we must be waiting for readiness notification; the process has
195 // terminated before becoming ready.
196 stop_reason = stopped_reason_t::FAILED;
199 else if (service_state == service_state_t::STOPPING) {
200 // We won't log a non-zero exit status or termination due to signal here -
201 // we assume that the process died because we signalled it.
202 if (stop_timer_armed) {
203 restart_timer.stop_timer(event_loop);
207 else if (smooth_recovery && service_state == service_state_t::STARTED
208 && get_target_state() == service_state_t::STARTED) {
209 do_smooth_recovery();
213 stop_reason = stopped_reason_t::TERMINATED;
216 services->process_queues();
219 void process_service::exec_failed(int errcode) noexcept
221 log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
223 if (notification_fd != -1) {
224 readiness_watcher.deregister(event_loop);
225 bp_sys::close(notification_fd);
226 notification_fd = -1;
229 if (get_state() == service_state_t::STARTING) {
230 stop_reason = stopped_reason_t::EXECFAILED;
234 // Process service in smooth recovery:
235 stop_reason = stopped_reason_t::TERMINATED;
240 void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
243 bool did_exit = exit_status.did_exit();
244 bool was_signalled = exit_status.was_signalled();
245 auto service_state = get_state();
247 if (!exit_status.did_exit_clean() && service_state != service_state_t::STOPPING) {
249 log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
250 exit_status.get_exit_status());
252 else if (was_signalled) {
253 log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
254 exit_status.get_term_sig());
258 // This may be a "smooth recovery" where we are restarting the process while leaving the
259 // service in the STARTED state.
260 if (restarting && service_state == service_state_t::STARTED) {
262 bool need_stop = false;
263 if ((did_exit && exit_status.get_exit_status() != 0) || was_signalled) {
267 // We need to re-read the PID, since it has now changed.
268 if (pid_file.length() != 0) {
269 auto pid_result = read_pid_file(&exit_status);
270 switch (pid_result) {
271 case pid_result_t::FAILED:
272 // Failed startup: no auto-restart.
275 case pid_result_t::TERMINATED:
277 case pid_result_t::OK:
284 // Failed startup: no auto-restart.
285 stop_reason = stopped_reason_t::TERMINATED;
287 services->process_queues();
294 if (service_state == service_state_t::STARTING) {
295 // POSIX requires that if the process exited cleanly with a status code of 0,
296 // the exit status value will be 0:
297 if (exit_status.did_exit_clean()) {
298 auto pid_result = read_pid_file(&exit_status);
299 switch (pid_result) {
300 case pid_result_t::FAILED:
301 // Failed startup: no auto-restart.
302 stop_reason = stopped_reason_t::FAILED;
305 case pid_result_t::TERMINATED:
306 // started, but immediately terminated
309 case pid_result_t::OK:
315 stop_reason = stopped_reason_t::FAILED;
319 else if (service_state == service_state_t::STOPPING) {
320 // We won't log a non-zero exit status or termination due to signal here -
321 // we assume that the process died because we signalled it.
325 // we must be STARTED
326 if (smooth_recovery && get_target_state() == service_state_t::STARTED) {
327 do_smooth_recovery();
330 if (! do_auto_restart() && start_explicit) {
331 start_explicit = false;
334 stop_reason = stopped_reason_t::TERMINATED;
339 services->process_queues();
342 void bgproc_service::exec_failed(int errcode) noexcept
344 log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
345 // Only time we execute is for startup:
346 stop_reason = stopped_reason_t::EXECFAILED;
350 void scripted_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
352 bool did_exit = exit_status.did_exit();
353 bool was_signalled = exit_status.was_signalled();
354 auto service_state = get_state();
356 // For a scripted service, a termination occurs in one of three main cases:
357 // - the start script completed (or failed), when service was STARTING
358 // - the start script was interrupted to cancel startup; state is STOPPING
359 // - the stop script completed (or failed), state is STOPPING
361 if (service_state == service_state_t::STOPPING) {
362 // We might be running the stop script, or we might be running the start script and have issued
363 // a cancel order via SIGINT:
364 if (interrupting_start) {
365 // We issued a start interrupt, so we expected this failure:
366 if (did_exit && exit_status.get_exit_status() != 0) {
367 log(loglevel_t::INFO, "Service ", get_name(), " start cancelled; exit code ",
368 exit_status.get_exit_status());
369 // Assume that a command terminating normally requires no cleanup:
374 log(loglevel_t::INFO, "Service ", get_name(), " start cancelled from signal ",
375 exit_status.get_term_sig());
377 // If the start script completed successfully, or was interrupted via our signal,
378 // we want to run the stop script to clean up:
381 interrupting_start = false;
383 else if (exit_status.did_exit_clean()) {
384 // We were running the stop script and finished successfully
388 // ??? failed to stop! Let's log it as a warning:
390 log(loglevel_t::WARN, "Service ", get_name(), " stop command failed with exit code ",
391 exit_status.get_exit_status());
393 else if (was_signalled) {
394 log(loglevel_t::WARN, "Service ", get_name(), " stop command terminated due to signal ",
395 exit_status.get_term_sig());
397 // Even if the stop script failed, assume that service is now stopped, so that any dependencies
398 // can be stopped. There's not really any other useful course of action here.
401 services->process_queues();
404 if (exit_status.did_exit_clean()) {
407 else if (was_signalled && exit_status.get_term_sig() == SIGINT && onstart_flags.skippable) {
408 // A skippable service can be skipped by interrupting (eg by ^C if the service
409 // starts on the console).
410 start_skipped = true;
416 log(loglevel_t::ERROR, "Service ", get_name(), " command failed with exit code ",
417 exit_status.get_exit_status());
419 else if (was_signalled) {
420 log(loglevel_t::ERROR, "Service ", get_name(), " command terminated due to signal ",
421 exit_status.get_term_sig());
423 stop_reason = stopped_reason_t::FAILED;
426 services->process_queues();
430 void scripted_service::exec_failed(int errcode) noexcept
432 log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
433 auto service_state = get_state();
434 if (service_state == service_state_t::STARTING) {
435 stop_reason = stopped_reason_t::EXECFAILED;
438 else if (service_state == service_state_t::STOPPING) {
439 // We've logged the failure, but it's probably better not to leave the service in
445 bgproc_service::pid_result_t
446 bgproc_service::read_pid_file(bp_sys::exit_status *exit_status) noexcept
448 const char *pid_file_c = pid_file.c_str();
449 int fd = open(pid_file_c, O_CLOEXEC);
451 log(loglevel_t::ERROR, get_name(), ": read pid file: ", strerror(errno));
452 return pid_result_t::FAILED;
455 char pidbuf[21]; // just enough to hold any 64-bit integer
456 int r = complete_read(fd, pidbuf, 20);
458 // Could not read from PID file
459 log(loglevel_t::ERROR, get_name(), ": could not read from pidfile; ", strerror(errno));
461 return pid_result_t::FAILED;
465 pidbuf[r] = 0; // store nul terminator
467 bool valid_pid = false;
469 unsigned long long v = std::stoull(pidbuf, nullptr, 0);
470 if (v <= std::numeric_limits<pid_t>::max()) {
475 catch (std::out_of_range &exc) {
478 catch (std::invalid_argument &exc) {
479 // Ok, so it doesn't look like a number: proceed...
483 pid_t wait_r = waitpid(pid, exit_status, WNOHANG);
484 if (wait_r == -1 && errno == ECHILD) {
485 // We can't track this child - check process exists:
486 if (kill(pid, 0) == 0 || errno != ESRCH) {
487 tracking_child = false;
488 return pid_result_t::OK;
491 log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
493 return pid_result_t::FAILED;
496 else if (wait_r == pid) {
498 return pid_result_t::TERMINATED;
500 else if (wait_r == 0) {
501 // We can track the child
502 child_listener.add_reserved(event_loop, pid, dasynq::DEFAULT_PRIORITY - 10);
503 tracking_child = true;
504 reserved_child_watch = true;
505 return pid_result_t::OK;
509 log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
511 return pid_result_t::FAILED;
514 void process_service::bring_down() noexcept
516 if (waiting_for_execstat) {
517 // The process is still starting. This should be uncommon, but can occur during
518 // smooth recovery. We can't do much now; we have to wait until we get the
519 // status, and then act appropriately.
522 else if (pid != -1) {
523 // The process is still kicking on - must actually kill it. We signal the process
524 // group (-pid) rather than just the process as there's less risk then of creating
525 // an orphaned process group:
526 if (! onstart_flags.no_sigterm) {
529 if (term_signal != -1) {
530 kill_pg(term_signal);
533 // If there's a stop timeout, arm the timer now:
534 if (stop_timeout != time_val(0,0)) {
535 restart_timer.arm_timer_rel(event_loop, stop_timeout);
536 stop_timer_armed = true;
539 // The rest is done in handle_exit_status.
542 // The process is already dead.
547 void bgproc_service::bring_down() noexcept
550 // The process is still kicking on - must actually kill it. We signal the process
551 // group (-pid) rather than just the process as there's less risk then of creating
552 // an orphaned process group:
553 if (! onstart_flags.no_sigterm) {
556 if (term_signal != -1) {
557 kill_pg(term_signal);
560 // In most cases, the rest is done in handle_exit_status.
561 // If we are a BGPROCESS and the process is not our immediate child, however, that
562 // won't work - check for this now:
563 if (! tracking_child) {
566 else if (stop_timeout != time_val(0,0)) {
567 restart_timer.arm_timer_rel(event_loop, stop_timeout);
568 stop_timer_armed = true;
572 // The process is already dead.
577 void scripted_service::bring_down() noexcept
580 // We're already running the stop script; nothing to do.
584 if (stop_command.length() == 0) {
587 else if (! start_ps_process(stop_arg_parts, false)) {
588 // Couldn't execute stop script, but there's not much we can do:
592 // successfully started stop script: start kill timer:
593 if (stop_timeout != time_val(0,0)) {
594 restart_timer.arm_timer_rel(event_loop, stop_timeout);
595 stop_timer_armed = true;
600 dasynq::rearm process_restart_timer::timer_expiry(eventloop_t &, int expiry_count)
602 service->timer_expired();
604 // Leave the timer disabled, or, if it has been reset by any processing above, leave it armed:
605 return dasynq::rearm::NOOP;