4 #include <sys/socket.h>
7 #include "dinit-socket.h"
8 #include "dinit-util.h"
10 #include "proc-service.h"
13 * Most of the implementation for process-based services (process, scripted, bgprocess) is here.
15 * See proc-service.h header for interface details.
18 // Given a string and a list of pairs of (start,end) indices for each argument in that string,
19 // store a null terminator for the argument. Return a `char *` vector containing the beginning
20 // of each argument and a trailing nullptr. (The returned array is invalidated if the string is
22 std::vector<const char *> separate_args(std::string &s,
23 const std::list<std::pair<unsigned,unsigned>> &arg_indices)
25 std::vector<const char *> r;
// Reserve one slot per argument plus one extra (arg_indices.size() + 1) up front, so the
// push_back calls below do not reallocate.
26 r.reserve(arg_indices.size() + 1);
28 // First store nul terminator for each part:
29 for (auto index_pair : arg_indices) {
// Only write inside the string: nothing is stored when the end index is not below
// s.length(). For an end index equal to s.length(), c_str() already supplies the nul.
30 if (index_pair.second < s.length()) {
31 s[index_pair.second] = 0;
35 // Now we can get the C string (c_str) and store offsets into it:
// Every stored pointer aliases the buffer of `s` (no copy is made), so `s` must stay alive
// and unmodified for as long as the pointers are in use.
36 const char * cstr = s.c_str();
37 for (auto index_pair : arg_indices) {
38 r.push_back(cstr + index_pair.first);
// Hook for a `process` service, named for the point at which exec() of the service process
// has succeeded (presumably invoked once the exec status is known - confirm against caller).
// Acts only when the service is STARTING or STOPPING.
44 void process_service::exec_succeeded() noexcept
46 // This could be a smooth recovery (state already STARTED). Even more, the process
47 // might be stopped (and killed via a signal) during smooth recovery. We don't want to
48 // process startup again in either case, so we check for state STARTING:
49 if (get_state() == service_state_t::STARTING) {
// Readiness notification applies when either force_notification_fd is set (not -1) or
// notification_var is non-empty.
50 if (force_notification_fd != -1 || !notification_var.empty()) {
51 // Wait for readiness notification:
52 readiness_watcher.set_enabled(event_loop, true);
58 else if (get_state() == service_state_t::STOPPING) {
59 // stopping, but smooth recovery was in process. That's now over so we can
60 // commence normal stop. Note that if pid == -1 the process already stopped(!),
61 // that's handled below.
// The process must still exist (pid != -1) and stop_check_dependents() must return true.
62 if (pid != -1 && stop_check_dependents()) {
// Scripted-service counterpart of the exec-success hook.
68 void scripted_service::exec_succeeded() noexcept
70 // For a scripted service, this means nothing other than that the start/stop
71 // script will now begin.
// Event-loop callback for the pipe over which the exec() status of a forked child is
// reported. The descriptor is taken from get_watched_fd(); the `fd` and `flags` parameters
// are not used by the statements visible here. Always returns rearm::REMOVED.
74 rearm exec_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) noexcept
76 base_process_service *sr = service;
// The status is being handled now, so the service is no longer waiting for it.
77 sr->waiting_for_execstat = false;
// Attempt to read one int (an errno value, per the comment further down) from the pipe.
80 int r = read(get_watched_fd(), &exec_status, sizeof(int));
// The pipe is not needed again: close it here; the watcher itself is removed on return.
82 close(get_watched_fd());
85 // We read an errno code; exec() failed, and the service startup failed.
// NOTE(review): deregistration uses `event_loop` while the timer below uses the `loop`
// parameter - presumably both refer to the same loop object; confirm.
87 sr->child_listener.deregister(event_loop, sr->pid);
88 sr->reserved_child_watch = false;
89 if (sr->stop_timer_armed) {
90 sr->restart_timer.stop_timer(loop);
91 sr->stop_timer_armed = false;
95 sr->exec_failed(exec_status);
101 // Somehow the process managed to complete before we even saw the exec() status.
// sr->exit_status is the value stored by service_child_watcher::status_change.
102 sr->handle_exit_status(sr->exit_status);
106 sr->services->process_queues();
108 return rearm::REMOVED;
// Event-loop callback for the readiness notification pipe of a service. Behaviour differs
// depending on whether the service is still STARTING.
111 rearm ready_notify_watcher::fd_event(eventloop_t &, int fd, int flags) noexcept
114 if (service->get_state() == service_state_t::STARTING) {
115 // can we actually read anything from the notification pipe?
116 int r = bp_sys::read(fd, buf, sizeof(buf));
// End of file (r == 0) or a read error other than EAGAIN while still STARTING: the start
// is marked as failed, the state is set to STOPPING and the service is brought down.
120 else if (r == 0 || errno != EAGAIN) {
121 service->failed_to_start(false, false);
122 service->set_state(service_state_t::STOPPING);
123 service->bring_down();
127 // Just keep consuming data from the pipe:
128 int r = bp_sys::read(fd, buf, sizeof(buf));
130 // Process closed write end or terminated
// Forget the descriptor on the service and leave the watcher disarmed.
132 service->notification_fd = -1;
133 return rearm::DISARM;
// Child-watch callback for the service process. `status` is the raw status value, which is
// wrapped in bp_sys::exit_status before use. Both visible return paths yield rearm::NOOP.
139 dasynq::rearm service_child_watcher::status_change(eventloop_t &loop, pid_t child, int status) noexcept
141 base_process_service *sr = service;
// Store the status on the service so that it is still available if handling is deferred
// below (exec_status_pipe_watcher::fd_event later passes sr->exit_status on).
144 sr->exit_status = bp_sys::exit_status(status);
146 // Ok, for a process service, any process death which we didn't rig ourselves is a bit... unexpected.
147 // Probably, the child died because we asked it to (sr->service_state == STOPPING). But even if we
148 // didn't, there's not much we can do.
150 if (sr->waiting_for_execstat) {
151 // We still don't have an exec() status from the forked child, wait for that
152 // before doing any further processing.
153 return dasynq::rearm::NOOP; // hold watch reservation
156 // Must stop watch now since handle_exit_status might result in re-launch:
157 // (stop_watch instead of deregister, so that we hold watch reservation).
// The process has ended, so a pending stop timer is no longer needed.
160 if (sr->stop_timer_armed) {
161 sr->restart_timer.stop_timer(loop);
162 sr->stop_timer_armed = false;
165 sr->handle_exit_status(bp_sys::exit_status(status));
166 return dasynq::rearm::NOOP;
// Handle termination of the process belonging to a `process` service.
// `exit_status` describes how the process ended. The follow-up action depends on the service
// state sampled at entry: STARTING, STOPPING, or otherwise.
169 void process_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
171 bool did_exit = exit_status.did_exit();
172 bool was_signalled = exit_status.was_signalled();
// Sample the state once; all the checks below use this value.
174 auto service_state = get_state();
// The process is gone, so any readiness notification pipe is torn down first.
176 if (notification_fd != -1) {
177 readiness_watcher.deregister(event_loop);
178 bp_sys::close(notification_fd);
179 notification_fd = -1;
// An unclean exit is logged only when the service was not already being stopped.
182 if (!exit_status.did_exit_clean() && service_state != service_state_t::STOPPING) {
184 log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
185 exit_status.get_exit_status());
187 else if (was_signalled) {
188 log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
189 exit_status.get_term_sig());
// Clear the utmp entry when either inittab field has a non-nul first character.
194 if (*inittab_id || *inittab_line) {
195 clear_utmp_entry(inittab_id, inittab_line);
199 if (service_state == service_state_t::STARTING) {
200 // If state is STARTING, we must be waiting for readiness notification; the process has
201 // terminated before becoming ready.
202 stop_reason = stopped_reason_t::FAILED;
205 else if (service_state == service_state_t::STOPPING) {
206 // We won't log a non-zero exit status or termination due to signal here -
207 // we assume that the process died because we signalled it.
208 if (stop_timer_armed) {
209 restart_timer.stop_timer(event_loop);
// Smooth recovery is attempted only if it is enabled and the service both is, and is meant
// to remain, STARTED.
213 else if (smooth_recovery && service_state == service_state_t::STARTED
214 && get_target_state() == service_state_t::STARTED) {
215 do_smooth_recovery();
219 stop_reason = stopped_reason_t::TERMINATED;
222 services->process_queues();
// Handle a failure to exec() the process of a `process` service.
// `errcode` is an errno value; it is converted to text with strerror() for the log message.
225 void process_service::exec_failed(int errcode) noexcept
227 log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
// No process is running, so any readiness notification pipe is torn down.
229 if (notification_fd != -1) {
230 readiness_watcher.deregister(event_loop);
231 bp_sys::close(notification_fd);
232 notification_fd = -1;
// The recorded stop reason distinguishes a failed initial start from a failed re-launch.
235 if (get_state() == service_state_t::STARTING) {
236 stop_reason = stopped_reason_t::EXECFAILED;
240 // Process service in smooth recovery:
241 stop_reason = stopped_reason_t::TERMINATED;
// Handle termination of a process belonging to a `bgprocess` service.
// `exit_status` describes how the process ended. Note that it is passed by address to
// read_pid_file() below, which hands it to waitpid() and so may overwrite it.
246 void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
249 bool did_exit = exit_status.did_exit();
250 bool was_signalled = exit_status.was_signalled();
// Sample the state once; all the checks below use this value.
251 auto service_state = get_state();
// An unclean exit is logged only when the service was not already being stopped.
253 if (!exit_status.did_exit_clean() && service_state != service_state_t::STOPPING) {
255 log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
256 exit_status.get_exit_status());
258 else if (was_signalled) {
259 log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
260 exit_status.get_term_sig());
264 // This may be a "smooth recovery" where we are restarting the process while leaving the
265 // service in the STARTED state.
266 if (restarting && service_state == service_state_t::STARTED) {
268 bool need_stop = false;
// Tests for a non-zero exit code or termination by a signal.
269 if ((did_exit && exit_status.get_exit_status() != 0) || was_signalled) {
273 // We need to re-read the PID, since it has now changed.
274 if (pid_file.length() != 0) {
275 auto pid_result = read_pid_file(&exit_status);
276 switch (pid_result) {
277 case pid_result_t::FAILED:
278 // Failed startup: no auto-restart.
281 case pid_result_t::TERMINATED:
283 case pid_result_t::OK:
290 // Failed startup: no auto-restart.
291 stop_reason = stopped_reason_t::TERMINATED;
293 services->process_queues();
300 if (service_state == service_state_t::STARTING) {
301 // POSIX requires that if the process exited cleanly with a status code of 0,
302 // the exit status value will be 0:
303 if (exit_status.did_exit_clean()) {
// After a clean exit while STARTING, the pid file is read to obtain the pid.
304 auto pid_result = read_pid_file(&exit_status);
305 switch (pid_result) {
306 case pid_result_t::FAILED:
307 // Failed startup: no auto-restart.
308 stop_reason = stopped_reason_t::FAILED;
311 case pid_result_t::TERMINATED:
312 // started, but immediately terminated
315 case pid_result_t::OK:
321 stop_reason = stopped_reason_t::FAILED;
325 else if (service_state == service_state_t::STOPPING) {
326 // We won't log a non-zero exit status or termination due to signal here -
327 // we assume that the process died because we signalled it.
331 // we must be STARTED
332 if (smooth_recovery && get_target_state() == service_state_t::STARTED) {
333 do_smooth_recovery();
// If do_auto_restart() returns false and the service had been started explicitly, the
// explicit-start marker is cleared.
336 if (! do_auto_restart() && start_explicit) {
337 start_explicit = false;
340 stop_reason = stopped_reason_t::TERMINATED;
345 services->process_queues();
// Handle a failure to exec() the process of a `bgprocess` service.
// `errcode` is an errno value; it is converted to text with strerror() for the log message.
348 void bgproc_service::exec_failed(int errcode) noexcept
350 log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
351 // Only time we execute is for startup:
352 stop_reason = stopped_reason_t::EXECFAILED;
// Handle termination of a start or stop script belonging to a `scripted` service.
// `exit_status` describes how the script process ended. Which script finished is decided from
// the current service state and the interrupting_start flag.
356 void scripted_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
358 bool did_exit = exit_status.did_exit();
359 bool was_signalled = exit_status.was_signalled();
360 auto service_state = get_state();
362 // For a scripted service, a termination occurs in one of three main cases:
363 // - the start script completed (or failed), when service was STARTING
364 // - the start script was interrupted to cancel startup; state is STOPPING
365 // - the stop script completed (or failed), state is STOPPING
367 if (service_state == service_state_t::STOPPING) {
368 // We might be running the stop script, or we might be running the start script and have issued
369 // a cancel order via SIGINT:
370 if (interrupting_start) {
371 // We issued a start interrupt, so we expected this failure:
372 if (did_exit && exit_status.get_exit_status() != 0) {
373 log(loglevel_t::INFO, "Service ", get_name(), " start cancelled; exit code ",
374 exit_status.get_exit_status());
375 // Assume that a command terminating normally requires no cleanup:
380 log(loglevel_t::INFO, "Service ", get_name(), " start cancelled from signal ",
381 exit_status.get_term_sig());
383 // If the start script completed successfully, or was interrupted via our signal,
384 // we want to run the stop script to clean up:
// The interrupt has been dealt with, so the flag is reset.
387 interrupting_start = false;
389 else if (exit_status.did_exit_clean()) {
390 // We were running the stop script and finished successfully
394 // ??? failed to stop! Let's log it as warning:
396 log(loglevel_t::WARN, "Service ", get_name(), " stop command failed with exit code ",
397 exit_status.get_exit_status());
399 else if (was_signalled) {
400 log(loglevel_t::WARN, "Service ", get_name(), " stop command terminated due to signal ",
401 exit_status.get_term_sig());
403 // Even if the stop script failed, assume that service is now stopped, so that any dependencies
404 // can be stopped. There's not really any other useful course of action here.
407 services->process_queues();
// From here on the start script is the one that terminated.
410 if (exit_status.did_exit_clean()) {
// Only termination by SIGINT, and only for a service flagged skippable, counts as a skip.
413 else if (was_signalled && exit_status.get_term_sig() == SIGINT && onstart_flags.skippable) {
414 // A skippable service can be skipped by interrupting (eg by ^C if the service
415 // starts on the console).
416 start_skipped = true;
422 log(loglevel_t::ERROR, "Service ", get_name(), " command failed with exit code ",
423 exit_status.get_exit_status());
425 else if (was_signalled) {
426 log(loglevel_t::ERROR, "Service ", get_name(), " command terminated due to signal ",
427 exit_status.get_term_sig());
429 stop_reason = stopped_reason_t::FAILED;
432 services->process_queues();
// Handle a failure to exec() a script of a `scripted` service.
// `errcode` is an errno value; it is converted to text with strerror() for the log message.
// The state (STARTING or STOPPING) determines which case is being handled.
436 void scripted_service::exec_failed(int errcode) noexcept
438 log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
439 auto service_state = get_state();
440 if (service_state == service_state_t::STARTING) {
441 stop_reason = stopped_reason_t::EXECFAILED;
444 else if (service_state == service_state_t::STOPPING) {
445 // We've logged the failure, but it's probably better not to leave the service in
// Read the pid from the service's pid file and check the status of that process.
// `exit_status` is handed to waitpid() below as its status output.
// Visible results: FAILED if the file cannot be opened or read, or the pid is not usable;
// TERMINATED if waitpid() reports the process has already ended; OK if the process exists,
// whether or not it can be watched as a child (tracking_child records which).
451 bgproc_service::pid_result_t
452 bgproc_service::read_pid_file(bp_sys::exit_status *exit_status) noexcept
454 const char *pid_file_c = pid_file.c_str();
// NOTE(review): no access mode is given. This relies on O_RDONLY being 0 on the platform;
// `O_RDONLY | O_CLOEXEC` would state the intent explicitly.
455 int fd = open(pid_file_c, O_CLOEXEC);
457 log(loglevel_t::ERROR, get_name(), ": read pid file: ", strerror(errno));
458 return pid_result_t::FAILED;
461 char pidbuf[21]; // just enough to hold any 64-bit integer
// Read at most 20 bytes so that one byte is left for the nul terminator stored below.
462 int r = complete_read(fd, pidbuf, 20);
464 // Could not read from PID file
465 log(loglevel_t::ERROR, get_name(), ": could not read from pidfile; ", strerror(errno));
467 return pid_result_t::FAILED;
471 pidbuf[r] = 0; // store nul terminator
473 bool valid_pid = false;
// Base 0 makes stoull auto-detect the base, so a "0x" or leading "0" prefix is parsed as
// hex or octal. NOTE(review): confirm that is intended for pid files.
475 unsigned long long v = std::stoull(pidbuf, nullptr, 0);
// Range check against pid_t. NOTE(review): this compares an unsigned value with a signed
// maximum.
476 if (v <= std::numeric_limits<pid_t>::max()) {
481 catch (std::out_of_range &exc) {
484 catch (std::invalid_argument &exc) {
485 // Ok, so it doesn't look like a number: proceed...
// WNOHANG: poll the status of `pid` without blocking.
489 pid_t wait_r = waitpid(pid, exit_status, WNOHANG);
// ECHILD: `pid` is not a child of this process, so it cannot be waited for.
490 if (wait_r == -1 && errno == ECHILD) {
491 // We can't track this child - check process exists:
// Signal 0 only probes. Any outcome other than ESRCH is taken to mean the process exists.
492 if (kill(pid, 0) == 0 || errno != ESRCH) {
493 tracking_child = false;
494 return pid_result_t::OK;
497 log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
499 return pid_result_t::FAILED;
// waitpid() returned the pid itself: the process has already changed state (terminated).
502 else if (wait_r == pid) {
504 return pid_result_t::TERMINATED;
// waitpid() returned 0: the process is our child and is still running.
506 else if (wait_r == 0) {
507 // We can track the child
508 child_listener.add_reserved(event_loop, pid, dasynq::DEFAULT_PRIORITY - 10);
509 tracking_child = true;
510 reserved_child_watch = true;
511 return pid_result_t::OK;
515 log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
517 return pid_result_t::FAILED;
// Begin stopping the process of a `process` service. Three situations are distinguished:
// the exec status is still pending, a process is running (pid != -1), or no process exists.
520 void process_service::bring_down() noexcept
522 if (waiting_for_execstat) {
523 // The process is still starting. This should be uncommon, but can occur during
524 // smooth recovery. We can't do much now; we have to wait until we get the
525 // status, and then act appropriately.
528 else if (pid != -1) {
529 // The process is still kicking on - must actually kill it. We signal the process
530 // group (-pid) rather than just the process as there's less risk then of creating
531 // an orphaned process group:
532 if (! onstart_flags.no_sigterm) {
// term_signal is sent only when it is set (not -1).
535 if (term_signal != -1) {
536 kill_pg(term_signal);
539 // If there's a stop timeout, arm the timer now:
// A stop_timeout equal to time_val(0,0) means no timer is armed.
540 if (stop_timeout != time_val(0,0)) {
541 restart_timer.arm_timer_rel(event_loop, stop_timeout);
542 stop_timer_armed = true;
545 // The rest is done in handle_exit_status.
548 // The process is already dead.
// Begin stopping the process of a `bgprocess` service.
553 void bgproc_service::bring_down() noexcept
556 // The process is still kicking on - must actually kill it. We signal the process
557 // group (-pid) rather than just the process as there's less risk then of creating
558 // an orphaned process group:
559 if (! onstart_flags.no_sigterm) {
// term_signal is sent only when it is set (not -1).
562 if (term_signal != -1) {
563 kill_pg(term_signal);
566 // In most cases, the rest is done in handle_exit_status.
567 // If we are a BGPROCESS and the process is not our immediate child, however, that
568 // won't work - check for this now:
569 if (! tracking_child) {
// The stop timer is armed only for a tracked child, and only if a non-zero stop_timeout
// is set.
572 else if (stop_timeout != time_val(0,0)) {
573 restart_timer.arm_timer_rel(event_loop, stop_timeout);
574 stop_timer_armed = true;
578 // The process is already dead.
// Begin stopping a `scripted` service by launching its stop command, if there is one.
583 void scripted_service::bring_down() noexcept
586 // We're already running the stop script; nothing to do.
// An empty stop_command means there is no stop script to run.
590 if (stop_command.length() == 0) {
// Launch the stop script; a false return from start_ps_process means the launch failed.
593 else if (! start_ps_process(stop_arg_parts, false)) {
594 // Couldn't execute stop script, but there's not much we can do:
598 // successfully started stop script: start kill timer:
// A stop_timeout equal to time_val(0,0) means no timer is armed.
599 if (stop_timeout != time_val(0,0)) {
600 restart_timer.arm_timer_rel(event_loop, stop_timeout);
601 stop_timer_armed = true;
// Timer callback: forwards the expiry to the owning service via timer_expired().
// `expiry_count` is not used by the statements visible here. Returns rearm::NOOP.
606 dasynq::rearm process_restart_timer::timer_expiry(eventloop_t &, int expiry_count)
608 service->timer_expired();
610 // Leave the timer disabled, or, if it has been reset by any processing above, leave it armed:
611 return dasynq::rearm::NOOP;