5 #include <sys/socket.h>
8 #include "dinit-socket.h"
9 #include "dinit-util.h"
10 #include "dinit-log.h"
11 #include "proc-service.h"
14 * Most of the implementation for process-based services (process, scripted, bgprocess) is here.
16 * See proc-service.h header for interface details.
19 // Strings describing the execution stages (failure points).
// One entry per exec_stage value; the array is sized as DO_EXEC + 1 and the exec_failed()
// handlers in this file index it with static_cast<int>(errcode.stage) when logging which step
// of child setup failed. Each string is annotated with the enumerator it is meant to describe,
// so the order here presumably has to match the declaration order of exec_stage (declared
// elsewhere - confirm when adding a stage).
20 const char * const exec_stage_descriptions[static_cast<int>(exec_stage::DO_EXEC) + 1] = {
21 "arranging file descriptors", // ARRANGE_FDS
22 "reading environment file", // READ_ENV_FILE
23 "setting environment variable", // SET_NOTIFYFD_VAR
24 "setting up activation socket", // SETUP_ACTIVATION_SOCKET
25 "setting up control socket", // SETUP_CONTROL_SOCKET
26 "changing directory", // CHDIR
27 "setting up standard input/output descriptors", // SETUP_STDINOUTERR
28 "setting resource limits", // SET_RLIMITS
29 "setting user/group ID", // SET_UIDGID
30 "executing command" // DO_EXEC
33 // Given a string and a list of pairs of (start,end) indices for each argument in that string,
34 // store a null terminator for the argument. Return a `char *` vector containing the beginning
35 // of each argument and a trailing nullptr. (The returned array is invalidated if the string is
// NOTE(review): the sentence above is cut off in this view. The pointers stored below are
// offsets into s.c_str(), so they presumably stop being valid once 's' is modified or destroyed.
//
// Parameters:
//   s           - the command string; modified in place (a nul byte is written at the end index
//                 of every argument whose end index lies inside the string).
//   arg_indices - (start, end) offsets into 's', one pair per argument.
37 std::vector<const char *> separate_args(std::string &s,
38 const std::list<std::pair<unsigned,unsigned>> &arg_indices)
40 std::vector<const char *> r;
// One slot per argument plus one extra (for the trailing nullptr described above; the statement
// that appends it is not visible in this view).
41 r.reserve(arg_indices.size() + 1);
43 // First store nul terminator for each part:
44 for (auto index_pair : arg_indices) {
// An end index equal to s.length() needs no write: c_str() already supplies a terminator there.
45 if (index_pair.second < s.length()) {
46 s[index_pair.second] = 0;
50 // Now we can get the C string (c_str) and store offsets into it:
// (The pointer is fetched only after all terminators have been written.)
51 const char * cstr = s.c_str();
52 for (auto index_pair : arg_indices) {
53 r.push_back(cstr + index_pair.first);
// Hook invoked from exec_status_pipe_watcher::fd_event; going by its name it runs once the
// launched child is known to have got past exec(). Only the STARTING and STOPPING states are
// handled in the lines visible here.
59 void process_service::exec_succeeded() noexcept
61 // This could be a smooth recovery (state already STARTED). Even more, the process
62 // might be stopped (and killed via a signal) during smooth recovery. We don't want to
63 // process startup again in either case, so we check for state STARTING:
64 if (get_state() == service_state_t::STARTING) {
// Readiness notification is in use when either force_notification_fd is set (not -1) or
// notification_var is non-empty.
65 if (force_notification_fd != -1 || !notification_var.empty()) {
66 // Wait for readiness notification:
67 readiness_watcher.set_enabled(event_loop, true);
// NOTE(review): the branch taken when no readiness notification is configured is not visible
// in this view.
73 else if (get_state() == service_state_t::STOPPING) {
74 // stopping, but smooth recovery was in process. That's now over so we can
75 // commence normal stop. Note that if pid == -1 the process already stopped(!),
76 // that's handled below.
77 if (pid != -1 && stop_check_dependents()) {
// NOTE(review): the action taken when this condition holds is not visible in this view.
// exec-success hook for scripted services. No statements are visible in this view; per the
// comment below, nothing needs doing at this point because the script's eventual exit status is
// what matters (see scripted_service::handle_exit_status).
83 void scripted_service::exec_succeeded() noexcept
85 // For a scripted service, this means nothing other than that the start/stop
86 // script will now begin.
// Event-loop callback for the pipe on which the forked child reports its exec status.
// Clears waiting_for_execstat, reads one run_proc_err record from the watched descriptor, closes
// that descriptor, then dispatches to exec_failed(), exec_succeeded() or handle_exit_status()
// on the owning service. Finally it processes the service-set queues and returns
// rearm::REMOVED.
// The 'fd' and 'flags' parameters are not used in the visible lines; the descriptor is obtained
// through get_watched_fd().
// NOTE(review): the conditions selecting between the three dispatch targets (presumably based
// on the read result 'r' and on whether the child has already exited) are not visible here.
89 rearm exec_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) noexcept
91 base_process_service *sr = service;
92 sr->waiting_for_execstat = false;
94 run_proc_err exec_status;
// Single read of the whole record; no retry for short reads appears in the visible lines.
95 int r = read(get_watched_fd(), &exec_status, sizeof(exec_status));
97 close(get_watched_fd());
100 // We read an errno code; exec() failed, and the service startup failed.
// Failure path: drop the child watch (and its reservation flag) and cancel any armed timer
// before reporting the failure to the service.
102 sr->child_listener.deregister(event_loop, sr->pid);
103 sr->reserved_child_watch = false;
104 if (sr->stop_timer_armed) {
105 sr->restart_timer.stop_timer(loop);
106 sr->stop_timer_armed = false;
110 sr->exec_failed(exec_status);
113 sr->exec_succeeded();
116 // Somehow the process managed to complete before we even saw the exec() status.
// The status passed here is the one stored by service_child_watcher::status_change.
117 sr->handle_exit_status(sr->exit_status);
121 sr->services->process_queues();
123 return rearm::REMOVED;
// Event-loop callback for the readiness-notification descriptor of a service.
// While the service is STARTING, data is read from 'fd'; end-of-file or a read error other than
// EAGAIN is treated as a failed start. In any other state the descriptor is simply drained, and
// (per the comments below) the watch is disarmed once the writer has closed its end.
// NOTE(review): the declaration of 'buf', the branch handling a successful read, and the final
// return statement are not visible in this view.
126 rearm ready_notify_watcher::fd_event(eventloop_t &, int fd, int flags) noexcept
129 if (service->get_state() == service_state_t::STARTING) {
130 // can we actually read anything from the notification pipe?
131 int r = bp_sys::read(fd, buf, sizeof(buf));
// r == 0 is end-of-file; otherwise any errno other than EAGAIN counts as a real error
// (presumably r < 0 here - the preceding branch is not visible).
135 else if (r == 0 || errno != EAGAIN) {
// Mark the start as failed, move to STOPPING and begin taking the process down.
136 service->failed_to_start(false, false);
137 service->set_state(service_state_t::STOPPING);
138 service->bring_down();
142 // Just keep consuming data from the pipe:
143 int r = bp_sys::read(fd, buf, sizeof(buf));
145 // Process closed write end or terminated
147 service->notification_fd = -1;
148 return rearm::DISARM;
152 service->services->process_queues();
// Child-watch callback: invoked with the wait status of the watched child process.
// Stores the status on the service, and either defers processing (if the exec status has not
// been received yet) or cancels any armed timer and forwards the status to
// handle_exit_status(). Returns dasynq::rearm::NOOP on both visible paths.
// The 'child' parameter is not used in the visible lines.
156 dasynq::rearm service_child_watcher::status_change(eventloop_t &loop, pid_t child, int status) noexcept
158 base_process_service *sr = service;
// Saved so that exec_status_pipe_watcher::fd_event can pass it to handle_exit_status() if the
// child exited before its exec status was seen.
161 sr->exit_status = bp_sys::exit_status(status);
163 // Ok, for a process service, any process death which we didn't rig ourselves is a bit... unexpected.
164 // Probably, the child died because we asked it to (sr->service_state == STOPPING). But even if we
165 // didn't, there's not much we can do.
167 if (sr->waiting_for_execstat) {
168 // We still don't have an exec() status from the forked child, wait for that
169 // before doing any further processing.
170 return dasynq::rearm::NOOP; // hold watch reservation
173 // Must stop watch now since handle_exit_status might result in re-launch:
174 // (stop_watch instead of deregister, so that we hold watch reservation).
// NOTE(review): the stop_watch call these two comment lines refer to is not visible in this view.
177 if (sr->stop_timer_armed) {
178 sr->restart_timer.stop_timer(loop);
179 sr->stop_timer_armed = false;
182 sr->handle_exit_status(bp_sys::exit_status(status));
183 return dasynq::rearm::NOOP;
// Handle termination of the process backing a "process" service.
// Steps visible here: tear down the readiness-notification watch and descriptor, log an
// abnormal exit (unless the service is STOPPING), clear the utmp entry if one is configured,
// then act according to the service state (STARTING / STOPPING / STARTED with smooth recovery /
// otherwise), and finally process the service-set queues.
// NOTE(review): several statements inside the state branches (the calls that actually change
// service state) are not visible in this view.
186 void process_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
188 bool did_exit = exit_status.did_exit();
189 bool was_signalled = exit_status.was_signalled();
190 auto service_state = get_state();
// The process is gone, so its notification pipe is of no further use: remove the watcher and
// close our end.
192 if (notification_fd != -1) {
193 readiness_watcher.deregister(event_loop);
194 bp_sys::close(notification_fd);
195 notification_fd = -1;
// Unclean termination is only reported when we were not in the middle of stopping the service.
198 if (!exit_status.did_exit_clean() && service_state != service_state_t::STOPPING) {
200 log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
201 exit_status.get_exit_status());
203 else if (was_signalled) {
204 log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
205 exit_status.get_term_sig());
// Either string being non-empty (first character non-nul) means a utmp entry is in use.
210 if (*inittab_id || *inittab_line) {
211 clear_utmp_entry(inittab_id, inittab_line);
215 if (service_state == service_state_t::STARTING) {
216 // If state is STARTING, we must be waiting for readiness notification; the process has
217 // terminated before becoming ready.
218 stop_reason = stopped_reason_t::FAILED;
221 else if (service_state == service_state_t::STOPPING) {
222 // We won't log a non-zero exit status or termination due to signal here -
223 // we assume that the process died because we signalled it.
224 if (stop_timer_armed) {
225 restart_timer.stop_timer(event_loop);
// Smooth recovery applies only when the service is up and is still meant to be up.
229 else if (smooth_recovery && service_state == service_state_t::STARTED
230 && get_target_state() == service_state_t::STARTED) {
231 do_smooth_recovery();
235 stop_reason = stopped_reason_t::TERMINATED;
238 services->process_queues();
// Called (from exec_status_pipe_watcher::fd_event) when the child reported that it could not
// complete setup/exec. 'errcode' carries the failing stage and the errno value; both are logged.
// The readiness-notification watch and descriptor are released, and stop_reason is set to
// EXECFAILED if the service was STARTING, or TERMINATED otherwise (smooth recovery).
// NOTE(review): the state-changing calls that follow each stop_reason assignment are not
// visible in this view.
241 void process_service::exec_failed(run_proc_err errcode) noexcept
243 log(loglevel_t::ERROR, get_name(), ": execution failed - ",
244 exec_stage_descriptions[static_cast<int>(errcode.stage)], ": ", strerror(errcode.st_errno));
246 if (notification_fd != -1) {
247 readiness_watcher.deregister(event_loop);
248 bp_sys::close(notification_fd);
249 notification_fd = -1;
252 if (get_state() == service_state_t::STARTING) {
253 stop_reason = stopped_reason_t::EXECFAILED;
257 // Process service in smooth recovery:
258 stop_reason = stopped_reason_t::TERMINATED;
// Handle an exit status for a "bgprocess" service (see the two cases described below).
// 'exit_status' is taken by value; its address is handed to read_pid_file(), which may
// overwrite it with the status of the process named in the pid file.
// NOTE(review): many statements of this function (case bodies, state-changing calls, returns)
// are not visible in this view; the added comments describe only what is shown.
263 void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
265 // For bgproc services, receiving exit status can mean one of two things:
266 // 1. We were launching the process, and it finished (possibly after forking). If it did fork
267 // we want to obtain the process id of the process that we should now monitor, the actual
// NOTE(review): the end of item 1 is cut off in this view (presumably "daemon").
269 // 2. The above has already happened, and we are monitoring the daemon process, which has now
270 // terminated for some reason.
273 bool did_exit = exit_status.did_exit();
274 bool was_signalled = exit_status.was_signalled();
275 auto service_state = get_state();
// Unclean termination is only reported when we were not in the middle of stopping the service.
277 if (!exit_status.did_exit_clean() && service_state != service_state_t::STOPPING) {
279 log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
280 exit_status.get_exit_status());
282 else if (was_signalled) {
283 log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
284 exit_status.get_term_sig());
288 // This may be a "smooth recovery" where we are restarting the process while leaving the
289 // service in the STARTED state. This must be the case if 'restarting' is set while the state
290 // is currently STARTED.
291 if (restarting && service_state == service_state_t::STARTED) {
293 bool need_stop = false;
// The relaunched launcher exited with a non-zero code or was killed by a signal.
294 if ((did_exit && exit_status.get_exit_status() != 0) || was_signalled) {
298 // We need to re-read the PID, since it has now changed.
299 if (pid_file.length() != 0) {
300 auto pid_result = read_pid_file(&exit_status);
301 switch (pid_result) {
302 case pid_result_t::FAILED:
303 // Failed startup: no auto-restart.
306 case pid_result_t::TERMINATED:
308 case pid_result_t::OK:
315 // Failed startup: no auto-restart.
316 stop_reason = stopped_reason_t::TERMINATED;
318 services->process_queues();
324 if (service_state == service_state_t::STARTING) {
325 // POSIX requires that if the process exited cleanly with a status code of 0,
326 // the exit status value will be 0:
327 if (exit_status.did_exit_clean()) {
// The launcher finished successfully; find the daemon it left behind via the pid file.
328 auto pid_result = read_pid_file(&exit_status);
329 switch (pid_result) {
330 case pid_result_t::FAILED:
331 // Failed startup: no auto-restart.
332 stop_reason = stopped_reason_t::FAILED;
335 case pid_result_t::TERMINATED:
336 // started, but immediately terminated
339 case pid_result_t::OK:
345 stop_reason = stopped_reason_t::FAILED;
349 else if (service_state == service_state_t::STOPPING) {
350 // We won't log a non-zero exit status or termination due to signal here -
351 // we assume that the process died because we signalled it.
355 // we must be STARTED
356 if (smooth_recovery && get_target_state() == service_state_t::STARTED) {
358 do_smooth_recovery();
361 stop_reason = stopped_reason_t::TERMINATED;
366 services->process_queues();
// Called when the child reported that it could not complete setup/exec for a bgprocess service.
// Logs the failing stage and errno text from 'errcode' and records EXECFAILED as the stop reason.
// NOTE(review): any state-changing call following the assignment is not visible in this view.
369 void bgproc_service::exec_failed(run_proc_err errcode) noexcept
371 log(loglevel_t::ERROR, get_name(), ": execution failed - ",
372 exec_stage_descriptions[static_cast<int>(errcode.stage)], ": ", strerror(errcode.st_errno));
374 // Only time we execute is for startup:
375 stop_reason = stopped_reason_t::EXECFAILED;
// Handle completion of a start or stop script for a "scripted" service.
// The STOPPING branch distinguishes an interrupted start script from a finished stop script;
// the remaining path handles the outcome of a start script (success, skipped via SIGINT for a
// skippable service, or failure).
// NOTE(review): several statements (the state-changing calls, some conditions and else
// branches) are not visible in this view; the added comments describe only what is shown.
379 void scripted_service::handle_exit_status(bp_sys::exit_status exit_status) noexcept
381 bool did_exit = exit_status.did_exit();
382 bool was_signalled = exit_status.was_signalled();
383 auto service_state = get_state();
385 // For a scripted service, a termination occurs in one of three main cases:
386 // - the start script completed (or failed), when service was STARTING
387 // - the start script was interrupted to cancel startup; state is STOPPING
388 // - the stop script completed (or failed), state is STOPPING
390 if (service_state == service_state_t::STOPPING) {
391 // We might be running the stop script, or we might be running the start script and have issued
392 // a cancel order via SIGINT:
393 if (interrupting_start) {
// The interrupted start script has exited, so a timer armed while waiting for it is cancelled.
394 if (stop_timer_armed) {
395 restart_timer.stop_timer(event_loop);
396 stop_timer_armed = false;
398 // We issued a start interrupt, so we expected this failure:
399 if (did_exit && exit_status.get_exit_status() != 0) {
400 log(loglevel_t::INFO, "Service ", get_name(), " start cancelled; exit code ",
401 exit_status.get_exit_status());
402 // Assume that a command terminating normally (with failure status) requires no cleanup:
407 log(loglevel_t::INFO, "Service ", get_name(), " start cancelled from signal ",
408 exit_status.get_term_sig());
410 // If the start script completed successfully, or was interrupted via our signal,
411 // we want to run the stop script to clean up:
414 interrupting_start = false;
416 else if (exit_status.did_exit_clean()) {
417 // We were running the stop script and finished successfully
421 // ??? failed to stop! Let's log it as warning:
423 log(loglevel_t::WARN, "Service ", get_name(), " stop command failed with exit code ",
424 exit_status.get_exit_status());
426 else if (was_signalled) {
427 log(loglevel_t::WARN, "Service ", get_name(), " stop command terminated due to signal ",
428 exit_status.get_term_sig());
430 // Even if the stop script failed, assume that service is now stopped, so that any dependencies
431 // can be stopped. There's not really any other useful course of action here.
434 services->process_queues();
// From here on: outcome of the start script.
437 if (exit_status.did_exit_clean()) {
440 else if (was_signalled && exit_status.get_term_sig() == SIGINT && onstart_flags.skippable) {
441 // A skippable service can be skipped by interrupting (eg by ^C if the service
442 // starts on the console).
443 start_skipped = true;
449 log(loglevel_t::ERROR, "Service ", get_name(), " command failed with exit code ",
450 exit_status.get_exit_status());
452 else if (was_signalled) {
453 log(loglevel_t::ERROR, "Service ", get_name(), " command terminated due to signal ",
454 exit_status.get_term_sig());
456 stop_reason = stopped_reason_t::FAILED;
459 services->process_queues();
// Called when the start or stop script could not be executed. Logs the failing stage and errno
// text from 'errcode'; if the service was STARTING, the stop reason becomes EXECFAILED.
// NOTE(review): the state-changing calls in both branches are not visible in this view.
463 void scripted_service::exec_failed(run_proc_err errcode) noexcept
465 log(loglevel_t::ERROR, get_name(), ": execution failed - ",
466 exec_stage_descriptions[static_cast<int>(errcode.stage)], ": ", strerror(errcode.st_errno));
467 auto service_state = get_state();
468 if (service_state == service_state_t::STARTING) {
469 stop_reason = stopped_reason_t::EXECFAILED;
472 else if (service_state == service_state_t::STOPPING) {
473 // We've logged the failure, but it's probably better not to leave the service in
// NOTE(review): the comment above is cut off in this view (presumably "... the STOPPING
// state"); the statement it explains is not shown either.
479 // Return a value as an unsigned-type value.
// The result type is std::make_unsigned<T>::type, i.e. the unsigned counterpart of T. Used in
// read_pid_file so that the maximum pid_t value can be compared with an unsigned long long
// without mixing signed and unsigned operands.
480 template <typename T> typename std::make_unsigned<T>::type make_unsigned_val(T val)
482 return static_cast<typename std::make_unsigned<T>::type>(val);
// Read the daemon's process id from the configured pid file and work out whether that process
// can be monitored.
//
// Parameters:
//   exit_status - passed straight to waitpid(); receives the process's status if waitpid
//                 reports on it.
// Returns:
//   pid_result_t::OK         - the process exists; tracking_child says whether it is watched as
//                              our own child (true) or is merely known to exist (false).
//   pid_result_t::TERMINATED - waitpid reported the process itself, so it has already terminated.
//   pid_result_t::FAILED     - the file could not be opened or read, or the pid is not usable
//                              (an error is logged on each visible failure path).
// NOTE(review): the error-check conditions, the closing of 'fd', and the assignment of the
// parsed value to 'pid' / 'valid_pid' are not visible in this view.
485 bgproc_service::pid_result_t
486 bgproc_service::read_pid_file(bp_sys::exit_status *exit_status) noexcept
488 const char *pid_file_c = pid_file.c_str();
// NOTE(review): no access mode is given, so this relies on O_RDONLY being the zero value, as
// it is on common platforms - confirm for the targets supported.
489 int fd = bp_sys::open(pid_file_c, O_CLOEXEC);
491 log(loglevel_t::ERROR, get_name(), ": read pid file: ", strerror(errno));
492 return pid_result_t::FAILED;
495 char pidbuf[21]; // just enough to hold any 64-bit integer
// At most 20 bytes are requested, leaving room for the terminator stored below (assumes
// complete_read never returns more than the requested count).
496 int r = complete_read(fd, pidbuf, 20);
498 // Could not read from PID file
499 log(loglevel_t::ERROR, get_name(), ": could not read from pidfile; ", strerror(errno));
501 return pid_result_t::FAILED;
505 pidbuf[r] = 0; // store nul terminator
507 bool valid_pid = false;
// Base 0 lets stoull auto-detect the radix: a "0x" prefix is read as hexadecimal and a leading
// "0" as octal. NOTE(review): a zero-padded decimal pid would therefore be parsed as octal.
509 unsigned long long v = std::stoull(pidbuf, nullptr, 0);
// Reject values that do not fit in pid_t.
510 if (v <= make_unsigned_val(std::numeric_limits<pid_t>::max())) {
515 catch (std::out_of_range &exc) {
518 catch (std::invalid_argument &exc) {
519 // Ok, so it doesn't look like a number: proceed...
// Non-blocking probe: tells us whether 'pid' is a child of ours and whether it is still running.
523 pid_t wait_r = waitpid(pid, exit_status, WNOHANG);
524 if (wait_r == -1 && errno == ECHILD) {
525 // We can't track this child - check process exists:
// Signal 0 performs only the existence/permission check; any outcome other than ESRCH is
// taken to mean the process exists.
526 if (bp_sys::kill(pid, 0) == 0 || errno != ESRCH) {
527 tracking_child = false;
528 return pid_result_t::OK;
531 log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
533 return pid_result_t::FAILED;
536 else if (wait_r == pid) {
538 return pid_result_t::TERMINATED;
540 else if (wait_r == 0) {
541 // We can track the child
542 child_listener.add_reserved(event_loop, pid, dasynq::DEFAULT_PRIORITY - 10);
543 tracking_child = true;
544 reserved_child_watch = true;
545 return pid_result_t::OK;
549 log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
551 return pid_result_t::FAILED;
// Begin stopping the process behind a "process" service.
// Three situations are distinguished: the exec status is still outstanding (nothing can be done
// yet), the process is running (signal it and optionally arm the stop timer), or the process is
// already gone.
// NOTE(review): the statements inside the first and last branches, and the one guarded by
// no_sigterm, are not visible in this view.
554 void process_service::bring_down() noexcept
556 if (waiting_for_execstat) {
557 // The process is still starting. This should be uncommon, but can occur during
558 // smooth recovery. We can't do much now; we have to wait until we get the
559 // status, and then act appropriately.
562 else if (pid != -1) {
563 // The process is still kicking on - must actually kill it. We signal the process
564 // group (-pid) rather than just the process as there's less risk then of creating
565 // an orphaned process group:
566 if (! onstart_flags.no_sigterm) {
// A term_signal of -1 means no additional signal is configured.
569 if (term_signal != -1) {
570 kill_pg(term_signal);
573 // If there's a stop timeout, arm the timer now:
574 if (stop_timeout != time_val(0,0)) {
575 restart_timer.arm_timer_rel(event_loop, stop_timeout);
576 stop_timer_armed = true;
579 // The rest is done in handle_exit_status.
582 // The process is already dead.
// Begin stopping the daemon behind a "bgprocess" service: signal it, then either handle the
// untracked-process case immediately or arm the stop timer and wait for handle_exit_status.
// NOTE(review): the opening condition (presumably a check that a process exists), the
// statement guarded by no_sigterm, and the bodies of the "! tracking_child" and "already dead"
// branches are not visible in this view.
587 void bgproc_service::bring_down() noexcept
590 // The process is still kicking on - must actually kill it. We signal the process
591 // group (-pid) rather than just the process as there's less risk then of creating
592 // an orphaned process group:
593 if (! onstart_flags.no_sigterm) {
// A term_signal of -1 means no additional signal is configured.
596 if (term_signal != -1) {
597 kill_pg(term_signal);
600 // In most cases, the rest is done in handle_exit_status.
601 // If we are a BGPROCESS and the process is not our immediate child, however, that
602 // won't work - check for this now:
603 if (! tracking_child) {
// The stop timer is only armed for a tracked child and a non-zero timeout.
606 else if (stop_timeout != time_val(0,0)) {
607 restart_timer.arm_timer_rel(event_loop, stop_timeout);
608 stop_timer_armed = true;
612 // The process is already dead.
// Begin stopping a "scripted" service by running its stop command, if it has one.
// If the stop script is launched successfully and a stop timeout is configured, the timer is
// armed so that the script's run time is bounded.
// NOTE(review): the opening condition (for the "already running the stop script" case) and the
// bodies of the "no stop command" and "could not execute" branches are not visible here.
617 void scripted_service::bring_down() noexcept
620 // We're already running the stop script; nothing to do.
624 if (stop_command.length() == 0) {
627 else if (! start_ps_process(stop_arg_parts, false)) {
628 // Couldn't execute stop script, but there's not much we can do:
632 // successfully started stop script: start kill timer:
633 if (stop_timeout != time_val(0,0)) {
634 restart_timer.arm_timer_rel(event_loop, stop_timeout);
635 stop_timer_armed = true;
// Timer callback: forwards the expiry to the owning service's timer_expired() and returns
// dasynq::rearm::NOOP. The event loop reference and 'expiry_count' are not used in the visible
// lines.
640 dasynq::rearm process_restart_timer::timer_expiry(eventloop_t &, int expiry_count)
642 service->timer_expired();
644 // Leave the timer disabled, or, if it has been reset by any processing above, leave it armed:
645 return dasynq::rearm::NOOP;