2 #include <sys/socket.h>
5 #include "dinit-socket.h"
6 #include "dinit-util.h"
7 #include "proc-service.h"
10 * Most of the implementation for process-based services (process, scripted, bgprocess) is here.
12 * See proc-service.h header for interface details.
// Split an argument string, in place, into individual C strings.
//
// Parameters:
//   s           - string holding all the arguments. It is modified: a nul terminator is
//                 written at the `end` index of each argument (when that index lies
//                 inside the string).
//   arg_indices - one (start,end) index pair per argument; `start` is the index of the
//                 argument's first character and `end` is where its terminator goes.
//
// Returns a vector containing a pointer to the beginning of each argument, in list order,
// followed by a trailing nullptr. The pointers refer to the storage of `s`, so the returned
// array is invalidated if the string is later modified.
std::vector<const char *> separate_args(std::string &s, std::list<std::pair<unsigned,unsigned>> &arg_indices)
{
    std::vector<const char *> r;
    r.reserve(arg_indices.size() + 1);

    // First store nul terminator for each part. An end index equal to s.length() needs no
    // write, since c_str() already provides a terminator at that position.
    for (const auto &index_pair : arg_indices) {
        if (index_pair.second < s.length()) {
            s[index_pair.second] = 0;
        }
    }

    // Now we can get the C string (c_str) and store offsets into it. This is done only
    // after every terminator has been written, so the string is not modified afterwards.
    const char * cstr = s.c_str();
    for (const auto &index_pair : arg_indices) {
        r.push_back(cstr + index_pair.first);
    }

    // Trailing nullptr marks the end of the argument array.
    r.push_back(nullptr);
    return r;
}
// Handle a successful exec() of the service process (presumably invoked once the exec
// status has been received without error - confirm against the caller).
// Only the STARTING and STOPPING states are acted upon here; in any other state (for
// example STARTED, during smooth recovery) neither branch is taken.
39 void process_service::exec_succeeded() noexcept
41 // This could be a smooth recovery (state already STARTED). Even more, the process
42 // might be stopped (and killed via a signal) during smooth recovery. We don't want to
43 // process startup again in either case, so we check for state STARTING:
44 if (get_state() == service_state_t::STARTING) {
47 else if (get_state() == service_state_t::STOPPING) {
48 // stopping, but smooth recovery was in progress. That's now over so we can
49 // commence normal stop. Note that if pid == -1 the process already stopped(!),
50 // that's handled below.
// Only proceed while the process still exists (pid != -1) and stop_check_dependents()
// returns true:
51 if (pid != -1 && stop_check_dependents()) {
// Event-loop callback for the fd on which the exec() status of the forked child arrives.
// Reads one int from the watched fd, then closes the fd. If an errno code was read,
// exec() failed: the child watch is deregistered, an armed stop timer is cancelled and
// exec_failed() is called with the code. Service queues are processed before returning
// rearm::REMOVED.
57 rearm exec_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) noexcept
59 base_process_service *sr = service;
// The status is being delivered now, so the service is no longer waiting for it:
60 sr->waiting_for_execstat = false;
63 int r = read(get_watched_fd(), &exec_status, sizeof(int));
// The fd is not needed after this single read:
65 close(get_watched_fd());
68 // We read an errno code; exec() failed, and the service startup failed.
// NOTE(review): this call uses `event_loop` while the timer call below uses the `loop`
// parameter - presumably the same loop object; confirm.
70 sr->child_listener.deregister(event_loop, sr->pid);
71 sr->reserved_child_watch = false;
72 if (sr->stop_timer_armed) {
73 sr->restart_timer.stop_timer(loop);
74 sr->stop_timer_armed = false;
78 sr->exec_failed(exec_status);
84 // Somehow the process managed to complete before we even saw the exec() status.
// Use the exit status saved in the service record for the deferred exit handling:
85 sr->handle_exit_status(sr->exit_status);
89 sr->services->process_queues();
91 return rearm::REMOVED;
// Child-watch callback: receives the status of the watched child process.
// The status is stored in the service record. If the exec() status for the child has not
// been received yet, further processing is deferred; otherwise an armed stop timer is
// cancelled and the service's handle_exit_status() is called. Returns rearm::NOOP on both
// paths.
94 dasynq::rearm service_child_watcher::status_change(eventloop_t &loop, pid_t child, int status) noexcept
96 base_process_service *sr = service;
// Save the status so that it remains available if handling is deferred below:
99 sr->exit_status = status;
101 // Ok, for a process service, any process death which we didn't rig
102 // ourselves is a bit... unexpected. Probably, the child died because
103 // we asked it to (sr->service_state == STOPPING). But even if
104 // we didn't, there's not much we can do.
106 if (sr->waiting_for_execstat) {
107 // We still don't have an exec() status from the forked child, wait for that
108 // before doing any further processing.
109 return dasynq::rearm::NOOP; // hold watch reservation
112 // Must stop watch now since handle_exit_status might result in re-launch:
113 // (stop_watch instead of deregister, so that we hold watch reservation).
// The process has terminated, so a pending stop timeout is no longer needed:
116 if (sr->stop_timer_armed) {
117 sr->restart_timer.stop_timer(loop);
118 sr->stop_timer_armed = false;
121 sr->handle_exit_status(status);
122 return dasynq::rearm::NOOP;
// Handle termination of the service process.
//   exit_status - wait status of the process, decoded below with the WIFEXITED /
//                 WIFSIGNALED / WEXITSTATUS / WTERMSIG macros.
// A non-zero status is logged as an error unless the service is STOPPING. Further handling
// depends on the current service state (STARTING, STOPPING, or STARTED with smooth
// recovery). Service queues are processed at the end.
125 void process_service::handle_exit_status(int exit_status) noexcept
127 bool did_exit = WIFEXITED(exit_status);
128 bool was_signalled = WIFSIGNALED(exit_status);
// Take a snapshot of the state; all decisions below are based on it.
130 auto service_state = get_state();
// Unexpected (non-requested) abnormal termination: log the exit code or signal.
132 if (exit_status != 0 && service_state != service_state_t::STOPPING) {
134 log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
135 WEXITSTATUS(exit_status));
137 else if (was_signalled) {
138 log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
139 WTERMSIG(exit_status));
143 if (service_state == service_state_t::STARTING) {
// Distinguish a normal exit with code 0 from any other termination while STARTING:
144 if (did_exit && WEXITSTATUS(exit_status) == 0) {
151 else if (service_state == service_state_t::STOPPING) {
152 // We won't log a non-zero exit status or termination due to signal here -
153 // we assume that the process died because we signalled it.
// Smooth recovery applies only if enabled, the service is STARTED, and it is still
// meant to be STARTED:
156 else if (smooth_recovery && service_state == service_state_t::STARTED
157 && get_target_state() == service_state_t::STARTED) {
158 do_smooth_recovery();
164 services->process_queues();
// Handle failure to exec() the service process.
//   errcode - errno value describing the failure; its strerror() text is logged.
// After logging, handling differs depending on whether the service is STARTING or not.
167 void process_service::exec_failed(int errcode) noexcept
169 log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
170 if (get_state() == service_state_t::STARTING) {
174 // Process service in smooth recovery:
// Handle termination of the process launched for a "bgprocess" service.
//   exit_status - wait status of the terminated process. Note that it may be overwritten
//                 below: its address is passed to read_pid_file(), which hands it to
//                 waitpid() for the pid read from the pid file.
// A non-zero status is logged as an error unless the service is STOPPING. The remaining
// handling covers: a restart while STARTED ("smooth recovery"), the STARTING state (pid
// file is read once the launcher exits with status 0), the STOPPING state, and finally the
// STARTED state.
179 void bgproc_service::handle_exit_status(int exit_status) noexcept
182 bool did_exit = WIFEXITED(exit_status);
183 bool was_signalled = WIFSIGNALED(exit_status);
184 auto service_state = get_state();
// Unexpected abnormal termination: log the exit code or signal.
186 if (exit_status != 0 && service_state != service_state_t::STOPPING) {
188 log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
189 WEXITSTATUS(exit_status));
191 else if (was_signalled) {
192 log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
193 WTERMSIG(exit_status));
197 // This may be a "smooth recovery" where we are restarting the process while leaving the
198 // service in the STARTED state.
199 if (restarting && service_state == service_state_t::STARTED) {
201 bool need_stop = false;
// A non-zero exit code or death by signal counts as a failed restart:
202 if ((did_exit && WEXITSTATUS(exit_status) != 0) || was_signalled) {
206 // We need to re-read the PID, since it has now changed.
207 if (pid_file.length() != 0) {
208 auto pid_result = read_pid_file(&exit_status);
209 switch (pid_result) {
210 case pid_result_t::FAILED:
211 // Failed startup: no auto-restart.
214 case pid_result_t::TERMINATED:
216 case pid_result_t::OK:
223 // Failed startup: no auto-restart.
225 services->process_queues();
232 if (service_state == service_state_t::STARTING) {
233 // POSIX requires that if the process exited cleanly with a status code of 0,
234 // the exit status value will be 0:
235 if (exit_status == 0) {
// The launcher finished successfully; find the actual service process via the pid file.
236 auto pid_result = read_pid_file(&exit_status);
237 switch (pid_result) {
238 case pid_result_t::FAILED:
239 // Failed startup: no auto-restart.
242 case pid_result_t::TERMINATED:
243 // started, but immediately terminated
246 case pid_result_t::OK:
255 else if (service_state == service_state_t::STOPPING) {
256 // We won't log a non-zero exit status or termination due to signal here -
257 // we assume that the process died because we signalled it.
261 // we must be STARTED
262 if (smooth_recovery && get_target_state() == service_state_t::STARTED) {
263 do_smooth_recovery();
// If no automatic restart takes place, the explicit-start flag is cleared:
266 if (! do_auto_restart() && start_explicit) {
267 start_explicit = false;
274 services->process_queues();
// Handle failure to exec() the process of a "bgprocess" service.
//   errcode - errno value describing the failure; its strerror() text is logged.
277 void bgproc_service::exec_failed(int errcode) noexcept
279 log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
280 // Only time we execute is for startup:
// Handle termination of a script (start or stop command) run for a scripted service.
//   exit_status - wait status of the script process, decoded with the WIFEXITED /
//                 WIFSIGNALED / WEXITSTATUS / WTERMSIG macros.
// The STOPPING state is handled first (stop script finished, or start script was
// interrupted); otherwise the termination is treated as the outcome of the start command.
284 void scripted_service::handle_exit_status(int exit_status) noexcept
286 bool did_exit = WIFEXITED(exit_status);
287 bool was_signalled = WIFSIGNALED(exit_status);
288 auto service_state = get_state();
290 // For a scripted service, a termination occurs in one of three main cases:
291 // - the start script completed (or failed), when service was STARTING
292 // - the start script was interrupted to cancel startup; state is STOPPING
293 // - the stop script completed (or failed), state is STOPPING
295 if (service_state == service_state_t::STOPPING) {
296 // We might be running the stop script, or we might be running the start script and have issued
297 // a cancel order via SIGINT:
298 if (did_exit && WEXITSTATUS(exit_status) == 0) {
// Exit code 0 while an interrupt was pending: it was the start script that finished.
299 if (interrupting_start) {
300 interrupting_start = false;
301 // launch stop script:
305 // We were running the stop script and finished successfully
310 if (interrupting_start) {
311 // We issued a start interrupt, so we expected this failure:
313 log(loglevel_t::INFO, "Service ", get_name(), " start cancelled; exit code ",
314 WEXITSTATUS(exit_status));
316 else if (was_signalled) {
317 log(loglevel_t::INFO, "Service ", get_name(), " start cancelled from signal ",
318 WTERMSIG(exit_status));
322 // ??? failed to stop! Let's log it as warning:
324 log(loglevel_t::WARN, "Service ", get_name(), " stop command failed with exit code ",
325 WEXITSTATUS(exit_status));
327 else if (was_signalled) {
328 log(loglevel_t::WARN, "Service ", get_name(), " stop command terminated due to signal ",
329 WTERMSIG(exit_status));
332 // Even if the stop script failed, assume that service is now stopped, so that any dependencies
333 // can be stopped. There's not really any other useful course of action here.
334 interrupting_start = false;
337 services->process_queues();
// Not STOPPING: a wait status of exactly 0 means the command exited normally with code 0.
340 if (exit_status == 0) {
346 log(loglevel_t::ERROR, "Service ", get_name(), " command failed with exit code ",
347 WEXITSTATUS(exit_status));
349 else if (was_signalled) {
350 log(loglevel_t::ERROR, "Service ", get_name(), " command terminated due to signal ",
351 WTERMSIG(exit_status));
355 services->process_queues();
// Handle failure to exec() a script of a scripted service.
//   errcode - errno value describing the failure; its strerror() text is logged.
// After logging, the STARTING and STOPPING states are handled separately.
359 void scripted_service::exec_failed(int errcode) noexcept
361 log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
362 auto service_state = get_state();
363 if (service_state == service_state_t::STARTING) {
366 else if (service_state == service_state_t::STOPPING) {
367 // We've logged the failure, but it's probably better not to leave the service in
// Read the process ID of the service process from the configured pid file and determine
// whether that process can be tracked as a child.
//   exit_status - passed to waitpid(); receives the wait status if the process turns out
//                 to be a child that has already terminated.
// Returns:
//   pid_result_t::FAILED     - the file could not be opened or read, or the pid is not valid
//   pid_result_t::TERMINATED - waitpid() reported that the process has already terminated
//   pid_result_t::OK         - the process exists; `tracking_child` records whether it is
//                              being watched as a child (true) or not (false)
373 bgproc_service::pid_result_t
374 bgproc_service::read_pid_file(int *exit_status) noexcept
376 const char *pid_file_c = pid_file.c_str();
// NOTE(review): no access mode (O_RDONLY) is given. POSIX requires one to be specified;
// this works where O_RDONLY is 0 - consider `O_RDONLY | O_CLOEXEC`.
377 int fd = open(pid_file_c, O_CLOEXEC);
379 log(loglevel_t::ERROR, get_name(), ": read pid file: ", strerror(errno));
380 return pid_result_t::FAILED;
383 char pidbuf[21]; // just enough to hold any 64-bit integer
// Read at most 20 bytes, leaving room for the terminator stored below.
384 int r = ss_read(fd, pidbuf, 20);
386 // Could not read from PID file
387 log(loglevel_t::ERROR, get_name(), ": could not read from pidfile; ", strerror(errno));
389 return pid_result_t::FAILED;
393 pidbuf[r] = 0; // store nul terminator
395 bool valid_pid = false;
// NOTE(review): base 0 makes stoull auto-detect the base, so a leading "0" or "0x" is
// parsed as octal or hexadecimal - confirm that this is intended for pid files.
397 unsigned long long v = std::stoull(pidbuf, nullptr, 0);
// Reject values that do not fit in pid_t.
// NOTE(review): a value of 0 passes this range check; waitpid(0, ...) and kill(0, ...)
// address the caller's process group rather than one process - verify 0 is rejected.
398 if (v <= std::numeric_limits<pid_t>::max()) {
403 catch (std::out_of_range &exc) {
406 catch (std::invalid_argument &exc) {
407 // Ok, so it doesn't look like a number: proceed...
// Non-blocking check (WNOHANG) of whether the pid is one of our children:
411 pid_t wait_r = waitpid(pid, exit_status, WNOHANG);
412 if (wait_r == -1 && errno == ECHILD) {
413 // We can't track this child - check process exists:
// Signal 0 sends nothing and only performs the existence/permission check. Any result
// other than an ESRCH failure is treated as "the process exists".
414 if (kill(pid, 0) == 0 || errno != ESRCH) {
415 tracking_child = false;
416 return pid_result_t::OK;
419 log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
421 return pid_result_t::FAILED;
// waitpid() returned the pid itself: the process is our child and has already terminated.
424 else if (wait_r == pid) {
426 return pid_result_t::TERMINATED;
// waitpid() returned 0: the process is our child and is still running.
428 else if (wait_r == 0) {
429 // We can track the child
430 child_listener.add_reserved(event_loop, pid, dasynq::DEFAULT_PRIORITY - 10);
431 tracking_child = true;
432 reserved_child_watch = true;
433 return pid_result_t::OK;
// Presumably the fall-through case (any other waitpid() result) - treated as invalid pid:
437 log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
439 return pid_result_t::FAILED;
// Begin stopping the service process.
// Three situations are distinguished: the exec() status of a newly forked process is
// still pending; a process exists (pid != -1) and has to be signalled; or the process is
// already dead.
442 void process_service::bring_down() noexcept
444 waiting_for_deps = false;
445 if (waiting_for_execstat) {
446 // The process is still starting. This should be uncommon, but can occur during
447 // smooth recovery. We can't do much now; we have to wait until we get the
448 // status, and then act appropriately.
451 else if (pid != -1) {
452 // The process is still kicking on - must actually kill it. We signal the process
453 // group (-pid) rather than just the process as there's less risk then of creating
454 // an orphaned process group:
// The `no_sigterm` start flag suppresses the handling inside this branch:
455 if (! onstart_flags.no_sigterm) {
// A term_signal of -1 means no additional termination signal is sent here:
458 if (term_signal != -1) {
459 kill_pg(term_signal);
462 // In most cases, the rest is done in handle_exit_status.
463 // If we are a BGPROCESS and the process is not our immediate child, however, that
464 // won't work - check for this now:
465 if (get_type() == service_type_t::BGPROCESS && ! tracking_child) {
// A zero stop_timeout disables the stop timer; otherwise arm it (relative timeout):
468 else if (stop_timeout != time_val(0,0)) {
469 restart_timer.arm_timer_rel(event_loop, stop_timeout);
470 stop_timer_armed = true;
474 // The process is already dead.
// Begin stopping a scripted service.
// With an empty stop command there is no script to run; otherwise the stop script is
// launched via start_ps_process() and, when a non-zero stop_timeout is configured, the
// timer is armed for it.
479 void scripted_service::bring_down() noexcept
481 waiting_for_deps = false;
482 if (stop_command.length() == 0) {
485 else if (! start_ps_process(stop_arg_parts, false)) {
486 // Couldn't execute stop script, but there's not much we can do:
490 // successfully started stop script: start kill timer:
// A zero stop_timeout means no timer is armed:
491 if (stop_timeout != time_val(0,0)) {
492 restart_timer.arm_timer_rel(event_loop, stop_timeout);
493 stop_timer_armed = true;
// Timer callback for the service's combined stop/start/restart timer.
// Clears the service's `stop_timer_armed` flag, then acts according to the service state
// and whether a process currently exists (pid != -1). Returns rearm::NOOP.
498 dasynq::rearm process_restart_timer::timer_expiry(eventloop_t &, int expiry_count)
500 service->stop_timer_armed = false;
503 // We are stopping, including after having startup cancelled (stop timeout, state is STOPPING); We are
504 // starting (start timeout, state is STARTING); We are waiting for restart timer before restarting,
505 // including smooth recovery (restart timeout, state is STARTING or STARTED).
506 if (service->get_state() == service_state_t::STOPPING) {
// Stop timeout expired: escalate via kill_with_fire().
507 service->kill_with_fire();
509 else if (service->pid != -1) {
510 // Starting, start timed out.
511 service->stop_dependents();
512 service->interrupt_start();
515 // STARTING / STARTED, and we have no pid (the case pid != -1 was handled above): must be
// restarting (smooth recovery if STARTED)
516 service->do_restart();
519 // Leave the timer disabled, or, if it has been reset by any processing above, leave it armed:
520 return dasynq::rearm::NOOP;