2 #include <sys/socket.h>
4 #include "dinit-socket.h"
5 #include "dinit-util.h"
6 #include "proc-service.h"
// The event loop instance; declared extern here, so its definition lives in another translation unit.
8 extern eventloop_t event_loop;
// Shorthand aliases for types from the dasynq namespace.
10 using clock_type = dasynq::clock_type;
11 using rearm = dasynq::rearm;
12 using time_val = dasynq::time_val;
// Split a command string, in place, into nul-terminated arguments.
//
// Parameters:
//   s            - the string containing all arguments; it is modified (nul terminators are
//                  written into it).
//   arg_indices  - one (start,end) index pair per argument. A nul terminator is stored at each
//                  `end` index that lies inside the string; an `end` equal to (or beyond)
//                  s.length() is left alone, since the string's own terminator already ends
//                  that argument.
//
// Returns a `char *` vector containing the beginning of each argument, in list order, followed
// by a trailing nullptr. The pointers refer to the storage of `s`, so the returned array is
// invalidated if the string is later modified.
std::vector<const char *> separate_args(std::string &s, std::list<std::pair<unsigned,unsigned>> &arg_indices)
{
    std::vector<const char *> r;
    // One slot per argument plus one for the trailing nullptr.
    r.reserve(arg_indices.size() + 1);

    // First store nul terminator for each part:
    for (const auto &index_pair : arg_indices) {
        if (index_pair.second < s.length()) {
            s[index_pair.second] = 0;
        }
    }

    // Now we can get the C string (c_str) and store offsets into it. This is done only after
    // all writes to the string are complete, so the pointer obtained here stays valid.
    const char * cstr = s.c_str();
    for (const auto &index_pair : arg_indices) {
        r.push_back(cstr + index_pair.first);
    }

    // Terminate the pointer array itself.
    r.push_back(nullptr);
    return r;
}
// Handles an event on the exec status pipe of the associated service.
// From the visible code: clears the service's waiting_for_execstat flag, reads one int from
// the watched fd into exec_status, closes the fd, then acts on the result and returns
// rearm::REMOVED.
// NOTE(review): some lines of this function are not visible here; comments describe only what is shown.
38 rearm exec_status_pipe_watcher::fd_event(eventloop_t &loop, int fd, int flags) noexcept
40 base_process_service *sr = service;
// Clear the flag that status_change() and bring_down() check before doing further processing.
41 sr->waiting_for_execstat = false;
// Try to read a single int from the pipe (an errno code, per the comment further down).
44 int r = read(get_watched_fd(), &exec_status, sizeof(int));
// The pipe is closed after this single read attempt.
46 close(get_watched_fd());
49 // We read an errno code; exec() failed, and the service startup failed.
// Give up the child watch and cancel the stop timer (if armed) before reporting the failure.
51 sr->child_listener.deregister(event_loop, sr->pid);
52 sr->reserved_child_watch = false;
53 if (sr->stop_timer_armed) {
54 sr->restart_timer.stop_timer(loop);
55 sr->stop_timer_armed = false;
// Hand the errno value read from the pipe to the service's exec_failed() handler.
59 sr->exec_failed(exec_status);
63 if (sr->get_type() == service_type_t::PROCESS) {
64 // This could be a smooth recovery (state already STARTED). Even more, the process
65 // might be stopped (and killed via a signal) during smooth recovery. We don't want to
66 // process startup again in either case, so we check for state STARTING:
67 if (sr->get_state() == service_state_t::STARTING) {
70 else if (sr->get_state() == service_state_t::STOPPING) {
71 // stopping, but smooth recovery was in process. That's now over so we can
72 // commence normal stop. Note that if pid == -1 the process already stopped(!),
73 // that's handled below.
74 if (sr->pid != -1 && sr->stop_check_dependents()) {
81 // Somehow the process managed to complete before we even saw the status.
// Use the exit status that status_change() stored in sr->exit_status.
82 sr->handle_exit_status(sr->exit_status);
86 sr->services->process_queues();
88 return rearm::REMOVED;
// Called with the new status of the watched child process.
// The status is always recorded in the service's exit_status. If the exec status has not yet
// been received, nothing else is done here; otherwise any armed stop timer is cancelled and
// the status is passed to the service's handle_exit_status(). Both visible return paths
// return rearm::NOOP.
91 dasynq::rearm service_child_watcher::status_change(eventloop_t &loop, pid_t child, int status) noexcept
93 base_process_service *sr = service;
// Store the status so that it is still available if processing is deferred (see below).
96 sr->exit_status = status;
98 // Ok, for a process service, any process death which we didn't rig
99 // ourselves is a bit... unexpected. Probably, the child died because
100 // we asked it to (sr->service_state == STOPPING). But even if
101 // we didn't, there's not much we can do.
103 if (sr->waiting_for_execstat) {
104 // We still don't have an exec() status from the forked child, wait for that
105 // before doing any further processing.
106 return dasynq::rearm::NOOP; // hold watch reservation
109 // Must stop watch now since handle_exit_status might result in re-launch:
110 // (stop_watch instead of deregister, so that we hold watch reservation).
// The child has terminated, so a pending stop timer is no longer needed.
113 if (sr->stop_timer_armed) {
114 sr->restart_timer.stop_timer(loop);
115 sr->stop_timer_armed = false;
118 sr->handle_exit_status(status);
119 return dasynq::rearm::NOOP;
// Reacts to termination of the service process.
// exit_status is decoded with the WIFEXITED / WIFSIGNALED / WEXITSTATUS / WTERMSIG macros,
// i.e. it is a wait()-style status word rather than a plain exit code.
// NOTE(review): some lines of this function are not visible here; comments describe only what is shown.
122 void process_service::handle_exit_status(int exit_status) noexcept
124 bool did_exit = WIFEXITED(exit_status);
125 bool was_signalled = WIFSIGNALED(exit_status);
127 auto service_state = get_state();
// A non-zero status is only logged as an error when the service is not already STOPPING.
129 if (exit_status != 0 && service_state != service_state_t::STOPPING) {
131 log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
132 WEXITSTATUS(exit_status));
134 else if (was_signalled) {
135 log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
136 WTERMSIG(exit_status));
// Further handling depends on the state the service was in when the process terminated.
140 if (service_state == service_state_t::STARTING) {
// A normal exit with exit code 0 is distinguished from any other termination during startup.
141 if (did_exit && WEXITSTATUS(exit_status) == 0) {
148 else if (service_state == service_state_t::STOPPING) {
149 // We won't log a non-zero exit status or termination due to signal here -
150 // we assume that the process died because we signalled it.
// Smooth recovery is attempted only if it is enabled, the service is STARTED, and STARTED is
// also its target state.
153 else if (smooth_recovery && service_state == service_state_t::STARTED
154 && get_target_state() == service_state_t::STARTED) {
155 do_smooth_recovery();
161 services->process_queues();
// Handles a failure to execute the service process. errcode is an errno value; it is turned
// into text with strerror() for the log message.
164 void process_service::exec_failed(int errcode) noexcept
166 log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
// The STARTING state is handled separately from the smooth-recovery case noted below.
167 if (get_state() == service_state_t::STARTING) {
171 // Process service in smooth recovery:
// Reacts to an exit status reported for this bgproc service.
// exit_status is a wait()-style status word (it is decoded with WIFEXITED / WIFSIGNALED /
// WEXITSTATUS / WTERMSIG). In two places the PID file is read via read_pid_file(), which
// may overwrite the local exit_status through the pointer passed to it.
// NOTE(review): some lines of this function are not visible here; comments describe only what is shown.
176 void bgproc_service::handle_exit_status(int exit_status) noexcept
179 bool did_exit = WIFEXITED(exit_status);
180 bool was_signalled = WIFSIGNALED(exit_status);
181 auto service_state = get_state();
// A non-zero status is only logged as an error when the service is not already STOPPING.
183 if (exit_status != 0 && service_state != service_state_t::STOPPING) {
185 log(loglevel_t::ERROR, "Service ", get_name(), " process terminated with exit code ",
186 WEXITSTATUS(exit_status));
188 else if (was_signalled) {
189 log(loglevel_t::ERROR, "Service ", get_name(), " terminated due to signal ",
190 WTERMSIG(exit_status));
194 // This may be a "smooth recovery" where we are restarting the process while leaving the
195 // service in the STARTED state.
196 if (restarting && service_state == service_state_t::STARTED) {
198 bool need_stop = false;
// A non-zero exit code or death by signal is checked for first.
199 if ((did_exit && WEXITSTATUS(exit_status) != 0) || was_signalled) {
203 // We need to re-read the PID, since it has now changed.
// The PID file is only consulted when a PID file path has been configured.
204 if (pid_file.length() != 0) {
205 auto pid_result = read_pid_file(&exit_status);
206 switch (pid_result) {
207 case pid_result_t::FAILED:
208 // Failed startup: no auto-restart.
211 case pid_result_t::TERMINATED:
213 case pid_result_t::OK:
220 // Failed startup: no auto-restart.
222 services->process_queues();
229 if (service_state == service_state_t::STARTING) {
230 // POSIX requires that if the process exited cleanly with a status code of 0,
231 // the exit status value will be 0:
232 if (exit_status == 0) {
// The launched process exited successfully; obtain the PID to track from the PID file.
233 auto pid_result = read_pid_file(&exit_status);
234 switch (pid_result) {
235 case pid_result_t::FAILED:
236 // Failed startup: no auto-restart.
239 case pid_result_t::TERMINATED:
240 // started, but immediately terminated
243 case pid_result_t::OK:
252 else if (service_state == service_state_t::STOPPING) {
253 // We won't log a non-zero exit status or termination due to signal here -
254 // we assume that the process died because we signalled it.
258 // we must be STARTED
// Smooth recovery is attempted only if it is enabled and STARTED is still the target state.
259 if (smooth_recovery && get_target_state() == service_state_t::STARTED) {
260 do_smooth_recovery();
// If no automatic restart takes place, the start_explicit flag is cleared.
263 if (! do_auto_restart() && start_explicit) {
264 start_explicit = false;
271 services->process_queues();
// Handles a failure to execute the service process. errcode is an errno value; it is turned
// into text with strerror() for the log message.
274 void bgproc_service::exec_failed(int errcode) noexcept
276 log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
277 // Only time we execute is for startup:
// Reacts to termination of a script (start or stop command) run for this scripted service.
// exit_status is a wait()-style status word (it is decoded with WIFEXITED / WIFSIGNALED /
// WEXITSTATUS / WTERMSIG). The interrupting_start flag is used to tell a cancelled start
// script apart from a stop script while the state is STOPPING.
// NOTE(review): some lines of this function are not visible here; comments describe only what is shown.
281 void scripted_service::handle_exit_status(int exit_status) noexcept
283 bool did_exit = WIFEXITED(exit_status);
284 bool was_signalled = WIFSIGNALED(exit_status);
285 auto service_state = get_state();
287 // For a scripted service, a termination occurs in one of three main cases:
288 // - the start script completed (or failed), when service was STARTING
289 // - the start script was interrupted to cancel startup; state is STOPPING
290 // - the stop script completed (or failed), state is STOPPING
292 if (service_state == service_state_t::STOPPING) {
293 // We might be running the stop script, or we might be running the start script and have issued
294 // a cancel order via SIGINT:
// Successful termination (normal exit with code 0):
295 if (did_exit && WEXITSTATUS(exit_status) == 0) {
296 if (interrupting_start) {
297 interrupting_start = false;
298 // launch stop script:
302 // We were running the stop script and finished successfully
307 if (interrupting_start) {
308 // We issued a start interrupt, so we expected this failure:
// Expected outcome, hence logged at INFO level only.
310 log(loglevel_t::INFO, "Service ", get_name(), " start cancelled; exit code ",
311 WEXITSTATUS(exit_status));
313 else if (was_signalled) {
314 log(loglevel_t::INFO, "Service ", get_name(), " start cancelled from signal ",
315 WTERMSIG(exit_status));
319 // ??? failed to stop! Let's log it as warning:
321 log(loglevel_t::WARN, "Service ", get_name(), " stop command failed with exit code ",
322 WEXITSTATUS(exit_status));
324 else if (was_signalled) {
325 log(loglevel_t::WARN, "Service ", get_name(), " stop command terminated due to signal ",
326 WTERMSIG(exit_status));
329 // Even if the stop script failed, assume that service is now stopped, so that any dependencies
330 // can be stopped. There's not really any other useful course of action here.
331 interrupting_start = false;
334 services->process_queues();
// A status word of 0 means the command exited normally with exit code 0.
337 if (exit_status == 0) {
343 log(loglevel_t::ERROR, "Service ", get_name(), " command failed with exit code ",
344 WEXITSTATUS(exit_status));
346 else if (was_signalled) {
347 log(loglevel_t::ERROR, "Service ", get_name(), " command terminated due to signal ",
348 WTERMSIG(exit_status));
352 services->process_queues();
// Handles a failure to execute a script for this service. errcode is an errno value; it is
// turned into text with strerror() for the log message. The failure is logged first, and the
// STARTING and STOPPING states are then handled separately.
356 void scripted_service::exec_failed(int errcode) noexcept
358 log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
359 auto service_state = get_state();
360 if (service_state == service_state_t::STARTING) {
363 else if (service_state == service_state_t::STOPPING) {
364 // We've logged the failure, but it's probably better not to leave the service in
// Reads a process ID from the service's PID file and checks the state of that process.
//
// Parameters:
//   exit_status - passed straight to waitpid(); receives the wait status if the process
//                 turns out to be a child of ours that has already terminated.
//
// Returns (as far as the visible code shows):
//   pid_result_t::FAILED     - the file could not be opened or read, or the PID is not valid.
//   pid_result_t::TERMINATED - waitpid() reported that the process has already terminated.
//   pid_result_t::OK         - the process exists; tracking_child records whether it can be
//                              watched as a child (true) or not (false).
// NOTE(review): some lines of this function are not visible here; comments describe only what is shown.
370 bgproc_service::pid_result_t
371 bgproc_service::read_pid_file(int *exit_status) noexcept
373 const char *pid_file_c = pid_file.c_str();
// NOTE(review): no access mode flag is given. POSIX requires exactly one of O_RDONLY /
// O_WRONLY / O_RDWR (etc.); this relies on O_RDONLY being 0 - consider O_RDONLY | O_CLOEXEC.
374 int fd = open(pid_file_c, O_CLOEXEC);
376 log(loglevel_t::ERROR, get_name(), ": read pid file: ", strerror(errno));
377 return pid_result_t::FAILED;
// At most 20 bytes are read, leaving room in the 21-byte buffer for the nul terminator.
380 char pidbuf[21]; // just enough to hold any 64-bit integer
381 int r = ss_read(fd, pidbuf, 20);
383 // Could not read from PID file
384 log(loglevel_t::ERROR, get_name(), ": could not read from pidfile; ", strerror(errno));
386 return pid_result_t::FAILED;
390 pidbuf[r] = 0; // store nul terminator
392 bool valid_pid = false;
// NOTE(review): base 0 makes stoull auto-detect the base, so a value with a leading "0" is
// parsed as octal and "0x" as hexadecimal - confirm this is intended for PID files.
394 unsigned long long v = std::stoull(pidbuf, nullptr, 0);
// Reject values that cannot be represented as a pid_t.
395 if (v <= std::numeric_limits<pid_t>::max()) {
400 catch (std::out_of_range &exc) {
403 catch (std::invalid_argument &exc) {
404 // Ok, so it doesn't look like a number: proceed...
// Non-blocking check (WNOHANG) of whether the process is a child of ours, and of its state.
408 pid_t wait_r = waitpid(pid, exit_status, WNOHANG);
409 if (wait_r == -1 && errno == ECHILD) {
410 // We can't track this child - check process exists:
// kill() with signal 0 sends nothing; it only performs the existence/permission checks. Any
// failure other than ESRCH is treated here as "process exists".
411 if (kill(pid, 0) == 0 || errno != ESRCH) {
412 tracking_child = false;
413 return pid_result_t::OK;
416 log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
418 return pid_result_t::FAILED;
// waitpid() returned the pid itself: the child has already terminated.
421 else if (wait_r == pid) {
423 return pid_result_t::TERMINATED;
// waitpid() returned 0: it is our child and has not changed state yet.
425 else if (wait_r == 0) {
426 // We can track the child
427 child_listener.add_reserved(event_loop, pid, dasynq::DEFAULT_PRIORITY - 10);
428 tracking_child = true;
429 reserved_child_watch = true;
430 return pid_result_t::OK;
434 log(loglevel_t::ERROR, get_name(), ": pid read from pidfile (", pid, ") is not valid");
436 return pid_result_t::FAILED;
// Begins stopping the service process.
// Three situations are distinguished: the exec status of a freshly forked process is still
// pending, a process is running (pid != -1), or the process is already dead.
// NOTE(review): some lines of this function are not visible here; comments describe only what is shown.
439 void process_service::bring_down() noexcept
441 waiting_for_deps = false;
442 if (waiting_for_execstat) {
443 // The process is still starting. This should be uncommon, but can occur during
444 // smooth recovery. We can't do much now; we have to wait until we get the
445 // status, and then act appropriately.
448 else if (pid != -1) {
449 // The process is still kicking on - must actually kill it. We signal the process
450 // group (-pid) rather than just the process as there's less risk then of creating
451 // an orphaned process group:
// The no_sigterm start flag is checked before signalling.
452 if (! onstart_flags.no_sigterm) {
// A term_signal of -1 means no additional termination signal is sent.
455 if (term_signal != -1) {
456 kill_pg(term_signal);
459 // In most cases, the rest is done in handle_exit_status.
460 // If we are a BGPROCESS and the process is not our immediate child, however, that
461 // won't work - check for this now:
462 if (get_type() == service_type_t::BGPROCESS && ! tracking_child) {
// Otherwise arm the stop timer, unless the stop timeout is zero.
465 else if (stop_timeout != time_val(0,0)) {
466 restart_timer.arm_timer_rel(event_loop, stop_timeout);
467 stop_timer_armed = true;
471 // The process is already dead.
// Begins stopping a scripted service.
// Three cases are distinguished: no stop command is configured, the stop script could not be
// started, or the stop script was started (in which case the stop timer may be armed).
// NOTE(review): some lines of this function are not visible here; comments describe only what is shown.
476 void scripted_service::bring_down() noexcept
478 waiting_for_deps = false;
// An empty stop_command means there is no stop script to run.
479 if (stop_command.length() == 0) {
482 else if (! start_ps_process(stop_arg_parts, false)) {
483 // Couldn't execute stop script, but there's not much we can do:
487 // successfully started stop script: start kill timer:
// A zero stop timeout means no timer is armed.
488 if (stop_timeout != time_val(0,0)) {
489 restart_timer.arm_timer_rel(event_loop, stop_timeout);
490 stop_timer_armed = true;
// Called when the service's restart/stop timer expires.
// The stop_timer_armed flag is cleared first; the action then depends on the service state and
// on whether the service currently has a process (pid != -1). Always returns rearm::NOOP.
// NOTE(review): some lines of this function are not visible here; comments describe only what is shown.
495 dasynq::rearm process_restart_timer::timer_expiry(eventloop_t &, int expiry_count)
497 service->stop_timer_armed = false;
500 // We are stopping, including after having startup cancelled (stop timeout, state is STOPPING); We are
501 // starting (start timeout, state is STARTING); We are waiting for restart timer before restarting,
502 // including smooth recovery (restart timeout, state is STARTING or STARTED).
// Stop timeout: escalate via kill_with_fire().
503 if (service->get_state() == service_state_t::STOPPING) {
504 service->kill_with_fire();
506 else if (service->pid != -1) {
507 // Starting, start timed out.
508 service->stop_dependents();
509 service->interrupt_start();
512 // STARTING / STARTED, and (presumably, since the pid != -1 case is handled above) we have no
// pid: must be restarting (smooth recovery if STARTED)
513 service->do_restart();
516 // Leave the timer disabled, or, if it has been reset by any processing above, leave it armed:
517 return dasynq::rearm::NOOP;