5 #include "dinit-socket.h"
6 #include "proc-service.h"
9 * Base process implementation (base_process_service).
11 * See proc-service.h for interface documentation.
// Smooth recovery: try to restart the service process in place via restart_ps_process().
// A false return from restart_ps_process() selects the failure branch below, which ends by
// running the service set's queue processing.
14 void base_process_service::do_smooth_recovery() noexcept
16 if (! restart_ps_process()) {
18 services->process_queues();
// Start bringing the service up. Two paths are visible: one hands off to restart_ps_process()
// and returns its result; the other opens a fresh restart-check interval and launches the
// process directly with start_ps_process().
// NOTE(review): presumably the first path is taken when this is a restart rather than an
// initial start - confirm against the guarding condition.
22 bool base_process_service::bring_up() noexcept
26 return restart_ps_process();
// Direct start: stamp the beginning of a new restart-check interval (monotonic clock) and
// reset the number of restarts counted within it.
31 event_loop.get_time(restart_interval_time, clock_type::MONOTONIC);
32 restart_interval_count = 0;
// Launch the process, using the "starts_on_console" flag for the on_console argument.
33 if (start_ps_process(exec_arg_parts, onstart_flags.starts_on_console)) {
// A non-zero start_timeout arms restart_timer as the start timeout; stop_timer_armed records
// that the timer is currently armed.
34 if (start_timeout != time_val(0,0)) {
35 restart_timer.arm_timer_rel(event_loop, start_timeout);
36 stop_timer_armed = true;
// No start timeout configured: stop a timer that is still armed so that it cannot fire.
38 else if (stop_timer_armed) {
39 restart_timer.stop_timer(event_loop);
40 stop_timer_armed = false;
// Launch the service process.
//   cmd        - argument vector for the program; cmd.data() is handed to run_child_proc().
//   on_console - forwarded unchanged to run_child_proc().
// Callers in this file treat a true result as "process launched" and a false result as a
// launch failure.
48 bool base_process_service::start_ps_process(const std::vector<const char *> &cmd, bool on_console) noexcept
50 // In general, you can't tell whether fork/exec is successful. We use a pipe to communicate
51 // success/failure from the child to the parent. The pipe is set CLOEXEC so a successful
52 // exec closes the pipe, and the parent sees EOF. If the exec is unsuccessful, the errno
53 // is written to the pipe, and the parent can read it.
// Record the launch time (monotonic clock); restart_ps_process() compares against
// last_start_time to enforce the restart delay.
55 event_loop.get_time(last_start_time, clock_type::MONOTONIC);
// Create the exec-status pipe with both ends close-on-exec; a failure is logged.
58 if (dasynq::pipe2(pipefd, O_CLOEXEC)) {
59 log(loglevel_t::ERROR, get_name(), ": can't create status check pipe: ", strerror(errno));
// Log file path for the child; it is replaced by "/dev/null" in one case.
// NOTE(review): presumably the replacement applies when no log file is configured - confirm.
63 const char * logfile = this->logfile.c_str();
65 logfile = "/dev/null";
// Tracks whether the status-pipe watch has been added, so the cleanup code at the end of this
// function knows whether it must be deregistered.
68 bool child_status_registered = false;
69 control_conn_t *control_conn = nullptr;
// Optional control socket pair, created only when the "pass_cs_fd" flag is set:
// control_socket[0] is kept on this side (wrapped in a control_conn_t) and control_socket[1]
// is handed to the child via run_child_proc(). Both stay -1 when the flag is not set.
71 int control_socket[2] = {-1, -1};
72 if (onstart_flags.pass_cs_fd) {
73 if (dinit_socketpair(AF_UNIX, SOCK_STREAM, /* protocol */ 0, control_socket, SOCK_NONBLOCK)) {
74 log(loglevel_t::ERROR, get_name(), ": can't create control socket: ", strerror(errno));
78 // Make the server side socket close-on-exec:
// NOTE(review): the results of both fcntl() calls are used/ignored without an error check.
79 int fdflags = fcntl(control_socket[0], F_GETFD);
80 fcntl(control_socket[0], F_SETFD, fdflags | FD_CLOEXEC);
83 control_conn = new control_conn_t(event_loop, services, control_socket[0]);
// Any std::exception raised here is reported as an out-of-memory condition.
85 catch (std::exception &exc) {
86 log(loglevel_t::ERROR, get_name(), ": can't launch process; out of memory");
91 // Set up complete, now fork and exec:
// Watch the read end of the status pipe for input, and remember that the watch now exists.
96 child_status_listener.add_watch(event_loop, pipefd[0], dasynq::IN_EVENTS);
97 child_status_registered = true;
99 // We specify a high priority (i.e. low priority value) so that process termination is
100 // handled early. This means we have always recorded that the process is terminated by the
101 // time that we handle events that might otherwise cause us to signal the process, so we
102 // avoid sending a signal to an invalid (and possibly recycled) process ID.
// The fork is performed through the child watcher; reserved_child_watch is passed in as it
// stood before the call and is set to true afterwards.
103 forkpid = child_listener.fork(event_loop, reserved_child_watch, dasynq::DEFAULT_PRIORITY - 10);
104 reserved_child_watch = true;
// A fork failure surfaces as an exception and is logged with its message.
106 catch (std::exception &e) {
107 log(loglevel_t::ERROR, get_name(), ": Could not fork: ", e.what());
// NOTE(review): presumably reached only in the forked child - confirm. The child receives the
// write end of the status pipe and its end of the control socket (-1 if none was created).
112 run_child_proc(cmd.data(), logfile, on_console, pipefd[1], control_socket[1]);
// Close the write end of the status pipe (and the child's control socket end, if any) on
// this side, so that only the child holds them; per the comment at the top of this function
// the parent then sees EOF on the pipe after a successful exec.
116 close(pipefd[1]); // close the 'other end' fd
117 if (control_socket[1] != -1) {
118 close(control_socket[1]);
// The exec outcome is not known yet; mark that we are waiting for it.
122 waiting_for_execstat = true;
// Cleanup of partially completed setup: remove the status-pipe watch if it was added, and
// close both control socket descriptors if a control socket was requested.
129 if (child_status_registered) {
130 child_status_listener.deregister(event_loop);
133 if (onstart_flags.pass_cs_fd) {
137 close(control_socket[0]);
138 close(control_socket[1]);
// Begin stopping the service: signal the running process (if any) and, where a stop timeout
// is configured, arm the timer that bounds how long the stop may take.
148 void base_process_service::bring_down() noexcept
// We are stopping, so we are no longer waiting on dependencies.
150 waiting_for_deps = false;
152 // The process is still kicking on - must actually kill it. We signal the process
153 // group (-pid) rather than just the process as there's less risk then of creating
154 // an orphaned process group:
// The "no_sigterm" flag suppresses the action taken in this branch.
// NOTE(review): presumably that action is sending SIGTERM to the process group - confirm.
155 if (! onstart_flags.no_sigterm) {
// term_signal is sent to the process group only when it is not -1.
158 if (term_signal != -1) {
159 kill_pg(term_signal);
162 // In most cases, the rest is done in handle_exit_status.
163 // If we are a BGPROCESS and the process is not our immediate child, however, that
164 // won't work - check for this now:
165 if (get_type() == service_type_t::BGPROCESS && ! tracking_child) {
// Otherwise, a non-zero stop_timeout arms restart_timer as the stop timeout and records that
// the timer is armed.
168 else if (stop_timeout != time_val(0,0)) {
169 restart_timer.arm_timer_rel(event_loop, stop_timeout);
170 stop_timer_armed = true;
174 // The process is already dead.
// Construct a process-based service.
//   sset            - owning service set (passed to the service_record base).
//   name            - service name (passed to the service_record base).
//   service_type_p  - service type (passed to the service_record base).
//   command         - command string; moved into program_name.
//   command_offsets - passed with program_name to separate_args() to build exec_arg_parts.
//   deplist_p       - dependency list (passed to the service_record base).
// The child watcher, child status watcher and restart timer are each constructed with a
// pointer to this service.
179 base_process_service::base_process_service(service_set *sset, string name,
180 service_type_t service_type_p, string &&command,
181 std::list<std::pair<unsigned,unsigned>> &command_offsets,
182 const std::list<prelim_dep> &deplist_p)
183 : service_record(sset, name, service_type_p, deplist_p), child_listener(this),
184 child_status_listener(this), restart_timer(this)
// Take ownership of the command string first; the argument parts are then derived from the
// stored copy (program_name), not from the moved-from parameter.
186 program_name = std::move(command);
187 exec_arg_parts = separate_args(program_name, command_offsets);
// Restart rate tracking starts out empty; the restart timer is registered with the event loop.
189 restart_interval_count = 0;
190 restart_interval_time = {0, 0};
191 restart_timer.service = this;
192 restart_timer.add_timer(event_loop);
194 // By default, allow a maximum of 3 restarts within 10.0 seconds:
195 restart_interval.seconds() = 10;
196 restart_interval.nseconds() = 0;
197 max_restart_interval_count = 3;
// Initial state flags: no timer pending, no child watch reserved, no child being tracked.
199 waiting_restart_timer = false;
200 reserved_child_watch = false;
201 tracking_child = false;
202 stop_timer_armed = false;
203 start_is_interruptible = false;
// Perform the actual restart of the process: count the restart, pick the console setting
// appropriate to the current service state, and launch the process via start_ps_process().
206 void base_process_service::do_restart() noexcept
// The restart is happening now, so no restart timer is pending any more. The incremented
// count is what restart_ps_process() compares against max_restart_interval_count.
208 waiting_restart_timer = false;
209 restart_interval_count++;
210 auto service_state = get_state();
212 // We may be STARTING (regular restart) or STARTED ("smooth recovery"). This affects whether
213 // the process should be granted access to the console:
214 bool on_console = service_state == service_state_t::STARTING
215 ? onstart_flags.starts_on_console : onstart_flags.runs_on_console;
217 if (service_state == service_state_t::STARTING) {
218 // for a smooth recovery, we want to check dependencies are available before actually
// If the dependency check fails, note that we are waiting for dependencies.
220 if (! check_deps_started()) {
221 waiting_for_deps = true;
// Launch the process; a false result is a launch failure, handled below (with a case
// specific to the STARTING state).
226 if (! start_ps_process(exec_arg_parts, on_console)) {
228 if (service_state == service_state_t::STARTING) {
232 // desired_state = service_state_t::STOPPED;
235 services->process_queues();
// Restart the process, subject to two limits: a maximum number of restarts within the
// restart-check interval, and a minimum delay (restart_delay) since the last start.
// do_smooth_recovery() treats a false result from this function as a failed restart.
239 bool base_process_service::restart_ps_process() noexcept
241 using time_val = dasynq::time_val;
243 time_val current_time;
244 event_loop.get_time(current_time, clock_type::MONOTONIC);
// A max_restart_interval_count of zero skips the restart-rate check entirely.
246 if (max_restart_interval_count != 0) {
247 // Check whether we're still in the most recent restart check interval:
248 time_val int_diff = current_time - restart_interval_time;
249 if (int_diff < restart_interval) {
// Still inside the interval: reaching the maximum restart count is logged as an error.
250 if (restart_interval_count >= max_restart_interval_count) {
251 log(loglevel_t::ERROR, "Service ", get_name(), " restarting too quickly; stopping.");
// Start a new check interval from the current time with a zero restart count.
// NOTE(review): presumably done only once the previous interval has elapsed - confirm.
256 restart_interval_time = current_time;
257 restart_interval_count = 0;
261 // Check if enough time has lapsed since the previous restart. If not, start a timer:
262 time_val tdiff = current_time - last_start_time;
263 if (restart_delay <= tdiff) {
264 // > restart delay (normally 200ms)
// Not enough time has passed: arm the timer for the remainder of the delay and record that
// a restart is pending on it.
268 time_val timeout = restart_delay - tdiff;
269 restart_timer.arm_timer_rel(event_loop, timeout);
270 waiting_restart_timer = true;
// Interrupt an in-progress start of the service.
275 bool base_process_service::interrupt_start() noexcept
// If the (re)start is only pending on the restart timer, stop the timer, clear the pending
// flag, and let the base class complete the interruption.
277 if (waiting_restart_timer) {
278 restart_timer.stop_timer(event_loop);
279 waiting_restart_timer = false;
280 return service_record::interrupt_start();
// Otherwise a process with a pid is involved; the interruption is logged as a warning.
283 log(loglevel_t::WARN, "Interrupting start of service ", get_name(), " with pid ", pid, " (with SIGINT).");
// From here the service is treated as stopping: a non-zero stop_timeout arms restart_timer
// as the stop timeout; with no stop timeout, a timer that is still armed is stopped.
285 if (stop_timeout != time_val(0,0)) {
286 restart_timer.arm_timer_rel(event_loop, stop_timeout);
287 stop_timer_armed = true;
289 else if (stop_timer_armed) {
290 restart_timer.stop_timer(event_loop);
291 stop_timer_armed = false;
// Move to STOPPING and tell listeners that the start was cancelled.
293 set_state(service_state_t::STOPPING);
294 notify_listeners(service_event_t::STARTCANCELLED);
// Forcibly terminate a service that did not stop in time. A warning naming the service and
// its pid is logged, stating that the allowed stop time was exceeded.
299 void base_process_service::kill_with_fire() noexcept
302 log(loglevel_t::WARN, "Service ", get_name(), " with pid ", pid, " exceeded allowed stop time; killing.");
// Signal the process group of the service process.
//   signo - the signal number to deliver.
// The process group id is looked up from pid with getpgid(); a failure is logged together
// with the errno description.
307 void base_process_service::kill_pg(int signo) noexcept
309 pid_t pgid = getpgid(pid);
311 // only should happen if pid is invalid, which should never happen...
312 log(loglevel_t::ERROR, get_name(), ": can't signal process: ", strerror(errno));
318 void base_process_service::timer_expired() noexcept
320 stop_timer_armed = false;
323 // We are stopping, including after having startup cancelled (stop timeout, state is STOPPING); We are
324 // starting (start timeout, state is STARTING); We are waiting for restart timer before restarting,
325 // including smooth recovery (restart timeout, state is STARTING or STARTED).
326 if (get_state() == service_state_t::STOPPING) {
329 else if (pid != -1) {
330 // Starting, start timed out.
335 // STARTING / STARTED, and we have a pid: must be restarting (smooth recovery if STARTED)