4 #include <sys/socket.h>
10 #include "dinit-log.h"
11 #include "dinit-socket.h"
12 #include "proc-service.h"
14 #include "baseproc-sys.h"
17 * Base process implementation (base_process_service).
19 * See proc-service.h for interface documentation.
// Smooth-recovery entry point: tries restart_ps_process() and, when that returns false, takes the
// failure path, which (as far as visible here) ends by processing the service set's queues.
// NOTE(review): the embedded line numbers skip within this body, so part of the failure handling
// is not shown in this excerpt.
22 void base_process_service::do_smooth_recovery() noexcept
24 if (! restart_ps_process()) {
26 services->process_queues();
// Brings the service process up. Two paths are visible: one simply returns the result of
// restart_ps_process(); the other opens the activation socket, starts a fresh restart-tracking
// interval, launches the process and then manages the start timeout timer.
// NOTE(review): the conditions that select between these paths, and the remaining return
// statements, are not visible in this excerpt.
30 bool base_process_service::bring_up() noexcept
34 return restart_ps_process();
39 if (! open_socket()) {
// Begin a new restart-check interval at the current monotonic time, with no restarts counted yet.
43 event_loop.get_time(restart_interval_time, clock_type::MONOTONIC);
44 restart_interval_count = 0;
45 if (start_ps_process(exec_arg_parts, onstart_flags.starts_on_console)) {
// A non-zero start_timeout arms restart_timer; stop_timer_armed records that the timer is running.
46 if (start_timeout != time_val(0,0)) {
47 restart_timer.arm_timer_rel(event_loop, start_timeout);
48 stop_timer_armed = true;
// No start timeout configured: cancel a timer that is still armed.
50 else if (stop_timer_armed) {
51 restart_timer.stop_timer(event_loop);
52 stop_timer_armed = false;
// Forks and executes the service process.
//   cmd        - argument vector for the program; cmd.data() is handed to run_child_proc
//   on_console - forwarded unchanged to run_child_proc
// Returns bool. NOTE(review): the return statements are not visible in this excerpt - presumably
// true once the fork succeeded and false after any logged failure; confirm against the full file.
60 bool base_process_service::start_ps_process(const std::vector<const char *> &cmd, bool on_console) noexcept
62 // In general, you can't tell whether fork/exec is successful. We use a pipe to communicate
63 // success/failure from the child to the parent. The pipe is set CLOEXEC so a successful
64 // exec closes the pipe, and the parent sees EOF. If the exec is unsuccessful, the errno
65 // is written to the pipe, and the parent can read it.
// Record when this start attempt happened (monotonic clock); restart_ps_process() compares against
// last_start_time to enforce the restart delay.
67 event_loop.get_time(last_start_time, clock_type::MONOTONIC);
70 if (bp_sys::pipe2(pipefd, O_CLOEXEC)) {
71 log(loglevel_t::ERROR, get_name(), ": can't create status check pipe: ", strerror(errno));
// Log file path handed to the child, with "/dev/null" as fallback.
// NOTE(review): the condition guarding the fallback is not visible here - presumably it applies
// when no log file is configured.
75 const char * logfile = this->logfile.c_str();
77 logfile = "/dev/null";
// Tracks whether the status pipe watch was registered, so that it can be deregistered later.
80 bool child_status_registered = false;
81 control_conn_t *control_conn = nullptr;
// Optional control socket pair, created only when the pass_cs_fd flag is set. Index 0 is the side
// kept by this process (wrapped in a control_conn_t); index 1 is handed to run_child_proc.
83 int control_socket[2] = {-1, -1};
84 if (onstart_flags.pass_cs_fd) {
85 if (dinit_socketpair(AF_UNIX, SOCK_STREAM, /* protocol */ 0, control_socket, SOCK_NONBLOCK)) {
86 log(loglevel_t::ERROR, get_name(), ": can't create control socket: ", strerror(errno));
90 // Make the server side socket close-on-exec:
91 int fdflags = bp_sys::fcntl(control_socket[0], F_GETFD);
92 bp_sys::fcntl(control_socket[0], F_SETFD, fdflags | FD_CLOEXEC);
95 control_conn = new control_conn_t(event_loop, services, control_socket[0]);
97 catch (std::exception &exc) {
98 log(loglevel_t::ERROR, get_name(), ": can't launch process; out of memory");
103 // Set up complete, now fork and exec:
// Watch the read end of the status pipe for input before forking.
108 child_status_listener.add_watch(event_loop, pipefd[0], dasynq::IN_EVENTS);
109 child_status_registered = true;
111 // We specify a high priority (i.e. low priority value) so that process termination is
112 // handled early. This means we have always recorded that the process is terminated by the
113 // time that we handle events that might otherwise cause us to signal the process, so we
114 // avoid sending a signal to an invalid (and possibly recycled) process ID.
115 forkpid = child_listener.fork(event_loop, reserved_child_watch, dasynq::DEFAULT_PRIORITY - 10);
116 reserved_child_watch = true;
118 catch (std::exception &e) {
119 log(loglevel_t::ERROR, get_name(), ": Could not fork: ", e.what());
// NOTE(review): presumably executed only in the forked child (guard not visible in this excerpt).
// The child receives the write end of the status pipe, its end of the control socket pair and the
// activation socket fd.
124 run_child_proc(cmd.data(), logfile, on_console, pipefd[1], control_socket[1], socket_fd);
128 bp_sys::close(pipefd[1]); // close the 'other end' fd
129 if (control_socket[1] != -1) {
130 bp_sys::close(control_socket[1]);
// The exec result has not been read from the status pipe yet.
134 waiting_for_execstat = true;
// NOTE(review): the lines below look like the failure clean-up path (labels/conditions are not
// visible here): undo the status watch, then close the control socket pair and the status pipe.
141 if (child_status_registered) {
142 child_status_listener.deregister(event_loop);
145 if (onstart_flags.pass_cs_fd) {
149 bp_sys::close(control_socket[0]);
150 bp_sys::close(control_socket[1]);
154 bp_sys::close(pipefd[0]);
155 bp_sys::close(pipefd[1]);
// Stops the service process: signals its process group and, when a stop timeout is configured,
// arms restart_timer so that an overlong stop can be detected (see timer_expired()).
// NOTE(review): the outer condition distinguishing "still running" from "already dead" is not
// visible in this excerpt.
160 void base_process_service::bring_down() noexcept
163 // The process is still kicking on - must actually kill it. We signal the process
164 // group (-pid) rather than just the process as there's less risk then of creating
165 // an orphaned process group:
// The no_sigterm flag suppresses the signal guarded by this test (its body is not visible here).
166 if (! onstart_flags.no_sigterm) {
// A configured termination signal (anything other than -1) is also sent to the process group.
169 if (term_signal != -1) {
170 kill_pg(term_signal);
173 // In most cases, the rest is done in handle_exit_status.
174 // If we are a BGPROCESS and the process is not our immediate child, however, that
175 // won't work - check for this now:
176 if (get_type() == service_type_t::BGPROCESS && ! tracking_child) {
// Otherwise bound the time allowed for stopping, if a non-zero stop timeout is configured.
179 else if (stop_timeout != time_val(0,0)) {
180 restart_timer.arm_timer_rel(event_loop, stop_timeout);
181 stop_timer_armed = true;
185 // The process is already dead.
// Constructs a process-based service.
//   sset, name, service_type_p, deplist_p - forwarded to the service_record base class
//   command         - full command string; moved into program_name
//   command_offsets - passed with program_name to separate_args() to build exec_arg_parts
// The child watcher, status-pipe watcher and restart timer are each constructed with a pointer to
// this service.
190 base_process_service::base_process_service(service_set *sset, string name,
191 service_type_t service_type_p, string &&command,
192 std::list<std::pair<unsigned,unsigned>> &command_offsets,
193 const std::list<prelim_dep> &deplist_p)
194 : service_record(sset, name, service_type_p, deplist_p), child_listener(this),
195 child_status_listener(this), restart_timer(this)
// exec_arg_parts is derived from program_name, so the command string must be stored first.
197 program_name = std::move(command);
198 exec_arg_parts = separate_args(program_name, command_offsets);
// No restarts recorded yet; register the restart timer with the event loop.
200 restart_interval_count = 0;
201 restart_interval_time = {0, 0};
202 restart_timer.service = this;
203 restart_timer.add_timer(event_loop);
205 // By default, allow a maximum of 3 restarts within 10.0 seconds:
206 restart_interval.seconds() = 10;
207 restart_interval.nseconds() = 0;
208 max_restart_interval_count = 3;
// All state flags start cleared.
210 waiting_restart_timer = false;
211 reserved_child_watch = false;
212 tracking_child = false;
213 stop_timer_armed = false;
214 start_is_interruptible = false;
// Performs a pending restart: clears the "waiting for restart timer" flag, counts the restart
// against the current interval and launches the process again via start_ps_process().
// NOTE(review): several lines of the failure handling are not visible in this excerpt.
217 void base_process_service::do_restart() noexcept
219 waiting_restart_timer = false;
220 restart_interval_count++;
// Snapshot the state once; it is consulted again below after the launch attempt.
221 auto service_state = get_state();
223 if (service_state == service_state_t::STARTING) {
224 // for a smooth recovery, we want to check dependencies are available before actually
// Dependencies are not all started yet: flag that this service is waiting for them.
226 if (! check_deps_started()) {
227 waiting_for_deps = true;
232 if (! start_ps_process(exec_arg_parts, have_console)) {
// Launch failed; the handling differs depending on whether the service was still STARTING.
234 if (service_state == service_state_t::STARTING) {
238 // desired_state = service_state_t::STOPPED;
241 services->process_queues();
// Restarts the process subject to two limits: a cap on the number of restarts within
// restart_interval, and a minimum delay (restart_delay) since the previous start.
// Returns bool. NOTE(review): the return statements are not visible in this excerpt - presumably
// false when restarting too quickly and true when the restart was performed or scheduled.
245 bool base_process_service::restart_ps_process() noexcept
247 using time_val = dasynq::time_val;
249 time_val current_time;
250 event_loop.get_time(current_time, clock_type::MONOTONIC);
// A max_restart_interval_count of 0 disables the restart-rate check entirely.
252 if (max_restart_interval_count != 0) {
253 // Check whether we're still in the most recent restart check interval:
254 time_val int_diff = current_time - restart_interval_time;
255 if (int_diff < restart_interval) {
256 if (restart_interval_count >= max_restart_interval_count) {
257 log(loglevel_t::ERROR, "Service ", get_name(), " restarting too quickly; stopping.");
// Start a new check interval from now with a zeroed restart count.
262 restart_interval_time = current_time;
263 restart_interval_count = 0;
267 // Check if enough time has lapsed since the previous restart. If not, start a timer:
268 time_val tdiff = current_time - last_start_time;
269 if (restart_delay <= tdiff) {
270 // > restart delay (normally 200ms)
// Otherwise wait out the remainder of the delay; timer_expired() handles the timer firing.
274 time_val timeout = restart_delay - tdiff;
275 restart_timer.arm_timer_rel(event_loop, timeout);
276 waiting_restart_timer = true;
// Interrupts a start that is in progress.
// If the service is only waiting on the restart timer, the timer is cancelled and the base class
// implementation decides the result. Otherwise a warning is logged, the stop timeout is applied
// and the service moves to STOPPING with a STARTCANCELLED notification.
// NOTE(review): the signal delivery announced in the log message and the final return value are
// not visible in this excerpt.
281 bool base_process_service::interrupt_start() noexcept
283 if (waiting_restart_timer) {
284 restart_timer.stop_timer(event_loop);
285 waiting_restart_timer = false;
286 return service_record::interrupt_start();
289 log(loglevel_t::WARN, "Interrupting start of service ", get_name(), " with pid ", pid, " (with SIGINT).");
// From here on the timer bounds the stop; without a stop timeout, cancel a still-armed timer.
291 if (stop_timeout != time_val(0,0)) {
292 restart_timer.arm_timer_rel(event_loop, stop_timeout);
293 stop_timer_armed = true;
295 else if (stop_timer_armed) {
296 restart_timer.stop_timer(event_loop);
297 stop_timer_armed = false;
299 set_state(service_state_t::STOPPING);
300 notify_listeners(service_event_t::STARTCANCELLED);
// Called when the service has exceeded its allowed stop time; logs a warning naming the service
// and its pid.
// NOTE(review): the guard around the log call and the forced kill itself are not visible in this
// excerpt.
305 void base_process_service::kill_with_fire() noexcept
308 log(loglevel_t::WARN, "Service ", get_name(), " with pid ", pid, " exceeded allowed stop time; killing.");
// Sends signal `signo` to the process group of the service process.
//   signo - signal number passed to kill()
// The group id is looked up from the stored pid; a failed lookup is logged as an error.
313 void base_process_service::kill_pg(int signo) noexcept
315 pid_t pgid = bp_sys::getpgid(pid);
317 // only should happen if pid is invalid, which should never happen...
318 log(loglevel_t::ERROR, get_name(), ": can't signal process: ", strerror(errno));
// A negative pid argument makes kill() address the whole process group.
321 bp_sys::kill(-pgid, signo);
// Handler for expiry of restart_timer. The same timer serves as stop timeout, start timeout and
// restart delay, so the current state and pid decide which case applies.
// NOTE(review): the actions taken in each branch are only partly visible in this excerpt.
324 void base_process_service::timer_expired() noexcept
// The timer has fired, so it is no longer armed.
326 stop_timer_armed = false;
329 // We are stopping, including after having startup cancelled (stop timeout, state is STOPPING); We are
330 // starting (start timeout, state is STARTING); We are waiting for restart timer before restarting,
331 // including smooth recovery (restart timeout, state is STARTING or STARTED).
332 if (get_state() == service_state_t::STOPPING) {
335 else if (pid != -1) {
336 // Starting, start timed out.
// A timed-out start no longer counts as an explicit start.
338 if (start_explicit) {
339 start_explicit = false;
345 // STARTING / STARTED, and we have no pid: must be restarting (smooth recovery if STARTED)
// Emergency stop of the service. When the service will not auto-restart and had been explicitly
// started, the explicit-start mark is cleared.
// NOTE(review): the remaining steps of the stop are not visible in this excerpt.
350 void base_process_service::emergency_stop() noexcept
352 if (! do_auto_restart() && start_explicit) {
353 start_explicit = false;
// Hook invoked as the service becomes inactive. It acts only when an activation socket is open
// (socket_fd != -1).
// NOTE(review): what is done with the socket is not visible in this excerpt - presumably it is
// closed and socket_fd reset; confirm against the full file.
361 void base_process_service::becoming_inactive() noexcept
363 if (socket_fd != -1) {
// Opens the activation socket at socket_path, when one is configured and not already open.
// Steps: inspect whatever currently exists at the path, build the sockaddr_un, create a
// non-blocking close-on-exec AF_UNIX stream socket, bind it, apply the configured owner/group and
// permissions to the socket file, and start listening. Every failure is logged with the service
// name as prefix.
// Returns bool. NOTE(review): the return statements and clean-up lines are not visible in this
// excerpt.
369 bool base_process_service::open_socket() noexcept
371 if (socket_path.empty() || socket_fd != -1) {
372 // No socket, or already open
376 const char * saddrname = socket_path.c_str();
378 // Check the specified socket path
379 struct stat stat_buf;
380 if (stat(saddrname, &stat_buf) == 0) {
// The S_IF* file-type values are not independent bit flags, so masking st_mode with S_IFSOCK also
// matches other file types (e.g. regular files). S_ISSOCK compares the whole file-type field.
381 if (! S_ISSOCK(stat_buf.st_mode)) {
383 log(loglevel_t::ERROR, get_name(), ": Activation socket file exists (and is not a socket)");
// A missing file is fine (the socket gets created below); any other stat failure is an error.
387 else if (errno != ENOENT) {
389 log(loglevel_t::ERROR, get_name(), ": Error checking activation socket: ", strerror(errno));
393 // Remove stale socket file (if it exists).
394 // We won't test the return from unlink - if it fails other than due to ENOENT, we should get an
395 // error when we try to create the socket anyway.
// The address is allocated to fit the actual path length (plus terminator) rather than relying on
// the fixed size of sockaddr_un::sun_path.
398 uint sockaddr_size = offsetof(struct sockaddr_un, sun_path) + socket_path.length() + 1;
399 struct sockaddr_un * name = static_cast<sockaddr_un *>(malloc(sockaddr_size));
400 if (name == nullptr) {
401 log(loglevel_t::ERROR, get_name(), ": Opening activation socket: out of memory");
405 name->sun_family = AF_UNIX;
406 strcpy(name->sun_path, saddrname);
408 int sockfd = dinit_socket(AF_UNIX, SOCK_STREAM, 0, SOCK_NONBLOCK | SOCK_CLOEXEC);
410 log(loglevel_t::ERROR, get_name(), ": Error creating activation socket: ", strerror(errno));
415 if (bind(sockfd, (struct sockaddr *) name, sockaddr_size) == -1) {
416 log(loglevel_t::ERROR, get_name(), ": Error binding activation socket: ", strerror(errno));
424 // POSIX (1003.1, 2013) says that fchown and fchmod don't necessarily work on sockets. We have to
425 // use chown and chmod instead.
426 if (chown(saddrname, socket_uid, socket_gid)) {
427 log(loglevel_t::ERROR, get_name(), ": Error setting activation socket owner/group: ", strerror(errno));
432 if (chmod(saddrname, socket_perms) == -1) {
433 log(loglevel_t::ERROR, get_name(), ": Error setting activation socket permissions: ", strerror(errno));
438 if (listen(sockfd, 128) == -1) { // 128 "seems reasonable".
// Prefix with the service name, like every other log message in this function.
439 log(loglevel_t::ERROR, get_name(), ": Error listening on activation socket: ", strerror(errno));