5 #include <sys/socket.h>
11 #include "dinit-log.h"
12 #include "dinit-socket.h"
13 #include "proc-service.h"
15 #include "baseproc-sys.h"
18 * Base process implementation (base_process_service).
20 * See proc-service.h for interface documentation.
// Smooth-recovery entry point: tries to relaunch the process via restart_ps_process() and, on
// the path visible here, runs the service set's queues after a failed attempt.
// NOTE(review): this listing omits some lines (the embedded numbering jumps 25 -> 27), so any
// failure handling between the check and process_queues() is not visible - confirm.
23 void base_process_service::do_smooth_recovery() noexcept
25 if (! restart_ps_process()) {
27 services->process_queues();
// Starts the service process: opens the activation socket, resets the restart-rate window and
// launches the process via start_ps_process().
// Returns bool. NOTE(review): most return statements are on lines omitted from this listing;
// presumably true means the start was initiated - confirm.
31 bool base_process_service::bring_up() noexcept
// Early exit that defers to the rate-limited restart path; its guarding condition is on a line
// omitted from this listing.
35 return restart_ps_process();
// Open the activation socket first (open_socket() returns early when no socket path is set or
// the socket is already open).
40 if (! open_socket()) {
// Begin a new restart-rate window: the count restarts from zero, the window start time is set
// below once the launch time is known.
44 restart_interval_count = 0;
45 if (start_ps_process(exec_arg_parts,
46 onstart_flags.starts_on_console || onstart_flags.shares_console)) {
47 // start_ps_process updates last_start_time, use it also for restart_interval_time:
48 restart_interval_time = last_start_time;
49 // Note: we don't set a start timeout for PROCESS services.
// restart_timer is reused as the start-timeout timer; stop_timer_armed records that it is
// currently armed for a timeout.
50 if (start_timeout != time_val(0,0) && get_type() != service_type_t::PROCESS) {
51 restart_timer.arm_timer_rel(event_loop, start_timeout);
52 stop_timer_armed = true;
// No start timeout applies: disarm a timeout that was left armed earlier.
54 else if (stop_timer_armed) {
55 restart_timer.stop_timer(event_loop);
56 stop_timer_armed = false;
// NOTE(review): second assignment of the window start; presumably this is the launch-failure
// branch, whose surrounding lines are omitted from this listing - confirm.
60 restart_interval_time = last_start_time;
// Forks and execs the service process.
//
//   cmd        - argument vector for the new process; cmd.data() is handed to run_proc_params.
//   on_console - copied to run_params.on_console for the child.
//
// Returns bool; the callers in this file branch on it as launch success/failure (the return
// statements themselves are on lines omitted from this listing).
//
// Visible sequence: record the start time, create the CLOEXEC exec-status pipe, optionally
// create the control socket pair and the readiness-notification pipe, register the status
// watcher, fork, then fill in run_proc_params and call run_child_proc (child side) or close the
// child-side descriptors (parent side). The tail of the function releases what was set up.
65 bool base_process_service::start_ps_process(const std::vector<const char *> &cmd, bool on_console) noexcept
67 // In general, you can't tell whether fork/exec is successful. We use a pipe to communicate
68 // success/failure from the child to the parent. The pipe is set CLOEXEC so a successful
69 // exec closes the pipe, and the parent sees EOF. If the exec is unsuccessful, the errno
70 // is written to the pipe, and the parent can read it.
// Launch timestamp; other functions in this file read last_start_time for restart spacing.
72 event_loop.get_time(last_start_time, clock_type::MONOTONIC);
75 if (bp_sys::pipe2(pipefd, O_CLOEXEC)) {
76 log(loglevel_t::ERROR, get_name(), ": can't create status check pipe: ", strerror(errno));
// Local 'logfile' shadows the member; it falls back to /dev/null (the condition for the
// fallback is on a line omitted from this listing).
80 const char * logfile = this->logfile.c_str();
82 logfile = "/dev/null";
// Bookkeeping for the cleanup code at the end: which resources have been set up so far.
// Descriptor slots start at -1 so cleanup can tell "never opened" from "open".
85 bool child_status_registered = false;
86 control_conn_t *control_conn = nullptr;
88 int control_socket[2] = {-1, -1};
89 int notify_pipe[2] = {-1, -1};
// Readiness notification is in use if either a notification variable or a fixed fd is set.
90 bool have_notify = !notification_var.empty() || force_notification_fd != -1;
91 ready_notify_watcher * rwatcher = have_notify ? get_ready_watcher() : nullptr;
92 bool ready_watcher_registered = false;
// Optional control socket: index 0 stays in this process (wrapped by control_conn_t), index 1
// is handed to the child via run_params.csfd.
94 if (onstart_flags.pass_cs_fd) {
95 if (dinit_socketpair(AF_UNIX, SOCK_STREAM, /* protocol */ 0, control_socket, SOCK_NONBLOCK)) {
96 log(loglevel_t::ERROR, get_name(), ": can't create control socket: ", strerror(errno));
100 // Make the server side socket close-on-exec:
// NOTE(review): the F_GETFD result is not checked; on failure (-1) the F_SETFD call below would
// be passed -1 | FD_CLOEXEC. The F_SETFD result is also ignored.
101 int fdflags = bp_sys::fcntl(control_socket[0], F_GETFD);
102 bp_sys::fcntl(control_socket[0], F_SETFD, fdflags | FD_CLOEXEC);
105 control_conn = new control_conn_t(event_loop, services, control_socket[0]);
// NOTE(review): any std::exception is reported as "out of memory"; exc.what() is not logged.
107 catch (std::exception &exc) {
108 log(loglevel_t::ERROR, get_name(), ": can't launch process; out of memory");
114 // Create a notification pipe:
// (The condition guarding this section is omitted from this listing; presumably have_notify.)
115 if (bp_sys::pipe2(notify_pipe, 0) != 0) {
116 log(loglevel_t::ERROR, get_name(), ": can't create notification pipe: ", strerror(errno));
120 // Set the read side as close-on-exec:
// NOTE(review): same unchecked F_GETFD / F_SETFD pattern as for the control socket above.
121 int fdflags = bp_sys::fcntl(notify_pipe[0], F_GETFD);
122 bp_sys::fcntl(notify_pipe[0], F_SETFD, fdflags | FD_CLOEXEC);
124 // add, but don't yet enable, readiness watcher:
126 rwatcher->add_watch(event_loop, notify_pipe[0], dasynq::IN_EVENTS, false);
127 ready_watcher_registered = true;
129 catch (std::exception &exc) {
130 log(loglevel_t::ERROR, get_name(), ": can't add notification watch: ", exc.what());
134 // Set up complete, now fork and exec:
// Watch the read end of the status pipe for the child's exec result.
139 child_status_listener.add_watch(event_loop, pipefd[0], dasynq::IN_EVENTS);
140 child_status_registered = true;
142 // We specify a high priority (i.e. low priority value) so that process termination is
143 // handled early. This means we have always recorded that the process is terminated by the
144 // time that we handle events that might otherwise cause us to signal the process, so we
145 // avoid sending a signal to an invalid (and possibly recycled) process ID.
146 forkpid = child_listener.fork(event_loop, reserved_child_watch, dasynq::DEFAULT_PRIORITY - 10);
147 reserved_child_watch = true;
149 catch (std::exception &e) {
150 log(loglevel_t::ERROR, get_name(), ": Could not fork: ", e.what());
// Presumably the child side of the fork (the branch test is omitted from this listing): collect
// everything the new process needs into run_proc_params and hand over to run_child_proc.
155 const char * working_dir_c = nullptr;
156 if (! working_dir.empty()) working_dir_c = working_dir.c_str();
157 after_fork(getpid());
158 run_proc_params run_params{cmd.data(), working_dir_c, logfile, pipefd[1], run_as_uid, run_as_gid, rlimits};
159 run_params.on_console = on_console;
160 run_params.csfd = control_socket[1];
161 run_params.socket_fd = socket_fd;
162 run_params.notify_fd = notify_pipe[1];
163 run_params.force_notify_fd = force_notification_fd;
164 run_params.notify_var = notification_var.c_str();
165 run_params.env_file = env_file.c_str();
166 run_child_proc(run_params);
// Presumably the parent side: drop the descriptors that belong to the child, keep the read end
// of the notification pipe, and note that the exec status has not been received yet.
172 bp_sys::close(pipefd[1]); // close the 'other end' fd
173 if (control_socket[1] != -1) bp_sys::close(control_socket[1]);
174 if (notify_pipe[1] != -1) bp_sys::close(notify_pipe[1]);
175 notification_fd = notify_pipe[0];
176 waiting_for_execstat = true;
// Cleanup: release, in turn, the status watcher, the notification pipe and its watcher, the
// control socket pair and the status pipe. The labels/branches that select how much of this
// runs are on lines omitted from this listing.
183 if (child_status_registered) {
184 child_status_listener.deregister(event_loop);
187 if (notify_pipe[0] != -1) bp_sys::close(notify_pipe[0]);
188 if (notify_pipe[1] != -1) bp_sys::close(notify_pipe[1]);
189 if (ready_watcher_registered) {
190 rwatcher->deregister(event_loop);
193 if (onstart_flags.pass_cs_fd) {
197 bp_sys::close(control_socket[0]);
198 bp_sys::close(control_socket[1]);
202 bp_sys::close(pipefd[0]);
203 bp_sys::close(pipefd[1]);
// Constructs a process-based service record.
//
//   sset, name, service_type_p, deplist_p - forwarded to the service_record base.
//   command         - full command string; moved into program_name.
//   command_offsets - passed together with program_name to separate_args() to build
//                     exec_arg_parts.
//
// Also registers restart_timer with the event loop, applies the default restart limits and
// clears the timer/child-watch state flags.
208 base_process_service::base_process_service(service_set *sset, string name,
209 service_type_t service_type_p, string &&command,
210 const std::list<std::pair<unsigned,unsigned>> &command_offsets,
211 const std::list<prelim_dep> &deplist_p)
212 : service_record(sset, name, service_type_p, deplist_p), child_listener(this),
213 child_status_listener(this), restart_timer(this)
// program_name owns the command text; exec_arg_parts is derived from it by separate_args().
215 program_name = std::move(command);
216 exec_arg_parts = separate_args(program_name, command_offsets);
// Restart-rate window starts empty.
218 restart_interval_count = 0;
219 restart_interval_time = {0, 0};
220 restart_timer.service = this;
221 restart_timer.add_timer(event_loop);
223 // By default, allow a maximum of 3 restarts within 10.0 seconds:
224 restart_interval.seconds() = 10;
225 restart_interval.nseconds() = 0;
226 max_restart_interval_count = 3;
// Nothing is pending, reserved, tracked or armed on a freshly constructed service.
228 waiting_restart_timer = false;
229 reserved_child_watch = false;
230 tracking_child = false;
231 stop_timer_armed = false;
// Performs the actual relaunch for a restart: clears the pending-timer flag, counts the restart
// against the current rate-limit window, and calls start_ps_process().
// NOTE(review): several lines (early exits, failure handling) are omitted from this listing.
234 void base_process_service::do_restart() noexcept
236 waiting_restart_timer = false;
237 restart_interval_count++;
// Captured once; consulted again below after the launch attempt.
238 auto service_state = get_state();
240 if (service_state == service_state_t::STARTING) {
241 // for a smooth recovery, we want to check dependencies are available before actually
243 if (! check_deps_started()) {
// Dependencies are not all started yet: record that this service is waiting on them.
244 waiting_for_deps = true;
249 if (! start_ps_process(exec_arg_parts, have_console || onstart_flags.shares_console)) {
// Launch failed: handling differs depending on whether the service was still STARTING.
251 if (service_state == service_state_t::STARTING) {
255 // desired_state = service_state_t::STOPPED;
258 services->process_queues();
// Restarts the process subject to two limits: a cap on the number of restarts per time window
// (max_restart_interval_count within restart_interval) and a minimum spacing between launches
// (restart_delay).
// Returns bool; callers in this file treat false as failure. NOTE(review): the return
// statements are on lines omitted from this listing.
262 bool base_process_service::restart_ps_process() noexcept
264 using time_val = dasynq::time_val;
266 time_val current_time;
267 event_loop.get_time(current_time, clock_type::MONOTONIC);
// A maximum count of 0 skips the restart-rate check entirely.
269 if (max_restart_interval_count != 0) {
270 // Check whether we're still in the most recent restart check interval:
271 time_val int_diff = current_time - restart_interval_time;
272 if (int_diff < restart_interval) {
273 if (restart_interval_count >= max_restart_interval_count) {
274 log(loglevel_t::ERROR, "Service ", get_name(), " restarting too quickly; stopping.");
// Start a new window from now (presumably the branch taken once the previous window has
// passed; the branch line is omitted from this listing).
279 restart_interval_time = current_time;
280 restart_interval_count = 0;
284 // Check if enough time has elapsed since the previous restart. If not, start a timer:
285 time_val tdiff = current_time - last_start_time;
286 if (restart_delay <= tdiff) {
287 // > restart delay (normally 200ms)
// Too soon: arm the timer for the remainder of restart_delay and flag that a timed restart is
// pending.
291 time_val timeout = restart_delay - tdiff;
292 restart_timer.arm_timer_rel(event_loop, timeout);
293 waiting_restart_timer = true;
// Interrupts an in-progress start.
// Two visible paths: (1) if the start is only waiting on the restart timer, the timer is
// cancelled and the base-class implementation decides the result; (2) otherwise a warning
// naming the pid is logged, restart_timer is re-armed as the stop timeout (or disarmed when no
// stop timeout is configured), and the service moves to STOPPING.
// Returns bool; on path (1) it is whatever service_record::interrupt_start() returns. The
// return for path (2) is on a line omitted from this listing.
298 bool base_process_service::interrupt_start() noexcept
300 if (waiting_restart_timer) {
301 restart_timer.stop_timer(event_loop);
302 waiting_restart_timer = false;
303 return service_record::interrupt_start();
306 log(loglevel_t::WARN, "Interrupting start of service ", get_name(), " with pid ", pid,
310 if (stop_timeout != time_val(0,0)) {
311 restart_timer.arm_timer_rel(event_loop, stop_timeout);
312 stop_timer_armed = true;
314 else if (stop_timer_armed) {
315 restart_timer.stop_timer(event_loop);
316 stop_timer_armed = false;
319 set_state(service_state_t::STOPPING);
// Forced-kill path for a service that exceeded its allowed stop time. Visible here is only the
// warning that is logged; the guard and the signal delivery are on lines omitted from this
// listing (NOTE(review): confirm against the full file).
324 void base_process_service::kill_with_fire() noexcept
327 log(loglevel_t::WARN, "Service ", get_name(), " with pid ", pid,
328 " exceeded allowed stop time; killing.");
// Delivers signal 'signo' to the service: to the single process when the signal_process_only
// flag is set, otherwise to the process group the process belongs to.
333 void base_process_service::kill_pg(int signo) noexcept
335 if (onstart_flags.signal_process_only) {
336 bp_sys::kill(pid, signo);
339 pid_t pgid = bp_sys::getpgid(pid);
341 // On some OSes (eg OpenBSD) we aren't allowed to get the pgid of a process in a different
342 // session. If the process is in a different session, however, it must be a process group
343 // leader and the pgid must equal the process id.
// (The handling of a failed getpgid() described above is on lines omitted from this listing.)
// A negative first argument makes kill() signal the whole process group with that id.
346 bp_sys::kill(-pgid, signo);
// Handler for restart_timer expiry. The one timer serves as stop timeout, start timeout and
// restart delay, so the handler dispatches on the service state and on whether a process
// currently exists (pid != -1).
350 void base_process_service::timer_expired() noexcept
// The timer has fired, so it is no longer armed whatever it was armed for.
352 stop_timer_armed = false;
355 // We are stopping, including after having startup cancelled (stop timeout, state is STOPPING); We are
356 // starting (start timeout, state is STARTING); We are waiting for restart timer before restarting,
357 // including smooth recovery (restart timeout, state is STARTING or STARTED).
358 if (get_state() == service_state_t::STOPPING) {
// (Stop-timeout handling is on lines omitted from this listing.)
361 else if (pid != -1) {
362 // Starting, start timed out.
363 log(loglevel_t::WARN, "Service ", get_name(), " with pid ", pid,
364 " exceeded allowed start time; cancelling.");
366 stop_reason = stopped_reason_t::TIMEDOUT;
367 failed_to_start(false, false);
370 // STARTING / STARTED, and we have no pid (the pid != -1 case is handled above): must be restarting (smooth recovery if STARTED)
// Emergency stop of the service. Visible here: when the service will not auto-restart and was
// explicitly started, the explicit-start mark is cleared. The remaining steps are on lines
// omitted from this listing.
375 void base_process_service::emergency_stop() noexcept
377 if (! do_auto_restart() && start_explicit) {
378 start_explicit = false;
// Inactivity hook. Acts only when an activation socket is open (socket_fd != -1); the action
// itself is on lines omitted from this listing - presumably the socket is closed and socket_fd
// reset, TODO confirm.
385 void base_process_service::becoming_inactive() noexcept
387 if (socket_fd != -1) {
393 bool base_process_service::open_socket() noexcept
395 if (socket_path.empty() || socket_fd != -1) {
396 // No socket, or already open
400 const char * saddrname = socket_path.c_str();
402 // Check the specified socket path
403 struct stat stat_buf;
404 if (stat(saddrname, &stat_buf) == 0) {
405 if ((stat_buf.st_mode & S_IFSOCK) == 0) {
407 log(loglevel_t::ERROR, get_name(), ": Activation socket file exists (and is not a socket)");
411 else if (errno != ENOENT) {
413 log(loglevel_t::ERROR, get_name(), ": Error checking activation socket: ", strerror(errno));
417 // Remove stale socket file (if it exists).
418 // We won't test the return from unlink - if it fails other than due to ENOENT, we should get an
419 // error when we try to create the socket anyway.
422 uint sockaddr_size = offsetof(struct sockaddr_un, sun_path) + socket_path.length() + 1;
423 struct sockaddr_un * name = static_cast<sockaddr_un *>(malloc(sockaddr_size));
424 if (name == nullptr) {
425 log(loglevel_t::ERROR, get_name(), ": Opening activation socket: out of memory");
429 name->sun_family = AF_UNIX;
430 strcpy(name->sun_path, saddrname);
432 int sockfd = dinit_socket(AF_UNIX, SOCK_STREAM, 0, SOCK_NONBLOCK | SOCK_CLOEXEC);
434 log(loglevel_t::ERROR, get_name(), ": Error creating activation socket: ", strerror(errno));
439 if (bind(sockfd, (struct sockaddr *) name, sockaddr_size) == -1) {
440 log(loglevel_t::ERROR, get_name(), ": Error binding activation socket: ", strerror(errno));
448 // POSIX (1003.1, 2013) says that fchown and fchmod don't necessarily work on sockets. We have to
449 // use chown and chmod instead.
450 if (chown(saddrname, socket_uid, socket_gid)) {
451 log(loglevel_t::ERROR, get_name(), ": Error setting activation socket owner/group: ",
457 if (chmod(saddrname, socket_perms) == -1) {
458 log(loglevel_t::ERROR, get_name(), ": Error setting activation socket permissions: ",
464 if (listen(sockfd, 128) == -1) { // 128 "seems reasonable".
465 log(loglevel_t::ERROR, ": Error listening on activation socket: ", strerror(errno));