src/baseproc-service.cc

   1 #include <cstring>
   2
   3 #include "dinit.h"
   4 #include "dinit-log.h"
   5 #include "dinit-socket.h"
   6 #include "proc-service.h"
   7
   8 /*
   9  * Base process implementation (base_process_service).
  10  *
  11  * See proc-service.h for interface documentation.
  12  */
  13
  14 void base_process_service::do_smooth_recovery() noexcept
  15 {
  16     if (! restart_ps_process()) {
  17         emergency_stop();
  18         services->process_queues();
  19     }
  20 }
  21
  22 bool base_process_service::bring_up() noexcept
  23 {
  24     if (restarting) {
  25         if (pid == -1) {
  26             return restart_ps_process();
  27         }
  28         return true;
  29     }
  30     else {
  31         event_loop.get_time(restart_interval_time, clock_type::MONOTONIC);
  32         restart_interval_count = 0;
  33         if (start_ps_process(exec_arg_parts, onstart_flags.starts_on_console)) {
  34             if (start_timeout != time_val(0,0)) {
  35                 restart_timer.arm_timer_rel(event_loop, start_timeout);
  36                 stop_timer_armed = true;
  37             }
  38             else if (stop_timer_armed) {
  39                 restart_timer.stop_timer(event_loop);
  40                 stop_timer_armed = false;
  41             }
  42             return true;
  43         }
  44         return false;
  45     }
  46 }
  47
  48 bool base_process_service::start_ps_process(const std::vector<const char *> &cmd, bool on_console) noexcept
  49 {
  50     // In general, you can't tell whether fork/exec is successful. We use a pipe to communicate
  51     // success/failure from the child to the parent. The pipe is set CLOEXEC so a successful
  52     // exec closes the pipe, and the parent sees EOF. If the exec is unsuccessful, the errno
  53     // is written to the pipe, and the parent can read it.
  54
  55     event_loop.get_time(last_start_time, clock_type::MONOTONIC);
  56
  57     int pipefd[2];
  58     if (dasynq::pipe2(pipefd, O_CLOEXEC)) {
  59         log(loglevel_t::ERROR, get_name(), ": can't create status check pipe: ", strerror(errno));
  60         return false;
  61     }
  62
  63     const char * logfile = this->logfile.c_str();
  64     if (*logfile == 0) {
  65         logfile = "/dev/null";
  66     }
  67
  68     bool child_status_registered = false;
  69     control_conn_t *control_conn = nullptr;
  70
  71     int control_socket[2] = {-1, -1};
  72     if (onstart_flags.pass_cs_fd) {
  73         if (dinit_socketpair(AF_UNIX, SOCK_STREAM, /* protocol */ 0, control_socket, SOCK_NONBLOCK)) {
  74             log(loglevel_t::ERROR, get_name(), ": can't create control socket: ", strerror(errno));
  75             goto out_p;
  76         }
  77
  78         // Make the server side socket close-on-exec:
  79         int fdflags = fcntl(control_socket[0], F_GETFD);
  80         fcntl(control_socket[0], F_SETFD, fdflags | FD_CLOEXEC);
  81
  82         try {
  83             control_conn = new control_conn_t(event_loop, services, control_socket[0]);
  84         }
  85         catch (std::exception &exc) {
  86             log(loglevel_t::ERROR, get_name(), ": can't launch process; out of memory");
  87             goto out_cs;
  88         }
  89     }
  90
  91     // Set up complete, now fork and exec:
  92
  93     pid_t forkpid;
  94
  95     try {
  96         child_status_listener.add_watch(event_loop, pipefd[0], dasynq::IN_EVENTS);
  97         child_status_registered = true;
  98
  99         // We specify a high priority (i.e. low priority value) so that process termination is
 100         // handled early. This means we have always recorded that the process is terminated by the
 101         // time that we handle events that might otherwise cause us to signal the process, so we
 102         // avoid sending a signal to an invalid (and possibly recycled) process ID.
 103         forkpid = child_listener.fork(event_loop, reserved_child_watch, dasynq::DEFAULT_PRIORITY - 10);
 104         reserved_child_watch = true;
 105     }
 106     catch (std::exception &e) {
 107         log(loglevel_t::ERROR, get_name(), ": Could not fork: ", e.what());
 108         goto out_cs_h;
 109     }
 110
 111     if (forkpid == 0) {
 112         run_child_proc(cmd.data(), logfile, on_console, pipefd[1], control_socket[1]);
 113     }
 114     else {
 115         // Parent process
 116         close(pipefd[1]); // close the 'other end' fd
 117         if (control_socket[1] != -1) {
 118             close(control_socket[1]);
 119         }
 120         pid = forkpid;
 121
 122         waiting_for_execstat = true;
 123         return true;
 124     }
 125
 126     // Failure exit:
 127
 128     out_cs_h:
 129     if (child_status_registered) {
 130         child_status_listener.deregister(event_loop);
 131     }
 132
 133     if (onstart_flags.pass_cs_fd) {
 134         delete control_conn;
 135
 136         out_cs:
 137         close(control_socket[0]);
 138         close(control_socket[1]);
 139     }
 140
 141     out_p:
 142     close(pipefd[0]);
 143     close(pipefd[1]);
 144
 145     return false;
 146 }
 147
 148 void base_process_service::bring_down() noexcept
 149 {
 150     waiting_for_deps = false;
 151     if (pid != -1) {
 152         // The process is still kicking on - must actually kill it. We signal the process
 153         // group (-pid) rather than just the process as there's less risk then of creating
 154         // an orphaned process group:
 155         if (! onstart_flags.no_sigterm) {
 156             kill_pg(SIGTERM);
 157         }
 158         if (term_signal != -1) {
 159             kill_pg(term_signal);
 160         }
 161
 162         // In most cases, the rest is done in handle_exit_status.
 163         // If we are a BGPROCESS and the process is not our immediate child, however, that
 164         // won't work - check for this now:
 165         if (get_type() == service_type_t::BGPROCESS && ! tracking_child) {
 166             stopped();
 167         }
 168         else if (stop_timeout != time_val(0,0)) {
 169             restart_timer.arm_timer_rel(event_loop, stop_timeout);
 170             stop_timer_armed = true;
 171         }
 172     }
 173     else {
 174         // The process is already dead.
 175         stopped();
 176     }
 177 }
 178
 179 base_process_service::base_process_service(service_set *sset, string name,
 180         service_type_t service_type_p, string &&command,
 181         std::list<std::pair<unsigned,unsigned>> &command_offsets,
 182         const std::list<prelim_dep> &deplist_p)
 183      : service_record(sset, name, service_type_p, deplist_p), child_listener(this),
 184        child_status_listener(this), restart_timer(this)
 185 {
 186     program_name = std::move(command);
 187     exec_arg_parts = separate_args(program_name, command_offsets);
 188
 189     restart_interval_count = 0;
 190     restart_interval_time = {0, 0};
 191     restart_timer.service = this;
 192     restart_timer.add_timer(event_loop);
 193
 194     // By default, allow a maximum of 3 restarts within 10.0 seconds:
 195     restart_interval.seconds() = 10;
 196     restart_interval.nseconds() = 0;
 197     max_restart_interval_count = 3;
 198
 199     waiting_restart_timer = false;
 200     reserved_child_watch = false;
 201     tracking_child = false;
 202     stop_timer_armed = false;
 203     start_is_interruptible = false;
 204 }
 205
 206 void base_process_service::do_restart() noexcept
 207 {
 208     waiting_restart_timer = false;
 209     restart_interval_count++;
 210     auto service_state = get_state();
 211
 212     // We may be STARTING (regular restart) or STARTED ("smooth recovery"). This affects whether
 213     // the process should be granted access to the console:
 214     bool on_console = service_state == service_state_t::STARTING
 215             ? onstart_flags.starts_on_console : onstart_flags.runs_on_console;
 216
 217     if (service_state == service_state_t::STARTING) {
 218         // for a smooth recovery, we want to check dependencies are available before actually
 219         // starting:
 220         if (! check_deps_started()) {
 221             waiting_for_deps = true;
 222             return;
 223         }
 224     }
 225
 226     if (! start_ps_process(exec_arg_parts, on_console)) {
 227         restarting = false;
 228         if (service_state == service_state_t::STARTING) {
 229             failed_to_start();
 230         }
 231         else {
 232             // desired_state = service_state_t::STOPPED;
 233             forced_stop();
 234         }
 235         services->process_queues();
 236     }
 237 }
 238
 239 bool base_process_service::restart_ps_process() noexcept
 240 {
 241     using time_val = dasynq::time_val;
 242
 243     time_val current_time;
 244     event_loop.get_time(current_time, clock_type::MONOTONIC);
 245
 246     if (max_restart_interval_count != 0) {
 247         // Check whether we're still in the most recent restart check interval:
 248         time_val int_diff = current_time - restart_interval_time;
 249         if (int_diff < restart_interval) {
 250             if (restart_interval_count >= max_restart_interval_count) {
 251                 log(loglevel_t::ERROR, "Service ", get_name(), " restarting too quickly; stopping.");
 252                 return false;
 253             }
 254         }
 255         else {
 256             restart_interval_time = current_time;
 257             restart_interval_count = 0;
 258         }
 259     }
 260
 261     // Check if enough time has lapsed since the prevous restart. If not, start a timer:
 262     time_val tdiff = current_time - last_start_time;
 263     if (restart_delay <= tdiff) {
 264         // > restart delay (normally 200ms)
 265         do_restart();
 266     }
 267     else {
 268         time_val timeout = restart_delay - tdiff;
 269         restart_timer.arm_timer_rel(event_loop, timeout);
 270         waiting_restart_timer = true;
 271     }
 272     return true;
 273 }
 274
 275 bool base_process_service::interrupt_start() noexcept
 276 {
 277     if (waiting_restart_timer) {
 278         restart_timer.stop_timer(event_loop);
 279         waiting_restart_timer = false;
 280         return service_record::interrupt_start();
 281     }
 282     else {
 283         log(loglevel_t::WARN, "Interrupting start of service ", get_name(), " with pid ", pid, " (with SIGINT).");
 284         kill_pg(SIGINT);
 285         if (stop_timeout != time_val(0,0)) {
 286             restart_timer.arm_timer_rel(event_loop, stop_timeout);
 287             stop_timer_armed = true;
 288         }
 289         else if (stop_timer_armed) {
 290             restart_timer.stop_timer(event_loop);
 291             stop_timer_armed = false;
 292         }
 293         set_state(service_state_t::STOPPING);
 294         notify_listeners(service_event_t::STARTCANCELLED);
 295         return false;
 296     }
 297 }
 298
 299 void base_process_service::kill_with_fire() noexcept
 300 {
 301     if (pid != -1) {
 302         log(loglevel_t::WARN, "Service ", get_name(), " with pid ", pid, " exceeded allowed stop time; killing.");
 303         kill_pg(SIGKILL);
 304     }
 305 }
 306
 307 void base_process_service::kill_pg(int signo) noexcept
 308 {
 309     pid_t pgid = getpgid(pid);
 310     if (pgid == -1) {
 311         // only should happen if pid is invalid, which should never happen...
 312         log(loglevel_t::ERROR, get_name(), ": can't signal process: ", strerror(errno));
 313         return;
 314     }
 315     kill(-pgid, signo);
 316 }
 317
 318 void base_process_service::timer_expired() noexcept
 319 {
 320     stop_timer_armed = false;
 321
 322     // Timer expires if:
 323     // We are stopping, including after having startup cancelled (stop timeout, state is STOPPING); We are
 324     // starting (start timeout, state is STARTING); We are waiting for restart timer before restarting,
 325     // including smooth recovery (restart timeout, state is STARTING or STARTED).
 326     if (get_state() == service_state_t::STOPPING) {
 327         kill_with_fire();
 328     }
 329     else if (pid != -1) {
 330         // Starting, start timed out.
 331         stop_dependents();
 332         interrupt_start();
 333     }
 334     else {
 335         // STARTING / STARTED, and we have a pid: must be restarting (smooth recovery if STARTED)
 336         do_restart();
 337     }
 338 }