From 150d7e46a7a7962c1f74b45e4dd5afa71fb0f5b2 Mon Sep 17 00:00:00 2001 From: Davin McCall Date: Thu, 27 Aug 2015 18:54:23 +0100 Subject: [PATCH 1/1] Initial commit. --- Makefile | 32 ++++ README | 49 +++++ TODO | 44 +++++ control.h | 143 ++++++++++++++ dinit-start.cc | 88 +++++++++ dinit.cc | 329 ++++++++++++++++++++++++++++++++ load_service.cc | 200 ++++++++++++++++++++ service.cc | 493 ++++++++++++++++++++++++++++++++++++++++++++++++ service.h | 204 ++++++++++++++++++++ 9 files changed, 1582 insertions(+) create mode 100644 Makefile create mode 100644 README create mode 100644 TODO create mode 100644 control.h create mode 100644 dinit-start.cc create mode 100644 dinit.cc create mode 100644 load_service.cc create mode 100644 service.cc create mode 100644 service.h diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..d77bbfb --- /dev/null +++ b/Makefile @@ -0,0 +1,32 @@ +-include mconfig + +objects = dinit.o load_service.o service.o dinit-start.o + +dinit_objects = dinit.o load_service.o service.o + +all: dinit dinit-start + +dinit: $(dinit_objects) + g++ -Wall -o dinit $(dinit_objects) -lev + +dinit-start: dinit-start.o + g++ -Wall -o dinit-start dinit-start.o + +# Note we use the old ABI on GCC 5.2 to avoid GCC bug 66145. +$(objects): %.o: %.cc service.h + g++ -D_GLIBCXX_USE_CXX11_ABI=0 -std=gnu++11 -c -Os -Wall $< -o $@ + +install: all + #install -d $(LOGINBINDIR) $(LOGINDOCDIR) + #install -s login $(LOGINBINDIR) + #install --mode=644 README $(LOGINDOCDIR) + #@echo + #@echo "You may also wish to \"make install.man\"." + +install.man: + #install -d $(MAN1DIR) + #install --mode=644 login.1 $(MAN1DIR) + +clean: + rm *.o + rm dinit diff --git a/README b/README new file mode 100644 index 0000000..9c2e581 --- /dev/null +++ b/README @@ -0,0 +1,49 @@ +dinit +----- +v0.1 + +What is it? +=-=-=-=-=-= + +"dinit" is destined to eventually become a replacement "init" program for +Linux (and possibly other) systems. But it's not there yet. 
+ +However, dinit can be used as a general service monitor. Specifically it +can launch multiple services (generally, "daemon" processes, but see notes +below) in parallel, with dependency management (i.e. if one service's +operation depends on another, the latter service will be started first). + +For "process" services (the only kind of service that v0.1 supports) dinit +can monitor the process corresponding to the service, and re-start it if it +dies. It does this in an intelligent way, first "rolling back" all dependent +services (which it will later re-start, if configured to do so). + +Notes on services +=-=-=-=-=-=-=-=-= + +The only type of service supported in v0.1 is the "process" service. Process +services are so named because they run as a process which dinit launches +and monitors directly (this is opposed to "scripted" services, scheduled for +v0.5, which will be started and stopped with the aid of a script or other +program, and which cannot be directly monitored by dinit). + +Many programs that you might want to run under dinit's supervision can run +either "in the foreground" or as a daemon ("in the background"), and the +choice is dictated by a command line switch (for instance the -D and -F +switches to Samba's "smbd"). Although it might seem counterintuitive, +the "foreground" mode should be used for programs registered as process +services in dinit; this allows dinit to monitor the process. + +Process services are attractive due to the ease of monitoring (and +restarting) the service; however, they have one inherent problem, which is +that dinit cannot tell when the service is truly started. Once the process +has been launched, dinit assumes that the service has started, but in fact +there will be a short delay before the process sets itself up, starts +listening on sockets, etc; during this time any other process (including +one from a service listed as dependent) which tries to contact it will not +be able to do so. 
+ + +Things to document about current implementation +----------------------------------------------- +* logfiles won't work until filesystem mounted (/dev/null should, though) diff --git a/TODO b/TODO new file mode 100644 index 0000000..3c49145 --- /dev/null +++ b/TODO @@ -0,0 +1,44 @@ +* Documentation including sample service definitions +* if PID = 1, don't interpret command line args as services to start (except for + "single"). +* if PID != 1, choose a more sensible service definition directory + (something like $HOME/dinit.d) +* Support "virtual" services (so you don't have to have "scripted" services + where the script is /bin/true) +* Better error handling, logging of errors +* Implement a control utility to start/stop services after dinit has started +* Allow command-line arguments to service processes +* Allow logging tasks to memory (growing or circular buffer) and later + switching to disk logging (allows for filesystem mounted readonly on boot) +* Rate control on process respawn +* Maybe re-implement "shutdown" ("halt", "reboot") from util-linux to better work + with dinit. +* Support recognising /etc/init.d services automatically (as script services, with + no dependency management - or upstart compatible dependency management) +* Write wtmp entry on startup (see simpleinit) +* Allow running services as a different UID, resource limits, chroot, cgroups, + namespaces (pid/fs/uid), etc +* Should services be started in a new session (setsid()?) and/or process group + (setpgid()?) +* Make default control socket location build-time configurable + +Later: +* Place some reasonable, soft limit on the number of services to be started + simultaneously, to prevent thrashing. Services that are taking a long time + to start don't count to the limit. Maybe use CPU/IO usage as a controlling + factor. +* Cron-like tasks (if started, they run a sub-task periodically. Stopping the + task will wait until the sub-task is complete). 
These don't need to be + special service type, just have dinit handle cron entries which specify when + a particular service needs to be started (and have it not auto-restart). +* Allow binding AF_UNIX sockets and when receiving connection to them, start some + service which implements that service eg: + /dev/log -> syslog service + /var/run/mdnsd -> mDNSResponder (mdndsd) +* Allow to run services attached to virtual tty, allow connection to that tty (ala "screen"). +* SystemD-like handling of filesystem mounts (see autofs documentation in kernel) + + +Investigate: +* What's the best TERM setting? gogetty gives me "linux" but I think other variants may be + better. diff --git a/control.h b/control.h new file mode 100644 index 0000000..51479f1 --- /dev/null +++ b/control.h @@ -0,0 +1,143 @@ +#include + +// Control connection for dinit + + +// forward-declaration of callback: +static void control_conn_cb(struct ev_loop * loop, ev_io * w, int revents); + + +// Packet types: +constexpr static int DINIT_CP_STARTSERVICE = 0; +constexpr static int DINIT_CP_STOPSERVICE = 1; + +// "packet" format: +// (1 byte) packet type +// (N bytes) additional data (service name, etc) +// for STARTSERVICE/STOPSERVICE: +// (2 bytes) service name length +// (M buyes) service name (without nul terminator) + + +class ControlConn +{ + struct ev_io iob; + struct ev_loop *loop; + ServiceSet *service_set; + char * iobuf; + int bufidx; + + // The packet length before we need to re-check if the packet is complete + int chklen; + + public: + ControlConn(struct ev_loop * loop, ServiceSet * service_set, int fd) : loop(loop), service_set(service_set), bufidx(0), chklen(0) + { + iobuf = new char[1024]; + + ev_io_init(&iob, control_conn_cb, fd, EV_READ); + iob.data = this; + ev_io_start(loop, &iob); + } + + void processPacket() + { + using std::string; + + int pktType = iobuf[0]; + if (pktType == DINIT_CP_STARTSERVICE || pktType == DINIT_CP_STOPSERVICE) { + if (bufidx < 4) { + chklen = 4; + return; + } 
+ + uint16_t svcSize; + memcpy(&svcSize, iobuf + 1, 2); + if (svcSize <= 0) { + // TODO error response + bufidx = 1024; // dataReady will delete - TODO clean up + } + + chklen = svcSize + 3; + if (chklen > 1024) { + // We can't have a service name this long + // TODO error response + bufidx = 1024; // TODO cleanup. + } + + if (bufidx < chklen) { + // packet not complete yet; read more + return; + } + + string serviceName(iobuf + 3, (size_t) svcSize); + if (pktType == DINIT_CP_STARTSERVICE) { + service_set->startService(serviceName.c_str()); + // TODO catch exceptions, error response + } + else { + // TODO verify the named service exists? + service_set->stopService(serviceName.c_str()); + } + + // Clear the packet from the buffer + memmove(iobuf, iobuf + chklen, 1024 - chklen); + bufidx -= chklen; + chklen = 0; + return; + } + + } + + void dataReady() + { + int fd = iob.fd; + int buffree = 1024 - bufidx; + + int r = read(fd, iobuf + bufidx, buffree); + + // Note file descriptor is non-blocking + if (r == -1) { + if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) { + return; + } + // TODO log error + delete this; + return; + } + + if (r == 0) { + delete this; + return; + } + + bufidx += r; + buffree -= r; + + // complete packet? + if (bufidx >= chklen) { + processPacket(); + } + + if (bufidx == 1024) { + // Too big packet + // TODO log error? + // TODO error response? 
+ delete this; + } + } + + ~ControlConn() + { + close(iob.fd); + ev_io_stop(loop, &iob); + delete [] iobuf; + } +}; + + +static void control_conn_cb(struct ev_loop * loop, ev_io * w, int revents) +{ + ControlConn *conn = (ControlConn *) w->data; + conn->dataReady(); +} diff --git a/dinit-start.cc b/dinit-start.cc new file mode 100644 index 0000000..2528dbe --- /dev/null +++ b/dinit-start.cc @@ -0,0 +1,88 @@ +#include +#include +#include +// #include +#include +#include +#include +#include +#include + +// dinit-start: utility to start a dinit service + +// This utility communicates with the dinit daemon via a unix socket (/dev/initctl). + +// TODO move these into a common include file: +constexpr static int DINIT_CP_STARTSERVICE = 0; +constexpr static int DINIT_CP_STOPSERVICE = 1; + + +int main(int argc, char **argv) +{ + using namespace std; + + bool show_help = argc < 2; + char *service_name = nullptr; + + for (int i = 1; i < argc; i++) { + if (argv[i][0] == '-') { + if (strcmp(argv[i], "--help") == 0) { + show_help = true; + break; + } + else { + cerr << "Unrecognized command-line parameter: " << argv[i] << endl; + return 1; + } + } + else { + // service name + service_name = argv[i]; + // TODO support multiple services (or at least give error if multiple services + // supplied) + } + } + + if (show_help) { + cout << "dinit-start: start a dinit service" << endl; + cout << " --help : show this help" << endl; + cout << " : start the named service" << endl; + return 1; + } + + int socknum = socket(AF_UNIX, SOCK_STREAM, 0); + if (socknum == -1) { + perror("socket"); + return 1; + } + + const char *naddr = "/dev/dinitctl"; + + struct sockaddr_un name; + name.sun_family = AF_UNIX; + // memset(name.sun_path, 0, sizeof(name.sun_path)); + strcpy(name.sun_path /* + 1 */, naddr); + int sunlen = 2 + strlen(naddr); // family, (string), nul + + int connr = connect(socknum, (struct sockaddr *) &name, sunlen); + if (connr == -1) { + perror("connect"); + return 1; + } + + // 
Build buffer; + uint16_t sname_len = strlen(service_name); + int bufsize = 3 + sname_len; + char * buf = new char[bufsize]; + + buf[0] = DINIT_CP_STARTSERVICE; + memcpy(buf + 1, &sname_len, 2); + memcpy(buf + 3, service_name, sname_len); + + int r = write(socknum, buf, bufsize); + if (r == -1) { + perror("write"); + } + + return 0; +} diff --git a/dinit.cc b/dinit.cc new file mode 100644 index 0000000..6fb730a --- /dev/null +++ b/dinit.cc @@ -0,0 +1,329 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "service.h" +#include "ev++.h" +#include "control.h" + + +/* TODO: prevent services from respawning too quickly */ +/* TODO: detect/guard against dependency cycles */ +/* TODO: optional automatic restart of services */ + +/* + * "simpleinit" from util-linux package handles signals as follows: + * SIGTSTP - spawn no more gettys (in preparation for shutdown etc). + * In dinit terms this should probably mean "no more auto restarts" + * (for any service). (Actually the signal acts as a toggle, if + * respawn is disabled it will be re-enabled and init will + * act as if SIGHUP had also been sent) + * SIGTERM - kill spawned gettys (which are still alive) + * Interestingly, simpleinit just sends a SIGTERM to the gettys. + * "shutdown" however has already sent SIGTERM to every process... + * "/sbin/initctl -r" - rollback services (ran by "shutdown"/halt etc) + * shouldn't return until all services have been stopped. + * shutdown calls this *after* sending SIGTERM to all processes. + * I guess this allows user processes, if any are still around, + * to die before (or just as) the services fall out from underneath + * them. On the other hand it largely subverts the ordered service + * shutdown that init provides. + * SIGQUIT - init will exec() shutdown. shutdown will detect that it is + * running as pid 1 and will just loop and reap child processes. 
+ * This is used by shutdown so that init will not hang on to its + * inode, allowing clean filesystem unmounting. + * + * Not sent by shutdown: + * SIGHUP - re-read inittab and spawn any new getty entries + * SIGINT - (ctrl+alt+del handler) - fork & exec "reboot" + * + * On the contrary dinit currently uses: + * SIGTERM - roll back services and then exec /sbin/halt + * SIGINT - roll back services and then exec /sbin/reboot + * + * It's an open question about whether dinit should roll back services *before* + * running halt/reboot, since those commands should prompt rollback of services + * anyway. But it seems safe to do so. + */ + + +static bool got_sigterm = false; + +static ServiceSet *service_set; + +static bool am_system_init = false; // true if we are the system init process +static bool reboot = false; // whether to reboot (instead of halting) + +static void sigint_reboot_cb(struct ev_loop *loop, ev_signal *w, int revents); +static void sigquit_cb(struct ev_loop *loop, ev_signal *w, int revents); +static void sigterm_cb(struct ev_loop *loop, ev_signal *w, int revents); + +static void open_control_socket(struct ev_loop *loop); + +struct ev_io control_socket_io; + + +int main(int argc, char **argv) +{ + using namespace std; + + am_system_init = (getpid() == 1); + + if (am_system_init) { + // setup STDIN, STDOUT, STDERR so that we can use them + int onefd = open("/dev/console", O_RDONLY, 0); + dup2(onefd, 0); + int twofd = open("/dev/console", O_RDWR, 0); + dup2(twofd, 1); + dup2(twofd, 2); + } + + /* Set up signal handlers etc */ + /* SIG_CHILD is ignored by default: good */ + /* sigemptyset(&sigwait_set); */ + /* sigaddset(&sigwait_set, SIGCHLD); */ + /* sigaddset(&sigwait_set, SIGINT); */ + /* sigaddset(&sigwait_set, SIGTERM); */ + /* sigprocmask(SIG_BLOCK, &sigwait_set, NULL); */ + + /* list of services to start */ + list services_to_start; + + /* service directory name */ + const char * service_dir = "/etc/dinit.d"; + + /* arguments, if given, specify a 
list of services to start. */ + /* if none are given the "boot" service is started. */ + if (argc > 1) { + for (int i = 1; i < argc; i++) { + if (argv[i][0] == '-') { + // An option... + if (strcmp(argv[i], "--services-dir") == 0 || + strcmp(argv[i], "-d") == 0) { + ++i; + if (i < argc) { + service_dir = argv[i]; + } + else { + // error TODO + } + } + else if (strcmp(argv[i], "--help") == 0) { + cout << "dinit, an init with dependency management" << endl; + cout << " --help : display help" << endl; + cout << " --services-dir , -d : set base directory for service description files (-d )" << endl; + cout << " : start service with name " << endl; + return 0; + } + else { + // unrecognized + if (! am_system_init) { + cerr << "Unrecognized option: " << argv[i] << endl; + return 1; + } + } + } + else { + services_to_start.push_back(argv[i]); + } + } + } + + if (services_to_start.empty()) { + services_to_start.push_back("boot"); + } + + // Set up signal handlers + ev_signal sigint_ev_signal; + if (am_system_init) { + ev_signal_init(&sigint_ev_signal, sigint_reboot_cb, SIGINT); + } + else { + ev_signal_init(&sigint_ev_signal, sigterm_cb, SIGINT); + } + + ev_signal sigquit_ev_signal; + if (am_system_init) { + // PID 1: SIGQUIT exec's shutdown + ev_signal_init(&sigquit_ev_signal, sigquit_cb, SIGQUIT); + } + else { + // Otherwise: SIGQUIT terminates dinit + ev_signal_init(&sigquit_ev_signal, sigterm_cb, SIGQUIT); + } + + ev_signal sigterm_ev_signal; + ev_signal_init(&sigterm_ev_signal, sigterm_cb, SIGTERM); + + /* Set up libev */ + struct ev_loop *loop = ev_default_loop(EVFLAG_AUTO /* | EVFLAG_SIGNALFD */); + ev_signal_start(loop, &sigint_ev_signal); + ev_signal_start(loop, &sigquit_ev_signal); + ev_signal_start(loop, &sigterm_ev_signal); + + // Try to open control socket (may fail due to readonly filesystem) + open_control_socket(loop); + + /* start requested services */ + service_set = new ServiceSet(service_dir); + for (list::iterator i = services_to_start.begin(); + i != 
services_to_start.end(); + ++i) { + try { + service_set->startService(*i); + } + catch (ServiceNotFound &snf) { + // TODO log this better + std::cerr << "Could not find service: " << snf.serviceName << endl; + } + catch (std::string err) { + std::cerr << err << std::endl; + throw err; + } + } + + event_loop: + + // Process events until all services have terminated. + while (! service_set->count_active_services() == 0) { + ev_loop(loop, EVLOOP_ONESHOT); + } + + if (am_system_init) { + cout << "dinit: No more active services."; + if (reboot) { + cout << " Will reboot."; + } + else if (got_sigterm) { + cout << " Will halt."; + } + else { + cout << " Re-initiating boot sequence."; + } + cout << endl; + } + + + if (am_system_init) { + if (reboot) { + // TODO log error from fork + if (fork() == 0) { + execl("/sbin/reboot", "/sbin/reboot", (char *) 0); + } + } + else if (got_sigterm) { + // TODO log error from fork + if (fork() == 0) { + execl("/sbin/halt", "/sbin/halt", (char *) 0); + } + } + else { + // Hmmmmmm. + // It could be that we started in single user mode, and the + // user has now exited the shell. We'll try and re-start the + // boot process... + try { + service_set->startService("boot"); + goto event_loop; // yes, the "evil" goto + } + catch (...) { + // TODO catch exceptions and log message as appropriate + // Now WTF do we do? try and reboot + if (fork() == 0) { + execl("/sbin/reboot", "/sbin/reboot", (char *) 0); + } + } + } + + // PID 1 should never exit: + while (true) { + pause(); + } + } + + return 0; +} + +// Callback for control socket +static void control_socket_cb(struct ev_loop *loop, ev_io *w, int revents) +{ + // Accept a connection + int sockfd = w->fd; + + int newfd = accept4(sockfd, nullptr, nullptr, SOCK_NONBLOCK | SOCK_CLOEXEC); + + if (newfd != -1) { + new ControlConn(loop, service_set, newfd); // will delete itself when it's finished + // TODO keep a set of control connections so that we can close them when + // terminating? 
+ } +} + +static void open_control_socket(struct ev_loop *loop) +{ + // TODO make this use a per-user address if PID != 1, and make the address + // overridable from the command line + + const char * saddrname = "/dev/dinitctl"; + struct sockaddr_un name; + + unlink(saddrname); + + name.sun_family = AF_UNIX; + strcpy(name.sun_path, saddrname); // TODO make this safe for long names + int namelen = 2 + strlen(saddrname); + //int namelen = sizeof(name); + + int sockfd = socket(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0); + if (sockfd == -1) { + // TODO log error + perror("socket"); + return; + } + + if (bind(sockfd, (struct sockaddr *) &name, namelen) == -1) { + // TODO log error + perror("bind"); + close(sockfd); + return; + } + + if (listen(sockfd, 10) == -1) { + // TODO log error + perror("listen"); + close(sockfd); + return; + } + + ev_io_init(&control_socket_io, control_socket_cb, sockfd, EV_READ); + ev_io_start(loop, &control_socket_io); +} + +/* handle SIGINT signal (generated by kernel when ctrl+alt+del pressed) */ +static void sigint_reboot_cb(struct ev_loop *loop, ev_signal *w, int revents) +{ + reboot = true; + service_set->stop_all_services(); +} + +/* handle SIGQUIT (if we are system init) */ +static void sigquit_cb(struct ev_loop *loop, ev_signal *w, int revents) +{ + // This allows remounting the filesystem read-only if the dinit binary has been + // unlinked. In that case the kernel holds the binary open, so that it can't be + // properly removed. 
typedef std::string string;
typedef std::string::iterator string_iterator;

// Utility function to skip white space. Returns an iterator at the
// first non-white-space position (or at end). White space is classified
// using the classic ("C") locale.
static string_iterator skipws(string_iterator i, string_iterator end)
{
    using std::locale;
    using std::isspace;

    while (i != end && isspace(*i, locale::classic())) {
        ++i;
    }
    return i;
}

// Read a setting name: the longest run of alphabetical characters and
// dashes ('-') starting at *i. On return *i points just past the name.
// Returns an empty string if *i is not at a name character.
static string read_setting_name(string_iterator * const i, string_iterator end)
{
    using std::locale;
    using std::ctype;
    using std::use_facet;

    const ctype<char> & facet = use_facet<ctype<char> >(locale::classic());

    string rval;
    // Allow alphabetical characters, and dash (-) in setting name
    while (*i != end && (**i == '-' || facet.is(ctype<char>::alpha, **i))) {
        rval += **i;
        ++(*i);
    }
    return rval;
}

// Read a setting value, starting at *i and running to the end of the line
// or to a '#' comment character, whichever comes first.
//
// Leading and trailing white space is dropped and each internal run of white
// space is collapsed to a single space. On return *i points at 'end' or at
// the '#' that terminated the value.
//
// TODO handle quoting (a '"' is currently copied like any other character),
// and give an error if multiple white-space separated strings occur without
// quoting (unless the second one is a '#' comment).
static string read_setting_value(string_iterator * const i, string_iterator end)
{
    using std::locale;
    using std::isspace;

    *i = skipws(*i, end);

    string rval;

    while (*i != end) {
        const char c = **i;
        if (isspace(c, locale::classic())) {
            *i = skipws(*i, end);
            if (*i == end || **i == '#') {
                break;   // trailing white space, or a comment follows
            }
            rval += ' '; // collapse ws to a single space
            continue;    // *i already points at the next value character
        }
        if (c == '#') {
            // hmm... comment? Probably, though they should have put a space
            // before it really. TODO throw an exception, and document
            // that '#' for comments must be preceded by space, and in values
            // must be quoted.
            break;
        }
        rval += c;
        ++(*i);
    }

    return rval;
}
(service_file.rdstate() & ios::eofbit)) { + getline(service_file, line); + string::iterator i = line.begin(); + string::iterator end = line.end(); + + i = skipws(i, end); + if (i != end) { + if (*i == '#') { + continue; // comment line + } + string setting = read_setting_name(&i, end); + i = skipws(i, end); + if (i == end || *i != '=') { + // TODO: throw a documented exception + throw std::string("Badly formed line."); + } + i = skipws(++i, end); + + if (setting == "command") { + command = read_setting_value(&i, end); + } + else if (setting == "depends-on") { + string dependency_name = read_setting_value(&i, end); + depends_on.push_back(loadServiceRecord(dependency_name.c_str())); + } + else if (setting == "logfile") { + logfile = read_setting_value(&i, end); + } + else if (setting == "restart") { + string restart = read_setting_value(&i, end); + auto_restart = (restart == "yes" || restart == "true"); + } + else if (setting == "type") { + string type_str = read_setting_value(&i, end); + if (type_str == "scripted") { + service_type = SVC_SCRIPTED; + } + else if (type_str == "process") { + service_type = SVC_PROCESS; + } + else { + throw string("Service type must be \"scripted\"" + " or \"process\""); + // TODO throw a better exception + } + } + else { + // TODO throw a better exception + throw string("Unknown setting"); + } + } + } + + // TODO check we actually have all the settings - type, command + + rval = new ServiceRecord(this, string(name), service_type, command, + &depends_on); + rval->setLogfile(logfile); + rval->setAutoRestart(auto_restart); + + records.push_back(rval); + return rval; +} diff --git a/service.cc b/service.cc new file mode 100644 index 0000000..93ffa1a --- /dev/null +++ b/service.cc @@ -0,0 +1,493 @@ +#include "service.h" +#include +#include +#include +#include +#include +#include +#include +#include + +// Tokenize a string, allow quoting +// TODO doesn't yet allow quoting... 
// Tokenize a string: split it into white-space separated words.
// Leading, trailing and repeated white space produce no empty tokens; an
// empty or all-white-space argument yields an empty vector.
// TODO doesn't yet allow quoting...
static std::vector<std::string> tokenize(const std::string &arg)
{
    using namespace std;

    vector<string> tokens;
    istringstream iss(arg);
    string word;
    // operator>> skips white space and extracts one word at a time
    while (iss >> word) {
        tokens.push_back(word);
    }
    return tokens;
}
+ if (sr->service_state == SVC_STOPPING) { + sr->stopped(); + } + else { + sr->forceStop(); + sr->stop(); + } + + if (sr->auto_restart && sr->service_set->get_auto_restart()) { + sr->start(); + } + } + else { // SVC_SCRIPTED + if (sr->service_state == SVC_STOPPING) { + if (w->rstatus == 0) { + sr->stopped(); + } + else { + // TODO + // ??? failed to stop! + // For now just pretend we stopped, so that any dependencies + // can be stopped: + sr->stopped(); + } + } + else { // SVC_STARTING + if (w->rstatus == 0) { + sr->started(); + } + else { + // failed to start + sr->failed_to_start(); + } + } + } +} + +void ServiceRecord::start() +{ + if ((service_state == SVC_STARTING || service_state == SVC_STARTED) + && desired_state == SVC_STOPPED) { + // This service was starting, or started, but was set to be stopped. + // Cancel the stop (and continue starting/running). + // TODO any listeners waiting for stop should be notified of + // its cancellation + } + + desired_state = SVC_STARTED; + + if (service_state != SVC_STOPPED) { + // Either we need do nothing (service is already started/starting) + // or the service is currently being stopped and we must wait for + // that to complete. + return; + } + + // Service state is SVC_STOPPED. Start the service. + + // First, start dependencies + bool all_deps_started = true; + for (sr_iter i = depends_on.begin(); i != depends_on.end(); ++i) { + // Note, we cannot treat a dependency as started if its force_stop + // flag is set. + if ((*i)->service_state != SVC_STARTED || (*i)->force_stop) { + all_deps_started = false; + (*i)->start(); + } + } + + if (! all_deps_started) { + // The dependencies will notify this service once they've started. + return; + } + + // Actually start this service. 
+ service_state = SVC_STARTING; + service_set->service_active(this); + + if (service_type == SVC_PROCESS) { + bool start_success = start_ps_process(); + if (start_success) { + started(); + } + else { + failed_to_start(); + } + } + else { + // Script-controlled service + bool start_success = start_ps_process(std::vector(1, "start")); + if (! start_success) { + failed_to_start(); + } + } +} + +void ServiceRecord::started() +{ + service_state = SVC_STARTED; + // TODO - inform listeners + + if (desired_state == SVC_STARTED) { + // Start any dependents whose desired state is SVC_STARTED: + for (sr_iter i = dependents.begin(); i != dependents.end(); i++) { + if ((*i)->desired_state == SVC_STARTED) { + (*i)->start(); + } + } + } + else { + stop(); + } +} + +void ServiceRecord::failed_to_start() +{ + service_state = SVC_STOPPED; + desired_state = SVC_STOPPED; + service_set->service_inactive(this); + // failure to start + // TODO - inform listeners of failure + // Cancel start of dependents: + for (sr_iter i = dependents.begin(); i != dependents.end(); i++) { + if ((*i)->desired_state == SVC_STARTED) { + (*i)->failed_dependency(); + } + } +} + +bool ServiceRecord::start_ps_process() +{ + // BIG FAT NOTE: We rely on linux semantics of vfork() here. + // Specifically: + // * Parent process execution is suspended until the forked child + // successfully exec's another program, or it exits + // * Memory is shared between the two processes until exec() + // succeeds. + // Both of the above mean that we can determine in the parent process + // whether or not the exec succeeded. If vfork instead is implemented + // as an alias of fork, it will look like the exec always succeeded. + + /* + volatile int exec_status = 0; + pid_t forkpid = vfork(); + if (forkpid == 0) { + // Child process + // ev_default_destroy(); // won't need that on this side, free up fds. + // Hmm. causes segfault. Of course. Memory is shared due to vfork. 
+
+        // Re-set stdin, stdout, stderr
+        close(0); close(1); close(2);
+        string logfile = this->logfile;
+        if (logfile.length() == 0) {
+            logfile = "/dev/null";
+        }
+
+        if (open("/dev/null", O_RDONLY) == 0) {
+            // stdin = 0. That's what we should have; proceed with opening
+            // stdout and stderr.
+            open(logfile.c_str(), O_WRONLY | O_CREAT | O_APPEND, S_IRUSR | S_IWUSR);
+            dup2(1, 2);
+        }
+
+        const char * pname = program_name.c_str();
+        char const * args[2] = { pname, 0 };
+        execvp(pname, (char ** const) args);
+        // If we got here, the exec failed
+        exec_status = errno;
+        _exit(0);
+    }
+    else {
+        // Parent process - we only reach here once the exec() above
+        // has succeeded, or _exit() above was called (because vfork()
+        // suspends the parent until either of those occurs).
+        if (exec_status == 0) {
+            // success
+            pid = forkpid;
+
+            // Add a process listener so we can detect when the
+            // service stops
+            ev_child_init(&child_listener, process_child_callback, pid, 0);
+            child_listener.data = this;
+            ev_child_start(ev_default_loop(EVFLAG_AUTO), &child_listener);
+
+            service_state = SVC_STARTED;
+            return true;
+        }
+        else {
+            return false;
+        }
+    }
+    */
+
+    return start_ps_process(std::vector<std::string>());
+}
+
+
+bool ServiceRecord::start_ps_process(const std::vector<std::string> &pargs)
+{
+    // Launch the service command (program_name, tokenized, followed by 'pargs') in a forked
+    // child; returns true if the exec succeeded. fork/exec success is not directly observable,
+    // so a CLOEXEC pipe is used: a successful exec closes it and the parent sees EOF. If the
+    // exec is unsuccessful, the errno is written to the pipe, and the parent can read it.
+
+    using std::vector;
+    using std::string;
+
+    int pipefd[2];
+    if (pipe2(pipefd, O_CLOEXEC)) {
+        // TODO log error
+        return false;
+    }
+
+    // TODO make sure pipefd's are not 0/1/2 (STDIN/OUT/ERR) - if they are, dup them
+    // until they are not.
+
+    pid_t forkpid = fork();
+    if (forkpid == -1) {
+        // TODO log error
+        close(pipefd[0]);
+        close(pipefd[1]);
+        return false;
+    }
+
+    if (forkpid == 0) {
+        // Child process
+        ev_default_destroy(); // won't need that on this side, free up fds.
+
+        // Re-set stdin, stdout, stderr
+        close(0); close(1); close(2);
+        string logfile = this->logfile;
+        if (logfile.length() == 0) {
+            logfile = "/dev/null";
+        }
+
+        // TODO rethink this logic. If we open it at not-0, shouldn't we just dup it to 0?:
+        if (open("/dev/null", O_RDONLY) == 0) {
+            // stdin = 0. That's what we should have; proceed with opening
+            // stdout and stderr.
+            open(logfile.c_str(), O_WRONLY | O_CREAT | O_APPEND, S_IRUSR | S_IWUSR);
+            dup2(1, 2);
+        }
+
+        // Tokenize the command, and add additional arguments from pargs:
+        vector<string> progAndArgs = tokenize(program_name);
+        progAndArgs.insert(progAndArgs.end(), pargs.begin(), pargs.end());
+
+        const char * pname = progAndArgs[0].c_str();
+        const char ** args = new const char *[progAndArgs.size() + 1];
+
+        for (vector<string>::size_type i = 0; i < progAndArgs.size(); ++i) {
+            args[i] = progAndArgs[i].c_str();
+        }
+        args[progAndArgs.size()] = nullptr;
+
+        execvp(pname, const_cast<char **>(args));
+
+        // If we got here, the exec failed:
+        int exec_status = errno;
+        write(pipefd[1], &exec_status, sizeof(int));
+        _exit(0);  // not exit(): the child must not run atexit handlers or flush inherited stdio
+    }
+    else {
+        // Parent process. The read below blocks until the child's end of the pipe is
+        // closed by a successful exec (O_CLOEXEC), or until the child has written its
+        // errno to the pipe after a failed exec.
+
+        close(pipefd[1]); // close the 'other end' fd
+
+        int exec_status;
+        if (read(pipefd[0], &exec_status, sizeof(int)) == 0) {
+            // pipe closed; success
+            pid = forkpid;
+
+            // Add a process listener so we can detect when the
+            // service stops
+            ev_child_init(&child_listener, process_child_callback, pid, 0);
+            child_listener.data = this;
+            ev_child_start(ev_default_loop(EVFLAG_AUTO), &child_listener);
+
+            close(pipefd[0]);
+            return true;
+        }
+        else {
+            // TODO log error
+            close(pipefd[0]);
+            return false;
+        }
+    }
+}
+
+
+
+
+// Mark this and all dependent services as force-stopped.
+void ServiceRecord::forceStop()
+{
+    force_stop = true;
+    for (sr_iter i = dependents.begin(); i != dependents.end(); ++i) {
+        (*i)->forceStop();
+    }
+}
+
+// A dependency of this service failed to start.
+void ServiceRecord::failed_dependency()
+{
+    // TODO notify listeners
+    desired_state = SVC_STOPPED;
+
+    // Presumably, we were starting. So now we're not.
+    service_state = SVC_STOPPED;
+
+    // Notify dependents of this service also
+    for (sr_iter i = dependents.begin(); i != dependents.end(); ++i) {
+        if ((*i)->desired_state == SVC_STARTED) {
+            (*i)->failed_dependency();
+        }
+    }
+}
+
+void ServiceRecord::dependentStopped()  // a dependent stopped; stop too if that is pending
+{
+    if (desired_state == SVC_STOPPED || force_stop) {
+        bool all_deps_stopped = true;
+        for (sr_iter i = dependents.begin(); i != dependents.end(); ++i) {
+            if ((*i)->service_state != SVC_STOPPED) {
+                all_deps_stopped = false;
+                break;
+            }
+        }
+
+        if (all_deps_stopped) {
+            stopping();
+        }
+    }
+}
+
+void ServiceRecord::stop()  // request stop: dependents are stopped first, then this service
+{
+    if ((service_state == SVC_STOPPING || service_state == SVC_STOPPED)
+            && desired_state == SVC_STARTED) {
+        // The service *was* stopped/stopping, but it was going to restart.
+        // Now, we'll cancel the restart.
+        // TODO inform listeners waiting for start of cancellation
+    }
+
+    desired_state = SVC_STOPPED;
+
+    if (service_state != SVC_STARTED) {
+        // If we're starting we need to wait for that to complete.
+        // If we're already stopping/stopped there's nothing to do.
+        return;
+    }
+
+    // Make sure all dependents have stopped.
+
+    bool all_deps_stopped = true;
+    for (sr_iter i = dependents.begin(); i != dependents.end(); ++i) {
+        if ((*i)->service_state != SVC_STOPPED) {
+            all_deps_stopped = false;
+            (*i)->stop();
+        }
+    }
+
+    if (! all_deps_stopped) {
+        // The dependents will notify this service once they've stopped.
+        return;
+    }
+
+    // Ok, dependents have stopped. We can stop ourselves.
+    stopping();
+}
+
+// Move to the STOPPING state and begin the actual stop (all dependents have stopped).
+void ServiceRecord::stopping()
+{
+    service_state = SVC_STOPPING;
+
+    if (service_type == SVC_PROCESS) {
+        if (pid != -1) {
+            // The process is still kicking on - must actually kill it.
+            kill(pid, SIGTERM);
+            // Now we wait; the rest is done in process_child_callback
+        }
+        else {
+            // The process is already dead.
+            stopped();
+        }
+    }
+    else if (! start_ps_process(std::vector<std::string>(1, "stop"))) {
+        // Scripted service: "stop" script failed to launch. Pretend we stopped (as for a failed script).
+        stopped();
+    }
+}
+
+void ServiceSet::service_active(ServiceRecord *sr)  // a service left the SVC_STOPPED state
+{
+    active_services++;
+}
+
+void ServiceSet::service_inactive(ServiceRecord *sr)  // a service returned to SVC_STOPPED
+{
+    active_services--;
+}
diff --git a/service.h b/service.h
new file mode 100644
index 0000000..1b09dcf
--- /dev/null
+++ b/service.h
@@ -0,0 +1,204 @@
+#include <string>
+#include <list>
+#include <vector>
+#include "ev.h"
+
+/* Possible service states */
+#define SVC_STOPPED  0  /* service is not running */
+#define SVC_STARTING 1  /* service script is running with "start" */
+#define SVC_STARTED  2  /* service is running; start script finished. */
+#define SVC_STOPPING 3  /* service script is running with "stop" */
+
+/* Service types */
+#define SVC_PROCESS  0  /* service runs as a process, and can be stopped
+                           by sending the process a signal */
+#define SVC_SCRIPTED 1  /* service requires a command to start, and another
+                           command to stop */
+
+
+
+// Exception type; serviceName holds the name of the service concerned.
+class ServiceNotFound
+{
+    public:
+    std::string serviceName;
+};
+
+class ServiceSet; // forward declaration
+
+// Record of one service: its type, current/desired state, dependency links and process.
+class ServiceRecord
+{
+    typedef std::string string;
+
+    string service_name;
+    int service_type;  /* SVC_PROCESS or SVC_SCRIPTED */
+    int service_state; /* SVC_STOPPED, _STARTING, _STARTED, _STOPPING */
+    int desired_state; /* SVC_STOPPED / SVC_STARTED */
+    bool force_stop = false; // true if the service must actually stop. This is the
+                          // case if for example the process dies; the service,
+                          // and all its dependents, MUST be stopped.
+    string program_name;  /* executable program or script */
+    string logfile;       /* log file name, empty string specifies /dev/null */
+    bool auto_restart;    /* whether to restart this (process) if it dies */
+
+    typedef std::list<ServiceRecord *> sr_list;
+    typedef sr_list::iterator sr_iter;
+
+    sr_list depends_on; // services this one depends on
+    sr_list dependents; // services depending on this one
+    // unsigned wait_count;  /* if we are waiting for dependents/dependencies to
+    //                         start/stop, this is how many we're waiting for */
+
+    ServiceSet *service_set; // the set this service belongs to
+
+    // Implementation details
+
+    pid_t pid = -1;  /* PID of the process. If state is STARTING or STOPPING,
+                        this is PID of the service script; otherwise it is the
+                        PID of the process itself (process service).
+                        -1 means there is no process (see stopping()). */
+
+    ev_child child_listener;
+
+    // Move service to STOPPING state. This can only be called once
+    // all dependents have stopped.
+    void stopping();
+
+    // Service has actually stopped (includes having all dependents
+    // reaching STOPPED state).
+    void stopped();
+
+    // Service has successfully started
+    void started();
+
+    // Service failed to start
+    void failed_to_start();
+
+    // A dependency of this service failed to start.
+    void failed_dependency();
+
+    // Launch the service command (optionally with extra arguments), return true on success
+    bool start_ps_process();
+    bool start_ps_process(const std::vector<std::string> &args);
+
+    // Callback from libev when a child process dies
+    static void process_child_callback(struct ev_loop *loop, struct ev_child *w,
+            int revents);
+
+    void dependentStopped(); // called when a dependent stopped
+
+    void forceStop(); // force-stop this service and all dependents
+
+    public:
+    ServiceRecord(ServiceSet *set, string name, int service_type, string command,
+            std::list<ServiceRecord *> * pdepends_on)
+    {
+        service_state = SVC_STOPPED;
+        desired_state = SVC_STOPPED;
+
+        service_set = set;
+        service_name = name;
+        this->service_type = service_type;
+        program_name = command;
+        auto_restart = false;
+        // TODO splice the contents from the depends_on parameter
+        // rather than duplicating the list.
+        this->depends_on = *pdepends_on;
+
+        // For each dependency, add us as a dependent.
+        for (sr_iter i = depends_on.begin(); i != depends_on.end(); ++i) {
+            (*i)->dependents.push_back(this);
+        }
+    }
+
+    // Set logfile, should be done before service is started
+    void setLogfile(string logfile)
+    {
+        this->logfile = logfile;
+    }
+
+    // Set whether this service should automatically restart when it dies
+    void setAutoRestart(bool auto_restart)
+    {
+        this->auto_restart = auto_restart;
+    }
+
+    const char *getServiceName() const { return service_name.c_str(); }
+    int getState() const { return service_state; }
+
+    void start();  // start the service
+    void stop();   // stop the service
+};
+
+// The set of loaded service records, plus a count of those currently active.
+class ServiceSet
+{
+    int active_services;
+    std::list<ServiceRecord *> records;
+    const char *service_dir;  // directory containing service descriptions
+    bool restart_enabled; // whether automatic restart is enabled (allowed)
+
+    // Private methods
+
+    // Locate an existing service record.
+    ServiceRecord *findService(std::string name);
+
+    // Load a service description, and dependencies, if there is no existing
+    // record for the given name.
+    ServiceRecord *loadServiceRecord(const char *name);
+
+    // Public
+
+    public:
+    ServiceSet(const char *service_dir)
+    {
+        this->service_dir = service_dir;
+        active_services = 0;
+        restart_enabled = true;
+    }
+
+    // Start the service with the given name. The named service will begin
+    // transition to the 'started' state.
+    //
+    // Throws an exception if the
+    // service description cannot be loaded.
+    void startService(const char *name);
+
+    // Stop the service with the given name. The named service will begin
+    // transition to the 'stopped' state.
+    void stopService(const std::string &name);
+
+    // Notification from service that it is active (state != SVC_STOPPED)
+    // Only to be called on the transition from inactive to active.
+    void service_active(ServiceRecord *);
+
+    // Notification from service that it is inactive (SVC_STOPPED)
+    // Only to be called on the transition from active to inactive.
+    void service_inactive(ServiceRecord *);
+
+    // Find out how many services are active (starting, running or stopping,
+    // but not stopped).
+    int count_active_services() const
+    {
+        return active_services;
+    }
+
+    void stop_all_services()  // disables automatic restart, then stops every record
+    {
+        restart_enabled = false;
+        for (std::list<ServiceRecord *>::iterator i = records.begin(); i != records.end(); ++i) {
+            (*i)->stop();
+        }
+    }
+
+    void set_auto_restart(bool restart)
+    {
+        restart_enabled = restart;
+    }
+
+    bool get_auto_restart() const
+    {
+        return restart_enabled;
+    }
+};
-- 
2.25.1