Initial implementation of service reloading
author Davin McCall <davmac@davmac.org>
Sun, 1 Dec 2019 01:27:12 +0000 (11:27 +1000)
committer Davin McCall <davmac@davmac.org>
Sun, 1 Dec 2019 04:59:35 +0000 (14:59 +1000)
src/includes/proc-service.h
src/includes/service.h
src/load-service.cc
src/service.cc

index 010037a18f83eb14db75fdcbc9d8a79eaa78e505..99aad596b03dbae0a52ab4eb9761608726b434d4 100644 (file)
@@ -255,6 +255,20 @@ class base_process_service : public service_record
         restart_timer.deregister(event_loop);
     }
 
+    // Set the command to run this service (executable and arguments, nul separated). The command_parts_p
+    // vector must contain pointers to each part, i.e. pointers into the character data of command_p
+    // (null entries are permitted and are left untouched).
+    //
+    // Moving a std::string is permitted to relocate its characters (short strings are commonly stored
+    // inside the string object itself), so after ownership of the string has been taken the part
+    // pointers are re-based onto the buffer now held by this service. Without this they could be left
+    // pointing into the moved-from argument.
+    void set_command(std::string &&command_p, std::vector<const char *> &&command_parts_p) noexcept
+    {
+        const char * const old_base = command_p.data();
+        program_name = std::move(command_p);
+        exec_arg_parts = std::move(command_parts_p);
+
+        const char * const new_base = program_name.data();
+        if (new_base != old_base) {
+            // The characters were relocated by the move: translate each part pointer by its offset.
+            for (const char *&part : exec_arg_parts) {
+                if (part != nullptr) {
+                    part = new_base + (part - old_base);
+                }
+            }
+        }
+    }
+
+    // Retrieve a copy of the command used to run this service.
+    //   command_p - receives a copy of the command string (executable and arguments, nul separated)
+    //   command_parts_p - receives pointers to the beginning of each part, within command_p
+    // The part pointers held by the service refer to the service's own command string, so they are
+    // translated here to refer to the copy placed in command_p. They therefore do not depend on the
+    // service's own string, which may later be replaced. Null entries are copied unchanged.
+    // May throw std::bad_alloc.
+    void get_command(std::string &command_p, std::vector<const char *> &command_parts_p)
+    {
+        command_p = program_name;
+        command_parts_p = exec_arg_parts;
+
+        const char * const own_base = program_name.data();
+        const char * const copy_base = command_p.data();
+        for (const char *&part : command_parts_p) {
+            if (part != nullptr) {
+                part = copy_base + (part - own_base);
+            }
+        }
+    }
+
     // Set the stop command and arguments (may throw std::bad_alloc)
     void set_stop_command(const std::string &command,
             std::list<std::pair<unsigned,unsigned>> &stop_command_offsets)
@@ -263,11 +277,26 @@ class base_process_service : public service_record
         stop_arg_parts = separate_args(stop_command, stop_command_offsets);
     }
 
+    // Set the stop command as a sequence of nul-terminated parts (arguments).
+    //   command - the command and arguments, each terminated with nul ('\0')
+    //   command_parts - pointers to the beginning of each command part (within 'command'); null
+    //                   entries are permitted and are left untouched
+    //
+    // Moving a std::string is permitted to relocate its characters (short strings are commonly stored
+    // inside the string object itself), so after ownership of the string has been taken the part
+    // pointers are re-based onto the buffer now held by this service.
+    void set_stop_command(std::string &&command,
+            std::vector<const char *> &&command_parts) noexcept
+    {
+        const char * const old_base = command.data();
+        stop_command = std::move(command);
+        stop_arg_parts = std::move(command_parts);
+
+        const char * const new_base = stop_command.data();
+        if (new_base != old_base) {
+            // The characters were relocated by the move: translate each part pointer by its offset.
+            for (const char *&part : stop_arg_parts) {
+                if (part != nullptr) {
+                    part = new_base + (part - old_base);
+                }
+            }
+        }
+    }
+
     void set_env_file(const std::string &env_file_p)
     {
         env_file = env_file_p;
     }
 
+    // Set the environment file, taking over the contents of the supplied string rather than copying.
+    void set_env_file(std::string &&env_file_p) noexcept
+    {
+        this->env_file = std::move(env_file_p);
+    }
+
     void set_rlimits(std::vector<service_rlimits> &&rlimits_p)
     {
         rlimits = std::move(rlimits_p);
@@ -313,6 +342,11 @@ class base_process_service : public service_record
         working_dir = working_dir_p;
     }
 
+    // Set the working directory, taking over the contents of the supplied string rather than copying.
+    void set_working_dir(string &&working_dir_p) noexcept
+    {
+        this->working_dir = std::move(working_dir_p);
+    }
+
     // Set the notification fd number that the service process will use
     void set_notification_fd(int fd)
     {
index ef1b6a67651fbc0eb27534e7920f4abbd7e4eac3..3bdad9e6d844d63a90fe93cacf89174855a74cba 100644 (file)
@@ -153,6 +153,11 @@ class service_dep
     {
         return to;
     }
+
+    // Redirect this dependency so that it refers to the given service record.
+    void set_to(service_record *new_to) noexcept
+    {
+        this->to = new_to;
+    }
 };
 
 /* preliminary service dependency information */
@@ -454,6 +459,11 @@ class service_record
         this->logfile = logfile;
     }
     
+    // Set the log file, taking over the contents of the supplied string rather than copying.
+    void set_log_file(std::string &&logfile) noexcept
+    {
+        std::string &own_logfile = this->logfile;
+        own_logfile = std::move(logfile);
+    }
+
     // Set whether this service should automatically restart when it dies
     void set_auto_restart(bool auto_restart) noexcept
     {
@@ -486,7 +496,7 @@ class service_record
     }
 
     // Set the service that this one "chains" to. When this service completes, the named service is started.
-    void set_chain_to(string &&chain_to)
+    void set_chain_to(string &&chain_to) noexcept
     {
         start_on_completion = std::move(chain_to);
     }
@@ -606,23 +616,36 @@ class service_record
     // calling this. May throw std::bad_alloc.
     service_dep & add_dep(service_record *to, dependency_type dep_type)
     {
-        depends_on.emplace_back(this, to, dep_type);
+        return add_dep(to, dep_type, depends_on.end(), false);
+    }
+
+    // Add a dependency. Caller must ensure that the services are in an appropriate state and that
+    // a circular dependency chain is not created. Propagation queues should be processed after
+    // calling this. May throw std::bad_alloc.
+    //   i - where to insert the dependency (in dependencies list)
+    //   reattach - whether to acquire the required service if it and the dependent are started.
+    //             (if false, only REGULAR dependencies will cause acquire if the dependent is started,
+    //              doing so regardless of required service's state).
+    service_dep & add_dep(service_record *to, dependency_type dep_type, dep_list::iterator i, bool reattach)
+    {
+        auto pre_i = depends_on.emplace(i, this, to, dep_type);
         try {
-            to->dependents.push_back(& depends_on.back());
+            to->dependents.push_back(&(*pre_i));
         }
         catch (...) {
-            depends_on.pop_back();
+            depends_on.erase(i);
             throw;
         }
 
-        if (dep_type == dependency_type::REGULAR) {
+        if (dep_type == dependency_type::REGULAR
+                || (reattach && to->get_state() == service_state_t::STARTED)) {
             if (service_state == service_state_t::STARTING || service_state == service_state_t::STARTED) {
                 to->require();
-                depends_on.back().holding_acq = true;
+                pre_i->holding_acq = true;
             }
         }
 
-        return depends_on.back();
+        return *pre_i;
     }
 
     // Remove a dependency, of the given type, to the given service. Propagation queues should be processed
@@ -632,21 +655,27 @@ class service_record
         for (auto i = depends_on.begin(); i != depends_on.end(); i++) {
             auto & dep = *i;
             if (dep.get_to() == to && dep.dep_type == dep_type) {
-                for (auto j = to->dependents.begin(); ; j++) {
-                    if (*j == &dep) {
-                        to->dependents.erase(j);
-                        break;
-                    }
-                }
-                if (dep.holding_acq) {
-                    to->release();
-                }
-                depends_on.erase(i);
+                rm_dep(i);
                 break;
             }
         }
     }
 
+    dep_list::iterator rm_dep(dep_list::iterator i) noexcept
+    {
+        auto to = i->get_to();
+        for (auto j = to->dependents.begin(); ; j++) {
+            if (*j == &(*i)) {
+                to->dependents.erase(j);
+                break;
+            }
+        }
+        if (i->holding_acq) {
+            to->release();
+        }
+        return depends_on.erase(i);
+    }
+
     // Start a specific dependency of this service. Should only be called if this service is in an
     // appropriate state (started, starting). The dependency is marked as holding acquired; when
     // this service stops, the dependency will be released and may also stop.
@@ -753,6 +782,16 @@ class service_set
         return r;
     }
 
+    // Re-load the description of an already-loaded service. This base implementation performs no
+    // reloading and hands the same record back unchanged. An overriding implementation may return a
+    // different (new) record if the service type changes, in which case the old record should be
+    // removed and deleted by the caller.
+    // Throws:
+    //   service_load_exc (or subclass) on problem with service description
+    //   std::bad_alloc on out-of-memory condition
+    virtual service_record *reload_service(service_record *service)
+    {
+        service_record * const unchanged = service;
+        return unchanged;
+    }
+
     // Start the service with the given name. The named service will begin
     // transition to the 'started' state.
     //
@@ -933,7 +972,14 @@ class dirload_service_set : public service_set
         return service_dirs[n].get_dir();
     }
 
-    service_record *load_service(const char *name) override;
+    // Load the named service; forwards to the two-argument overload with no service to avoid.
+    service_record *load_service(const char *name) override
+    {
+        const service_record * const nothing_to_avoid = nullptr;
+        return load_service(name, nothing_to_avoid);
+    }
+
+    service_record *load_service(const char *name, const service_record *avoid_circular);
+
+    service_record *reload_service(service_record *service) override;
 
     int get_set_type_id() override
     {
index 35de6519331f88435bfab1cb669abd7f6b4889a1..f5f01fdb7d22c85598f2a0429c90800c8770cc64 100644 (file)
@@ -2,8 +2,8 @@
 #include <string>
 #include <fstream>
 #include <locale>
-#include <iostream>
 #include <limits>
+#include <list>
 
 #include <cstring>
 #include <cstdlib>
@@ -75,7 +75,8 @@ static void process_dep_dir(dirload_service_set &sset,
         const char *servicename,
         const string &service_filename,
         std::list<prelim_dep> &deplist, const std::string &depdirpath,
-        dependency_type dep_type)
+        dependency_type dep_type,
+        const service_record *avoid_circular)
 {
     std::string depdir_fname = combine_paths(parent_path(service_filename), depdirpath.c_str());
 
@@ -118,7 +119,7 @@ static void process_dep_dir(dirload_service_set &sset,
 // problem occurs (I/O error, service description not found etc). Throws std::bad_alloc
 // if a memory allocation failure occurs.
 //
-service_record * dirload_service_set::load_service(const char * name)
+service_record * dirload_service_set::load_service(const char * name, const service_record *avoid_circular)
 {
     using std::string;
     using std::ifstream;
@@ -134,8 +135,8 @@ service_record * dirload_service_set::load_service(const char * name)
 
     // First try and find an existing record...
     service_record * rval = find_service(string(name));
-    if (rval != 0) {
-        if (rval->is_dummy()) {
+    if (rval != nullptr) {
+        if (rval == avoid_circular || rval->is_dummy()) {
             throw service_cyclic_dependency(name);
         }
         return rval;
@@ -178,11 +179,11 @@ service_record * dirload_service_set::load_service(const char * name)
 
             auto process_dep_dir_n = [&](std::list<prelim_dep> &deplist, const std::string &waitsford,
                     dependency_type dep_type) -> void {
-                process_dep_dir(*this, name, service_filename, deplist, waitsford, dep_type);
+                process_dep_dir(*this, name, service_filename, deplist, waitsford, dep_type, avoid_circular);
             };
 
             auto load_service_n = [&](const string &dep_name) -> service_record * {
-                return load_service(dep_name.c_str());
+                return load_service(dep_name.c_str(), avoid_circular);
             };
 
             process_service_line(settings, name, line, setting, i, end, load_service_n, process_dep_dir_n);
@@ -293,3 +294,351 @@ service_record * dirload_service_set::load_service(const char * name)
         throw;
     }
 }
+
+// Update the dependencies of the specified service atomically. May fail with bad_alloc.
+static void update_depenencies(service_record *service,
+        dinit_load::service_settings_wrapper<prelim_dep> &settings)
+{
+    std::list<service_dep> &deps = service->get_dependencies();
+    auto first_preexisting = deps.begin();
+
+    // build a set of services currently issuing acquisition
+    std::unordered_set<service_record *> deps_with_acqs;
+    for (auto i = deps.begin(), e = deps.end(); i != e; ++i) {
+        if (i->holding_acq) {
+            deps_with_acqs.insert(i->get_to());
+        }
+    }
+
+    try {
+        // Insert all the new dependencies before the first pre-existing dependency
+        for (auto &new_dep : settings.depends) {
+            bool has_acq = deps_with_acqs.count(new_dep.to);
+            service->add_dep(new_dep.to, new_dep.dep_type, first_preexisting, has_acq);
+        }
+    }
+    catch (...) {
+        // remove the inserted dependencies
+        for (auto i = deps.begin(); i != first_preexisting; ++i) {
+            i = service->rm_dep(i);
+        }
+
+        // re-throw the exception
+        throw;
+    }
+
+    // Now remove all pre-existing dependencies (no exceptions possible from here).
+    for( ; first_preexisting != deps.end(); ) {
+        first_preexisting = service->rm_dep(first_preexisting);
+    }
+}
+
+// Update the command, and dependencies, of the specified service atomically. May fail with bad_alloc.
+static void update_command_and_dependencies(base_process_service *service,
+        dinit_load::service_settings_wrapper<prelim_dep> &settings)
+{
+    // Get the current command parts
+    std::string orig_cmd; std::vector<const char *> orig_arg_parts;
+    service->get_command(orig_cmd, orig_arg_parts);
+
+    // Separate the new command parts and set
+    std::vector<const char *> cmd_arg_parts = separate_args(settings.command, settings.command_offsets);
+    service->set_command(std::move(settings.command), std::move(cmd_arg_parts));
+
+    try {
+        update_depenencies(service, settings);
+    }
+    catch (...) {
+        // restore original command
+        service->set_command(std::move(orig_cmd), std::move(orig_arg_parts));
+
+        // re-throw the exception
+        throw;
+    }
+}
+
+service_record * dirload_service_set::reload_service(service_record * service)
+{
+    // Reload 'service' from its description file (returns it, or a replacement). We have these problems:
+    // - ideally want to allow changing service type, at least for stopped services. That implies creating
+    //   a new (replacement) service_record object, at least in cases where the type does change.
+    // - dependencies may change (including addition of new dependencies which aren't yet loaded). We need
+    //   to prevent cyclic dependencies forming.
+    // - We want atomicity. If any new settings are not valid/alterable, or if a cyclic dependency is
+    //   created, nothing should change. Ideally this would extend to unloading any dependencies which were
+    //   loaded as part of the reload attempt.
+    // - We need to either transfer handles referring to the old service (so that they refer to the new
+    //   service), or make them invalid. Or, we alter the original service without creating a new one
+    //   (which we can only do if the type doesn't change).
+
+    // Approach:
+    // - remember the initial service count, so we can remove services loaded as part of the reload
+    //   operation if we want to abort it later (i.e. if service count changed from N to N+X, remove the
+    //   last X services)
+    // - check that the new settings are valid (if the service is running, check if the settings can be
+    //   altered, though we may just defer some changes until service is restarted)
+    // - check all dependencies of the newly created service record for cyclic dependencies, via depth-first
+    //   traversal.
+    // - If changing type:
+    //   - create the service initially just as if loading a new service (but with no dummy placeholder,
+    //     use the original service for that).
+    //   - switch all dependents to depend on the new record. Copy necessary runtime data from the original
+    //     to the new service record. Remove dependencies from the old record, and release any dependency
+    //     services as appropriate (so they stop if no longer needed). Finally, remove the old service
+    //     record and delete it.
+    //  Otherwise:
+    //   - copy the new settings to the existing service
+    //   - fix dependencies
+    //
+    // Limitations:
+    // - caller must check there are no handles (or only a single requesting handle) to the service before
+    //   calling
+    // - cannot change the type of a non-stopped service
+
+    using std::string;
+    using std::ifstream;
+    using std::ios;
+    using std::ios_base;
+    using std::locale;
+    using std::isspace;
+
+    using std::list;
+    using std::pair;
+
+    using namespace dinit_load;
+
+    service_record *rval = nullptr;
+    const string &name = service->get_name();
+
+    ifstream service_file;
+    string service_filename;
+
+    // Locate the service description file: try each service directory in turn until one opens.
+    for (auto &service_dir : service_dirs) {
+        service_filename = service_dir.get_dir();
+        if (*(service_filename.rbegin()) != '/') {
+            service_filename += '/';
+        }
+        service_filename += name;
+
+        service_file.open(service_filename.c_str(), ios::in);
+        if (service_file) break;
+    }
+
+    if (! service_file) {
+        throw service_not_found(string(name));
+    }
+
+    service_settings_wrapper<prelim_dep> settings;
+
+    string line;
+    // getline can set failbit if it reaches end-of-file, we don't want an exception in that case. There's
+    // no good way to handle an I/O error however, so we'll have exceptions thrown on badbit:
+    service_file.exceptions(ios::badbit);
+
+    bool create_new_record = true;
+
+    try {
+        process_service_file(name, service_file,
+                [&](string &line, string &setting, string_iterator &i, string_iterator &end) -> void {
+
+            auto process_dep_dir_n = [&](std::list<prelim_dep> &deplist, const std::string &waitsford,
+                    dependency_type dep_type) -> void {
+                process_dep_dir(*this, name.c_str(), service_filename, deplist, waitsford, dep_type, service);
+            };
+
+            auto load_service_n = [&](const string &dep_name) -> service_record * {
+                return load_service(dep_name.c_str(), service);
+            };
+
+            process_service_line(settings, name.c_str(), line, setting, i, end, load_service_n, process_dep_dir_n);
+        });
+
+        service_file.close();
+
+        auto service_type = settings.service_type;
+
+        if (service_type == service_type_t::PROCESS || service_type == service_type_t::BGPROCESS
+                || service_type == service_type_t::SCRIPTED) {
+            if (settings.command.length() == 0) {
+                throw service_description_exc(name, "Service command not specified.");
+            }
+        }
+
+        // Make sure settings are able to be changed/are compatible
+        if (service->get_state() != service_state_t::STOPPED) {
+            // Can not change type of a running service.
+            if (service_type != service->get_type()) {
+                throw service_description_exc(name, "Cannot change type of non-stopped service.");
+            }
+            // Can not alter a starting/stopping service, at least for now.
+            if (service->get_state() != service_state_t::STARTED) {
+                throw service_description_exc(name,
+                        "Cannot alter settings for service which is currently starting/stopping.");
+            }
+
+            // Check validity of dependencies (if started, regular deps must be started)
+            for (auto &new_dep : settings.depends) {
+                if (new_dep.dep_type == dependency_type::REGULAR) {
+                    if (new_dep.to->get_state() != service_state_t::STARTED) {
+                        throw service_description_exc(name,
+                                std::string("Cannot add non-started dependency '")
+                                    + new_dep.to->get_name() + "'.");
+                    }
+                }
+            }
+
+            //    XXX cannot change pid_file
+            //    XXX cannot change service flags: runs_on_console, shares_console
+            //    XXX cannot change inittab_id/inittab_line
+
+            // Already started; we must replace settings on existing service record
+            create_new_record = false;
+        }
+
+        // Note, we need to be very careful to handle exceptions properly and roll back any changes that
+        // we've made before the exception occurred.
+
+        if (service_type == service_type_t::PROCESS) {
+            do_env_subst(settings.command, settings.command_offsets, settings.do_sub_vars);
+            process_service *rvalps;
+            if (create_new_record) {
+                rvalps = new process_service(this, string(name), std::move(settings.command),
+                        settings.command_offsets, settings.depends);
+            }
+            else {
+                rvalps = static_cast<process_service *>(service);
+                update_command_and_dependencies(rvalps, settings);
+            }
+            rval = rvalps;
+            // All of the following should be noexcept or must perform rollback on exception
+            rvalps->set_working_dir(std::move(settings.working_dir));
+            rvalps->set_env_file(std::move(settings.env_file));
+            rvalps->set_rlimits(std::move(settings.rlimits));
+            rvalps->set_restart_interval(settings.restart_interval, settings.max_restarts);
+            rvalps->set_restart_delay(settings.restart_delay);
+            rvalps->set_stop_timeout(settings.stop_timeout);
+            rvalps->set_start_timeout(settings.start_timeout);
+            rvalps->set_extra_termination_signal(settings.term_signal);
+            rvalps->set_run_as_uid_gid(settings.run_as_uid, settings.run_as_gid);
+            rvalps->set_notification_fd(settings.readiness_fd);
+            rvalps->set_notification_var(std::move(settings.readiness_var));
+            #if USE_UTMPX
+            rvalps->set_utmp_id(settings.inittab_id);
+            rvalps->set_utmp_line(settings.inittab_line);
+            #endif
+        }
+        else if (service_type == service_type_t::BGPROCESS) {
+            do_env_subst(settings.command, settings.command_offsets, settings.do_sub_vars);
+            bgproc_service *rvalps;
+            if (create_new_record) {
+                rvalps = new bgproc_service(this, string(name), std::move(settings.command),
+                        settings.command_offsets, settings.depends);
+            }
+            else {
+                rvalps = static_cast<bgproc_service *>(service);
+                update_command_and_dependencies(rvalps, settings);
+            }
+            rval = rvalps;
+            // All of the following should be noexcept or must perform rollback on exception
+            rvalps->set_working_dir(std::move(settings.working_dir));
+            rvalps->set_env_file(std::move(settings.env_file));
+            rvalps->set_rlimits(std::move(settings.rlimits));
+            rvalps->set_pid_file(std::move(settings.pid_file));
+            rvalps->set_restart_interval(settings.restart_interval, settings.max_restarts);
+            rvalps->set_restart_delay(settings.restart_delay);
+            rvalps->set_stop_timeout(settings.stop_timeout);
+            rvalps->set_start_timeout(settings.start_timeout);
+            rvalps->set_extra_termination_signal(settings.term_signal);
+            rvalps->set_run_as_uid_gid(settings.run_as_uid, settings.run_as_gid);
+            settings.onstart_flags.runs_on_console = false;
+        }
+        else if (service_type == service_type_t::SCRIPTED) {
+            do_env_subst(settings.command, settings.command_offsets, settings.do_sub_vars);
+            std::vector<const char *> stop_arg_parts = separate_args(settings.stop_command, settings.stop_command_offsets);
+            scripted_service *rvalps;
+            if (create_new_record) {
+                rvalps = new scripted_service(this, string(name), std::move(settings.command),
+                        settings.command_offsets, settings.depends);
+            }
+            else {
+                rvalps = static_cast<scripted_service *>(service);
+                update_command_and_dependencies(rvalps, settings);
+            }
+            rval = rvalps;
+            // All of the following should be noexcept or must perform rollback on exception
+            rvalps->set_stop_command(std::move(settings.stop_command), std::move(stop_arg_parts));
+            rvalps->set_working_dir(std::move(settings.working_dir));
+            rvalps->set_env_file(std::move(settings.env_file));
+            rvalps->set_rlimits(std::move(settings.rlimits));
+            rvalps->set_stop_timeout(settings.stop_timeout);
+            rvalps->set_start_timeout(settings.start_timeout);
+            rvalps->set_extra_termination_signal(settings.term_signal);
+            rvalps->set_run_as_uid_gid(settings.run_as_uid, settings.run_as_gid);
+        }
+        else {
+            if (create_new_record) {
+                rval = new service_record(this, string(name), service_type, settings.depends);
+            }
+            else {
+                rval = service;
+                update_depenencies(rval, settings);
+            }
+        }
+
+        rval->set_log_file(std::move(settings.logfile));
+        rval->set_auto_restart(settings.auto_restart);
+        rval->set_smooth_recovery(settings.smooth_recovery);
+        rval->set_flags(settings.onstart_flags);
+        rval->set_socket_details(std::move(settings.socket_path), settings.socket_perms,
+                settings.socket_uid, settings.socket_gid);
+        rval->set_chain_to(std::move(settings.chain_to_name));
+
+        if (create_new_record) {
+            // Link the new record to its dependencies, then re-point dependents of the old record at it.
+
+            // Add dependent-link for all dependencies. Add to the new service first, so we can rollback
+            // on failure:
+            int added_dep_links = 0;
+            try {
+                for (auto &dep : rval->get_dependencies()) {
+                    dep.get_to()->get_dependents().push_back(&dep);
+                    added_dep_links++;
+                }
+            }
+            catch (...) {
+                for (auto &dep : rval->get_dependencies()) {
+                    if (added_dep_links-- == 0) break;
+                    dep.get_to()->get_dependents().pop_back();
+                }
+                throw;
+            }
+
+            // Remove dependent-link for all dependencies from the original:
+            service->prepare_for_unload();
+
+            // Set links in all dependents to the original to point to the new service:
+            rval->get_dependents() = std::move(service->get_dependents());
+            for (auto n : rval->get_dependents()) {
+                n->set_to(rval);
+            }
+        }
+
+        return rval;
+    }
+    catch (setting_exception &setting_exc)
+    {
+        if (create_new_record) delete rval;
+        throw service_description_exc(name, std::move(setting_exc.get_info()));
+    }
+    catch (std::system_error &sys_err)
+    {
+        if (create_new_record) delete rval;
+        throw service_description_exc(name, sys_err.what());
+    }
+    catch (...) // (should only be std::bad_alloc / service_description_exc)
+    {
+        if (create_new_record) delete rval;
+        throw;
+    }
+}
index d83a4af6d2e085d67803601b4774da00140b0b66..b0a795809b55cd7e3c9cc5a27da363e236ce9dde 100644 (file)
@@ -146,6 +146,7 @@ void service_record::release(bool issue_stop) noexcept
 {
     if (--required_by == 0) {
         desired_state = service_state_t::STOPPED;
+        prop_require = false;
 
         // Can stop, and can release dependencies now. We don't need to issue a release if
         // the require was pending though:
@@ -604,6 +605,8 @@ bool service_record::stop_dependents() noexcept
             dept->get_from()->prop_stop = true;
             services->add_prop_queue(dept->get_from());
         }
+        // Note that soft dependencies are held (for now). If we restart, we don't want those dependencies
+        // to be broken.
     }
 
     return all_deps_stopped;