Make restart check interval configurable.
author Davin McCall <davmac@davmac.org>
Mon, 5 Jun 2017 22:06:21 +0000 (23:06 +0100)
committer Davin McCall <davmac@davmac.org>
Mon, 5 Jun 2017 22:06:21 +0000 (23:06 +0100)
README
src/load_service.cc
src/service.cc
src/service.h

diff --git a/README b/README
index c9213a7056463a94d1001f0b2d939ed2919dc5ba..34b5969e14e5ad6503cb9191e0238e986c859d83 100644 (file)
--- a/README
+++ b/README
@@ -164,6 +164,13 @@ smooth-recovery = yes | true | no | false
    the service does not reach the stopped state when the process terminates
    unexpectedly).
 
+restart-limit-interval = XXX.YYYY
+   Specifies the interval, in seconds, over which restarts are limited. If a
+   process automatically restarts more than a certain number of times (default
+   3) in this time interval, it will not restart again. The default value is
+   10 seconds. Use this to prevent broken services from continuously
+   restarting ad infinitum.
+
 pid-file = (path to file)
    For "bgprocess" type services only; specifies the path of the file where
    daemon will write its process ID before detaching.
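diff --git a/src/load_service.cc b/src/load_service.cc
index e01f50c80f3636d1b16e3f546a5f720f13f378fd..1ade7cd03444480f19c64feb1313d8bb57ea293f 100644 (file)
--- a/src/load_service.cc
+++ b/src/load_service.cc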
@@ -55,11 +55,11 @@ namespace {
         std::string info;
         
         public:
-        SettingException(const std::string &exc_info) : info(exc_info)
+        SettingException(const std::string &&exc_info) : info(std::move(exc_info))
         {
         }
         
-        const std::string &getInfo()
+        std::string &getInfo()
         {
             return info;
         }
@@ -233,10 +233,10 @@ static uid_t parse_uid_param(const std::string &param, const std::string &servic
     if (pwent == nullptr) {
         // Maybe an error, maybe just no entry.
         if (errno == 0) {
-            throw new ServiceDescriptionExc(service_name, "Specified user \"" + param + "\" does not exist in system database.");
+            throw ServiceDescriptionExc(service_name, "Specified user \"" + param + "\" does not exist in system database.");
         }
         else {
-            throw new ServiceDescriptionExc(service_name, std::string("Error accessing user database: ") + strerror(errno));
+            throw ServiceDescriptionExc(service_name, std::string("Error accessing user database: ") + strerror(errno));
         }
     }
     
@@ -280,10 +280,10 @@ static gid_t parse_gid_param(const std::string &param, const std::string &servic
     if (grent == nullptr) {
         // Maybe an error, maybe just no entry.
         if (errno == 0) {
-            throw new ServiceDescriptionExc(service_name, "Specified group \"" + param + "\" does not exist in system database.");
+            throw ServiceDescriptionExc(service_name, "Specified group \"" + param + "\" does not exist in system database.");
         }
         else {
-            throw new ServiceDescriptionExc(service_name, std::string("Error accessing group database: ") + strerror(errno));
+            throw ServiceDescriptionExc(service_name, std::string("Error accessing group database: ") + strerror(errno));
         }
     }
     
@@ -344,6 +344,11 @@ ServiceRecord * ServiceSet::loadServiceRecord(const char * name)
     // invalid value, so it's safe to assume that we can do the same:
     uid_t socket_uid = -1;
     gid_t socket_gid = -1;
+    // Restart limit interval / count; default is 10 seconds, 3 restarts:
+    timespec restart_interval;
+    restart_interval.tv_sec = 10;
+    restart_interval.tv_nsec = 0;
+    int max_restarts = 3;
     
     string line;
     ifstream service_file;
@@ -478,7 +483,7 @@ ServiceRecord * ServiceSet::loadServiceRecord(const char * name)
                             onstart_flags.pass_cs_fd = true;
                         }
                         else {
-                            throw new ServiceDescriptionExc(name, "Unknown option: " + option_txt);
+                            throw ServiceDescriptionExc(name, "Unknown option: " + option_txt);
                         }
                     }
                 }
@@ -486,12 +491,47 @@ ServiceRecord * ServiceSet::loadServiceRecord(const char * name)
                     string signame = read_setting_value(i, end, nullptr);
                     int signo = signalNameToNumber(signame);
                     if (signo == -1) {
-                        throw new ServiceDescriptionExc(name, "Unknown/unsupported termination signal: " + signame);
+                        throw ServiceDescriptionExc(name, "Unknown/unsupported termination signal: " + signame);
                     }
                     else {
                         term_signal = signo;
                     }
                 }
+                else if (setting == "restart-limit-interval") {
+                    string interval_str = read_setting_value(i, end, nullptr);
+                    decltype(restart_interval.tv_sec) isec = 0;
+                    decltype(restart_interval.tv_nsec) insec = 0;
+                    auto max_secs = std::numeric_limits<decltype(isec)>::max() / 10;
+                    auto len = interval_str.length();
+                    int i;
+                    for (i = 0; i < len; i++) {
+                        char ch = interval_str[i];
+                        if (ch == '.') {
+                            i++;
+                            break;
+                        }
+                        if (ch < '0' || ch > '9') {
+                            throw ServiceDescriptionExc(name, "Bad value for restart-limit-interval");
+                        }
+                        // check for overflow
+                        if (isec >= max_secs) {
+                           throw ServiceDescriptionExc(name, "Too-large value for restart-limit-interval");
+                        }
+                        isec *= 10;
+                        isec += ch - '0';
+                    }
+                    decltype(insec) insec_m = 100000000; // 10^8
+                    for ( ; i < len; i++) {
+                        char ch = interval_str[i];
+                        if (ch < '0' || ch > '9') {
+                            throw ServiceDescriptionExc(name, "Bad value for restart-limit-interval");
+                        }
+                        insec += (ch - '0') * insec_m;
+                        insec_m /= 10;
+                    }
+                    restart_interval.tv_sec = isec;
+                    restart_interval.tv_nsec = insec;
+                }
                 else {
                     throw ServiceDescriptionExc(name, "Unknown setting: " + setting);
                 }
@@ -512,13 +552,17 @@ ServiceRecord * ServiceSet::loadServiceRecord(const char * name)
                 // We've found the dummy record
                 delete rval;
                 if (service_type == ServiceType::PROCESS) {
-                    rval = new process_service(this, string(name), std::move(command),
+                    auto rvalps = new process_service(this, string(name), std::move(command),
                         command_offsets, &depends_on, &depends_soft);
+                    rvalps->set_restart_interval(restart_interval, max_restarts);
+                    rval = rvalps;
                 }
                 else if (service_type == ServiceType::BGPROCESS) {
-                    rval = new bgproc_service(this, string(name), std::move(command),
+                    auto rvalps = new bgproc_service(this, string(name), std::move(command),
                         command_offsets, &depends_on, &depends_soft);
-                    rval->set_pid_file(std::move(pid_file));
+                    rvalps->set_pid_file(std::move(pid_file));
+                    rvalps->set_restart_interval(restart_interval, max_restarts);
+                    rval = rvalps;
                 }
                 else if (service_type == ServiceType::SCRIPTED) {
                     rval = new scripted_service(this, string(name), std::move(command),
@@ -547,7 +591,7 @@ ServiceRecord * ServiceSet::loadServiceRecord(const char * name)
         // Must remove the dummy service record.
         std::remove(records.begin(), records.end(), rval);
         delete rval;
-        throw ServiceDescriptionExc(name, setting_exc.getInfo());
+        throw ServiceDescriptionExc(name, std::move(setting_exc.getInfo()));
     }
     catch (...) {
         // Must remove the dummy service record.
diff --git a/src/service.cc b/src/service.cc
index 63f5a158f4aedc569a090edd4aeb2146a2729571..9104408c87e8d6a79af4daff8611379e0f1b7ad0 100644 (file)
--- a/src/service.cc
+++ b/src/service.cc
@@ -1190,6 +1190,11 @@ base_process_service::base_process_service(ServiceSet *sset, string name, Servic
     restart_interval_time = {0, 0};
     restart_timer.service = this;
     restart_timer.add_timer(eventLoop);
+
+    // By default, allow a maximum of 3 restarts within 10.0 seconds:
+    restart_interval.tv_sec = 10;
+    restart_interval.tv_nsec = 0;
+    max_restart_interval_count = 3;
 }
 
 void base_process_service::do_restart() noexcept
@@ -1228,6 +1233,13 @@ static timespec diff_time(timespec now, timespec then)
     return r;
 }
 
+static bool operator<(const timespec &a, const timespec &b)
+{
+    if (a.tv_sec < b.tv_sec) return true;
+    if (a.tv_sec == b.tv_sec && a.tv_nsec < b.tv_nsec) return true;
+    return false;
+}
+
 bool base_process_service::restart_ps_process() noexcept
 {
     timespec current_time;
@@ -1235,8 +1247,8 @@ bool base_process_service::restart_ps_process() noexcept
 
     // Check whether we're still in the most recent restart check interval:
     timespec int_diff = diff_time(current_time, restart_interval_time);
-    if (int_diff.tv_sec < 10) {
-        if (++restart_interval_count >= 3) {
+    if (int_diff < restart_interval) {
+        if (++restart_interval_count >= max_restart_interval_count) {
             log(LogLevel::ERROR, "Service ", service_name, " restarting too quickly; stopping.");
             return false;
         }
diff --git a/src/service.h b/src/service.h
index 40a50297d4491ebb0f04fe5e9c01636020d004a4..d35357b62a2c50e4e9a024dba39f9b69fba051e4 100644 (file)
--- a/src/service.h
+++ b/src/service.h
@@ -102,11 +102,11 @@ class ServiceLoadExc
 {
     public:
     std::string serviceName;
-    const char *excDescription;
+    std::string excDescription;
     
     protected:
-    ServiceLoadExc(std::string serviceName, const char *desc) noexcept
-        : serviceName(serviceName), excDescription(desc)
+    ServiceLoadExc(std::string serviceName, std::string &&desc) noexcept
+        : serviceName(serviceName), excDescription(std::move(desc))
     {
     }
 };
@@ -132,8 +132,8 @@ class ServiceCyclicDependency : public ServiceLoadExc
 class ServiceDescriptionExc : public ServiceLoadExc
 {
     public:
-    ServiceDescriptionExc(std::string serviceName, std::string extraInfo) noexcept
-        : ServiceLoadExc(serviceName, extraInfo.c_str())
+    ServiceDescriptionExc(std::string serviceName, std::string &&extraInfo) noexcept
+        : ServiceLoadExc(serviceName, std::move(extraInfo))
     {
     }    
 };
@@ -590,6 +590,9 @@ class base_process_service : public ServiceRecord
     timespec restart_interval_time;
     int restart_interval_count;
 
+    timespec restart_interval;
+    int max_restart_interval_count;
+
     // Start the process, return true on success
     virtual bool start_ps_process() noexcept;
     bool start_ps_process(const std::vector<const char *> &args, bool on_console) noexcept;
@@ -609,6 +612,12 @@ class base_process_service : public ServiceRecord
     ~base_process_service() noexcept
     {
     }
+
+    void set_restart_interval(timespec interval, int max_restarts)
+    {
+        restart_interval = interval;
+        max_restart_interval_count = max_restarts;
+    }
 };
 
 class process_service : public base_process_service