Re-read PID file after recovering a BGPROCESS service.
authorDavin McCall <davmac@davmac.org>
Sun, 3 Jan 2016 16:06:31 +0000 (16:06 +0000)
committerDavin McCall <davmac@davmac.org>
Sun, 3 Jan 2016 16:06:31 +0000 (16:06 +0000)
service.cc
service.h

index 13929f335af51d780d16d6fbbc1772ffeb83cfde..a3a01eb1261b0837da3854115440c1a04a7b1e70 100644 (file)
@@ -103,6 +103,36 @@ void ServiceRecord::process_child_callback(struct ev_loop *loop, ev_child *w, in
 
 void ServiceRecord::handle_exit_status() noexcept
 {
+    if (exit_status != 0 && service_state != ServiceState::STOPPING) {
+        log(LogLevel::ERROR, "Service ", service_name, " process terminated with exit code ", exit_status);
+    }
+
+    if (doing_recovery) {
+        // (BGPROCESS only)
+        doing_recovery = false;
+        bool do_stop = false;
+        if (exit_status != 0) {
+            do_stop = true;
+        }
+        else {
+            // We need to re-read the PID, since it has now changed.
+            if (service_type == ServiceType::BGPROCESS && pid_file.length() != 0) {
+                if (! read_pid_file()) {
+                    do_stop = true;
+                }
+            }
+        }
+        
+        if (do_stop) {
+            stop();
+            if (auto_restart && service_set->get_auto_restart()) {
+                start();
+            }
+        }
+        
+        return;
+    }
+    
     if (service_type == ServiceType::PROCESS || service_type == ServiceType::BGPROCESS) {
         if (service_state == ServiceState::STARTING) {
             // (only applies to BGPROCESS)
@@ -110,7 +140,6 @@ void ServiceRecord::handle_exit_status() noexcept
                 started();
             }
             else {
-                log(LogLevel::ERROR, "service ", service_name, " failed to start with exit code", exit_status);
                 failed_to_start();
             }
         }
@@ -123,6 +152,7 @@ void ServiceRecord::handle_exit_status() noexcept
             // TODO if we are pinned-started then we should probably check
             //      that dependencies have started before trying to re-start the
             //      service process.
+            doing_recovery = (service_type == ServiceType::BGPROCESS);
             start_ps_process();
             return;
         }
@@ -190,6 +220,7 @@ void ServiceRecord::process_child_status(struct ev_loop *loop, ev_io * stat_io,
                 sr->started();
             }
         }
+        
         if (sr->pid == -1) {
             // Somehow the process managed to complete before we even saw the status.
             sr->handle_exit_status();
@@ -330,6 +361,37 @@ void ServiceRecord::acquiredConsole() noexcept
     }
 }
 
+bool ServiceRecord::read_pid_file() noexcept
+{
+    const char *pid_file_c = pid_file.c_str();
+    int fd = open(pid_file_c, O_CLOEXEC);
+    if (fd != -1) {
+        char pidbuf[21]; // just enought to hold any 64-bit integer
+        int r = read(fd, pidbuf, 20);
+        if (r > 0) {
+            pidbuf[r] = 0; // store nul terminator
+            pid = std::atoi(pidbuf);
+            if (kill(pid, 0) == 0) {
+                ev_child_init(&child_listener, process_child_callback, pid, 0);
+                child_listener.data = this;
+                ev_child_start(ev_default_loop(EVFLAG_AUTO), &child_listener);
+            }
+            else {
+                log(LogLevel::ERROR, service_name, ": pid read from pidfile (", pid, ") is not valid");
+                pid = -1;
+                close(fd);
+                return false;
+            }
+        }
+        close(fd);
+        return true;
+    }
+    else {
+        log(LogLevel::ERROR, service_name, ": read pid file: ", strerror(errno));
+        return false;
+    }
+}
+
 void ServiceRecord::started() noexcept
 {
     if (onstart_flags.runs_on_console && (service_type == ServiceType::SCRIPTED || service_type == ServiceType::BGPROCESS)) {
@@ -338,34 +400,12 @@ void ServiceRecord::started() noexcept
     }
     
     if (service_type == ServiceType::BGPROCESS && pid_file.length() != 0) {
-        const char *pid_file_c = pid_file.c_str();
-        int fd = open(pid_file_c, O_CLOEXEC);
-        if (fd != -1) {
-            char pidbuf[21]; // just enought to hold any 64-bit integer
-            int r = read(fd, pidbuf, 20);
-            if (r > 0) {
-                pidbuf[r] = 0; // store nul terminator
-                pid = std::atoi(pidbuf);
-                if (kill(pid, 0) == 0) {
-                    ev_child_init(&child_listener, process_child_callback, pid, 0);
-                    child_listener.data = this;
-                    ev_child_start(ev_default_loop(EVFLAG_AUTO), &child_listener);
-                }
-                else {
-                    log(LogLevel::ERROR, service_name, ": pid read from pidfile (", pid, ") is not valid");
-                    pid = -1;
-                    failed_to_start();
-                    close(fd);
-                    return;
-                }
-            }
-            close(fd);
-        }
-        else {
-            log(LogLevel::ERROR, service_name, ": read pid file: ", strerror(errno));
+        if (! read_pid_file()) {
+            failed_to_start();
+            return;
         }
     }
-
+    
     logServiceStarted(service_name);
     service_state = ServiceState::STARTED;
     notifyListeners(ServiceEvent::STARTED);
index b41d524493cc812c6fc8465287332479f4933c07..8fabc0c9ac79962113d40effd8320cff36e13a0c 100644 (file)
--- a/service.h
+++ b/service.h
@@ -171,8 +171,10 @@ class ServiceRecord
     bool pinned_stopped : 1;
     bool pinned_started : 1;
     
-    bool waiting_for_deps : 1;  /* if STARTING, whether we are waiting for dependencies (inc console) to start */
-    bool waiting_for_execstat : 1;  /* if we are waiting for exec status after fork() */
+    bool waiting_for_deps : 1;  // if STARTING, whether we are waiting for dependencies (inc console) to start
+    bool waiting_for_execstat : 1;  // if we are waiting for exec status after fork()
+    bool doing_recovery : 1;    // if we are currently recovering a BGPROCESS (restarting process, while
+                                //   holding STARTED service state)
 
     typedef std::list<ServiceRecord *> sr_list;
     typedef sr_list::iterator sr_iter;
@@ -249,6 +251,9 @@ class ServiceRecord
     
     void allDepsStarted(bool haveConsole = false) noexcept;
     
+    // Read the pid-file, return false on failure
+    bool read_pid_file() noexcept;
+    
     // Check whether dependencies have started, and optionally ask them to start
     bool startCheckDependencies(bool do_start) noexcept;