Remember the reason why a service stopped.
author Davin McCall <davmac@davmac.org>
Fri, 15 Jun 2018 18:47:13 +0000 (19:47 +0100)
committer Davin McCall <davmac@davmac.org>
Mon, 18 Jun 2018 20:29:12 +0000 (21:29 +0100)
This will later allow for reporting the reason to dinitctl etc.

src/baseproc-service.cc
src/includes/service.h
src/proc-service.cc
src/service.cc
src/tests/proctests.cc
src/tests/tests.cc

index 2cffa65a9fdd863f1b0b0c713d10b51c666b6d11..14d9f2a1bb58eb1cbd25fedc2c1e499e3ea652b4 100644 (file)
@@ -311,6 +311,7 @@ void base_process_service::timer_expired() noexcept
         // Starting, start timed out.
         log(loglevel_t::WARN, "Service ", get_name(), " with pid ", pid, " exceeded allowed start time; cancelling.");
         interrupt_start();
+        stop_reason = stopped_reason_t::TIMEDOUT;
         failed_to_start(false, false);
     }
     else {
index 246a249d933f25ea919529f03d9f979e465986be..1c9e9c5d65a3b9960a594665d4d35b556d62e478 100644 (file)
@@ -183,6 +183,20 @@ enum class dependency_type
     MILESTONE   // dependency must start successfully, but once started the dependency becomes soft
 };
 
+enum class stopped_reason_t  // why a service stopped; stored in service_record::stop_reason
+{
+    NORMAL,    // initial value; also recorded when a stop is issued via stop() / release()
+
+    // Start failures:
+    DEPFAILED, // A dependency failed to start (recorded while propagating the failure)
+    FAILED,    // failed to start (process terminated)
+       EXECFAILED, // failed to start (couldn't launch process, i.e. exec failed while STARTING)
+    TIMEDOUT,  // timed out when starting (allowed start time was exceeded)
+
+    // Failure after starting:
+    TERMINATED // process terminated (e.g. an unexpected exit of a started process)
+};
+
 /* Service dependency record */
 class service_dep
 {
@@ -319,6 +333,8 @@ class service_record
     uid_t socket_uid = -1;  // socket user id or -1
     gid_t socket_gid = -1;  // socket group id or -1
 
+    stopped_reason_t stop_reason = stopped_reason_t::NORMAL;  // reason why stopped
+
     // Data for use by service_set
     public:
     
@@ -617,6 +633,12 @@ class service_record
         }
         depends_on.clear();
     }
+
+    // Why did the service stop? Returns the recorded stop_reason (initially stopped_reason_t::NORMAL).
+    stopped_reason_t get_stop_reason()
+    {
+        return stop_reason;  // plain read of the member; nothing is modified here
+    }
 };
 
 inline auto extract_prop_queue(service_record *sr) -> decltype(sr->prop_queue_node) &
index 46034df8583818d4fa9c87a72dbc31e661841f78..40d0585fbde2067f66d973c2e6a0877ac79239b4 100644 (file)
@@ -152,6 +152,7 @@ void process_service::handle_exit_status(bp_sys::exit_status exit_status) noexce
             started();
         }
         else {
+            stop_reason = stopped_reason_t::FAILED;
             failed_to_start();
         }
     }
@@ -169,6 +170,7 @@ void process_service::handle_exit_status(bp_sys::exit_status exit_status) noexce
         return;
     }
     else {
+        stop_reason = stopped_reason_t::TERMINATED;
         emergency_stop();
     }
     services->process_queues();
@@ -178,10 +180,12 @@ void process_service::exec_failed(int errcode) noexcept
 {
     log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
     if (get_state() == service_state_t::STARTING) {
+        stop_reason = stopped_reason_t::EXECFAILED;
         failed_to_start();
     }
     else {
         // Process service in smooth recovery:
+        stop_reason = stopped_reason_t::TERMINATED;
         emergency_stop();
     }
 }
@@ -231,6 +235,7 @@ void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcep
 
         if (need_stop) {
             // Failed startup: no auto-restart.
+            stop_reason = stopped_reason_t::TERMINATED;
             emergency_stop();
             services->process_queues();
         }
@@ -247,6 +252,7 @@ void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcep
             switch (pid_result) {
                 case pid_result_t::FAILED:
                     // Failed startup: no auto-restart.
+                    stop_reason = stopped_reason_t::FAILED;
                     failed_to_start();
                     break;
                 case pid_result_t::TERMINATED:
@@ -259,6 +265,7 @@ void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcep
             }
         }
         else {
+            stop_reason = stopped_reason_t::FAILED;
             failed_to_start();
         }
     }
@@ -277,6 +284,7 @@ void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcep
             start_explicit = false;
             release(false);
         }
+        stop_reason = stopped_reason_t::TERMINATED;
         forced_stop();
         stop_dependents();
         stopped();
@@ -288,6 +296,7 @@ void bgproc_service::exec_failed(int errcode) noexcept
 {
     log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
     // Only time we execute is for startup:
+    stop_reason = stopped_reason_t::EXECFAILED;
     failed_to_start();
 }
 
@@ -364,6 +373,7 @@ void scripted_service::handle_exit_status(bp_sys::exit_status exit_status) noexc
                 log(loglevel_t::ERROR, "Service ", get_name(), " command terminated due to signal ",
                         exit_status.get_term_sig());
             }
+            stop_reason = stopped_reason_t::FAILED;
             failed_to_start();
         }
         services->process_queues();
@@ -375,6 +385,7 @@ void scripted_service::exec_failed(int errcode) noexcept
     log(loglevel_t::ERROR, get_name(), ": execution failed: ", strerror(errcode));
     auto service_state = get_state();
     if (service_state == service_state_t::STARTING) {
+        stop_reason = stopped_reason_t::EXECFAILED;
         failed_to_start();
     }
     else if (service_state == service_state_t::STOPPING) {
index 597683e4a41e5679e47494ff3a693ede5962ff8a..40d7156f9a6a2a90c89889322e41b4df23359407 100644 (file)
@@ -146,6 +146,7 @@ void service_record::release(bool issue_stop) noexcept
             services->service_inactive(this);
         }
         else if (issue_stop) {
+               stop_reason = stopped_reason_t::NORMAL;
             do_stop();
         }
     }
@@ -219,6 +220,7 @@ void service_record::do_propagation() noexcept
     
     if (prop_failure) {
         prop_failure = false;
+        stop_reason = stopped_reason_t::DEPFAILED;
         failed_to_start(true);
     }
     
@@ -459,7 +461,9 @@ void service_record::stop(bool bring_down) noexcept
         release();
     }
 
-    if (bring_down && service_state != service_state_t::STOPPED) {
+    if (bring_down && service_state != service_state_t::STOPPED
+               && service_state != service_state_t::STOPPING) {
+       stop_reason = stopped_reason_t::NORMAL;
         do_stop();
     }
 }
index 4602f263c7e8fa8a53b1cc007f4552ca1431bfca..fa437170b4108fd26ad73125244e9b1e200d7cae 100644 (file)
@@ -26,6 +26,12 @@ class base_process_service_test
         bsp->exec_succeeded();
     }
 
+    static void exec_failed(base_process_service *bsp, int errcode)  // test hook: report an exec failure (errno-style 'errcode') to 'bsp'
+    {
+       bsp->waiting_for_execstat = false;  // presumably marks the exec status as received before reporting -- TODO confirm
+       bsp->exec_failed(errcode);          // call the service's exec-failure handler directly
+    }
+
     static void handle_exit(base_process_service *bsp, int exit_status)
     {
         bsp->pid = -1;
@@ -110,6 +116,7 @@ void test_proc_unexpected_term()
     sset.process_queues();
 
     assert(p.get_state() == service_state_t::STOPPED);
+    assert(p.get_stop_reason() == stopped_reason_t::TERMINATED);
     assert(event_loop.active_timers.size() == 0);
 
     sset.remove_service(&p);
@@ -150,6 +157,7 @@ void test_term_via_stop()
     sset.process_queues();
 
     assert(p.get_state() == service_state_t::STOPPED);
+    assert(p.get_stop_reason() == stopped_reason_t::NORMAL);
     assert(event_loop.active_timers.size() == 0);
 
     sset.remove_service(&p);
@@ -185,6 +193,7 @@ void test_proc_start_timeout()
     sset.process_queues();
 
     assert(p.get_state() == service_state_t::STOPPED);
+    assert(p.get_stop_reason() == stopped_reason_t::TIMEDOUT);
     assert(event_loop.active_timers.size() == 0);
 
     sset.remove_service(&p);
@@ -223,12 +232,45 @@ void test_proc_start_timeout2()
     sset.process_queues();
 
     assert(p.get_state() == service_state_t::STOPPED);
+    assert(p.get_stop_reason() == stopped_reason_t::TIMEDOUT);
     assert(ts.get_state() == service_state_t::STARTED);
     assert(event_loop.active_timers.size() == 0);
 
     sset.remove_service(&p);
 }
 
+// Test exec() failure for process service start: the service must end up STOPPED with reason EXECFAILED.
+void test_proc_start_execfail()
+{
+    using namespace std;
+
+    service_set sset;
+
+    string command = "test-command";
+    list<pair<unsigned,unsigned>> command_offsets;
+    command_offsets.emplace_back(0, command.length());  // a single (start, end) span covering the whole command string
+    std::list<prelim_dep> depends;  // no dependencies
+
+    process_service p {&sset, "testproc", std::move(command), command_offsets, depends};
+    init_service_defaults(p);
+    sset.add_service(&p);
+
+    p.start(true);
+    sset.process_queues();
+
+    assert(p.get_state() == service_state_t::STARTING);  // still starting: no exec result has been reported yet
+
+    base_process_service_test::exec_failed(&p, ENOENT);  // simulate the exec failing with ENOENT
+    sset.process_queues();
+
+    assert(p.get_state() == service_state_t::STOPPED);
+    assert(p.get_stop_reason() == stopped_reason_t::EXECFAILED);
+    assert(event_loop.active_timers.size() == 0);  // no timer may be left active after the failed start
+
+    sset.remove_service(&p);
+}
+
+
 // Test stop timeout
 void test_proc_stop_timeout()
 {
@@ -272,6 +314,7 @@ void test_proc_stop_timeout()
     sset.process_queues();
 
     assert(p.get_state() == service_state_t::STOPPED);
+    assert(p.get_stop_reason() == stopped_reason_t::NORMAL);
 
     // Note that timer is still active as we faked its expiry above
     //assert(event_loop.active_timers.size() == 0);
@@ -416,6 +459,7 @@ void test_scripted_stop_timeout()
     sset.process_queues();
 
     assert(p.get_state() == service_state_t::STOPPED);
+    assert(p.get_stop_reason() == stopped_reason_t::NORMAL);
 
     event_loop.active_timers.clear();
     sset.remove_service(&p);
@@ -457,6 +501,9 @@ void test_scripted_start_fail()
     assert(p.get_state() == service_state_t::STOPPED);
     assert(s2->get_state() == service_state_t::STOPPED);
     assert(s3->get_state() == service_state_t::STOPPED);
+    assert(p.get_stop_reason() == stopped_reason_t::FAILED);
+    assert(s2->get_stop_reason() == stopped_reason_t::DEPFAILED);
+    assert(s3->get_stop_reason() == stopped_reason_t::DEPFAILED);
 
     event_loop.active_timers.clear();
     sset.remove_service(&p);
@@ -566,6 +613,8 @@ void test_scripted_start_skip()
 
     assert(p.get_state() == service_state_t::STOPPED);
     assert(s2->get_state() == service_state_t::STOPPED);
+    assert(p.get_stop_reason() == stopped_reason_t::NORMAL);
+    assert(s2->get_stop_reason() == stopped_reason_t::NORMAL);
     assert(sset.count_active_services() == 0);
 
     event_loop.active_timers.clear();
@@ -613,12 +662,15 @@ void test_scripted_start_skip2()
 
     assert(p.get_state() == service_state_t::STOPPED);
     assert(s2->get_state() == service_state_t::STOPPED);
+    assert(p.get_stop_reason() == stopped_reason_t::NORMAL);
+    assert(s2->get_stop_reason() == stopped_reason_t::NORMAL);
     assert(sset.count_active_services() == 0);
 
     event_loop.active_timers.clear();
     sset.remove_service(&p);
 }
 
+
 #define RUN_TEST(name, spacing) \
     std::cout << #name "..." spacing; \
     name(); \
@@ -631,6 +683,7 @@ int main(int argc, char **argv)
     RUN_TEST(test_term_via_stop, "        ");
     RUN_TEST(test_proc_start_timeout, "   ");
     RUN_TEST(test_proc_start_timeout2, "  ");
+    RUN_TEST(test_proc_start_execfail, "  ");
     RUN_TEST(test_proc_stop_timeout, "    ");
     RUN_TEST(test_proc_smooth_recovery1, "");
     RUN_TEST(test_proc_smooth_recovery2, "");
index 208d104b3d9d84fede88cc3a8feb0ea2a1aeca23..cfdaf807784b4c26adb95990ef620dd1e0c99d5b 100644 (file)
@@ -417,6 +417,7 @@ void test9()
     s1->failed_to_start();
     sset.process_queues();
 
+    assert(s1->get_state() == service_state_t::STOPPED);
     assert(s2->get_state() == service_state_t::STOPPED);
 }