Re-work state propogation/transition logic.
authorDavin McCall <davmac@davmac.org>
Sun, 4 Aug 2019 04:23:15 +0000 (14:23 +1000)
committerDavin McCall <davmac@davmac.org>
Sun, 4 Aug 2019 09:15:19 +0000 (19:15 +1000)
This mostly keeps existing behaviour, hopefully making it more
consistent and predictable in other cases, and simplifies the code.

TODO
src/baseproc-service.cc
src/control.cc
src/includes/service.h
src/proc-service.cc
src/service.cc
src/tests/proctests.cc
src/tests/tests.cc

diff --git a/TODO b/TODO
index fee35b15676a7389a411560b7fa07527c11decdd..2b28abec2fb898d7a63109d8203ec37653128e71 100644 (file)
--- a/TODO
+++ b/TODO
@@ -12,8 +12,10 @@ For version 0.7.0:
 For version 0.8.0:
 ------------------
 * Get rid of "floating" service state:
-  - soft dependencies should always be re-attached when the dependency starts.
-  - "dinitctl wake" should fail (report error) if there are no running dependents.
+  - soft dependencies should remain attached when a dependency restarts and removed
+    only if it stops without restarting
+  - "dinitctl wake" should fail (report error) if there are no running dependents and
+    should re-attach soft dependencies.
   - if a service starts and required-by count is 0 it should always immediately stop.
   - update man pages accordingly.
 * Easy way to reload service description (including if service is running, where possible).
index 4ff189604b376e95eef86b0b643ebd29d1d207bb..32c49d5969600901f3ae2f59815ea62f1717cc49 100644 (file)
@@ -375,10 +375,6 @@ void base_process_service::timer_expired() noexcept
 
 void base_process_service::emergency_stop() noexcept
 {
-    if (! do_auto_restart() && start_explicit) {
-        start_explicit = false;
-        release(false);
-    }
     forced_stop();
     stop_dependents();
 }
index e4eb578aaa69b2e3c57b8c1d61a194b436ff4597..3b72a85b285858112e9b5935e64f162926c2f612 100644 (file)
@@ -223,7 +223,7 @@ bool control_conn_t::process_start_stop(int pktType)
     }
     
     // 1 byte: packet type
-    // 1 byte: flags eg pin in requested state (0 = no pin, 1 = pin)
+    // 1 byte: flags eg. pin in requested state (0 = no pin, 1 = pin)
     // 4 bytes: service handle
     
     bool do_pin = ((rbuf[1] & 1) == 1);
@@ -259,7 +259,6 @@ bool control_conn_t::process_start_stop(int pktType)
             // force service to stop
             bool gentle = ((rbuf[1] & 2) == 2);
             bool do_restart = ((rbuf[1] & 4) == 4);
-            bool is_active = service->is_marked_active();
             if (do_restart && services->is_shutting_down()) {
                 ack_buf[0] = DINIT_RP_NAK;
                 break;
@@ -275,16 +274,18 @@ bool control_conn_t::process_start_stop(int pktType)
                     goto clear_out;
                 }
             }
-            if (do_pin) service->pin_stop();
-            service->stop(true);
-            service->forced_stop();
-            services->process_queues();
-            service_state_t wanted_state = service_state_t::STOPPED;
+            service_state_t wanted_state;
             if (do_restart) {
-                service->start(is_active);
+                service->restart(); // TODO XXX check return, reply NAK if fail
                 wanted_state = service_state_t::STARTED;
-                services->process_queues();
             }
+            else {
+                if (do_pin) service->pin_stop();
+                service->stop(true);
+                wanted_state = service_state_t::STOPPED;
+            }
+            service->forced_stop();
+            services->process_queues();
             if (service->get_state() == wanted_state) ack_buf[0] = DINIT_RP_ALREADYSS;
             break;
         }
index 64ad50ddd8775941ae0a45f57962fb71ba0217b7..f1d62441997fd238b2958c558d00c2df7789d4b5 100644 (file)
@@ -152,6 +152,13 @@ class service_dep
 
     const dependency_type dep_type;
 
+    // Check if the dependency is a hard dependency (including milestone still waiting).
+    bool is_hard()
+    {
+        return dep_type == dependency_type::REGULAR
+                || (dep_type == dependency_type::MILESTONE && waiting_on);
+    }
+
     service_dep(service_record * from, service_record * to, dependency_type dep_type_p) noexcept
             : from(from), to(to), waiting_on(false), holding_acq(false), dep_type(dep_type_p)
     {  }
@@ -230,7 +237,7 @@ class service_record
     bool prop_start   : 1;
     bool prop_stop    : 1;
 
-    bool restarting   : 1;      // re-starting after unexpected termination
+    bool restarting   : 1;      // re-start after stopping
     bool start_failed : 1;      // failed to start (reset when begins starting)
     bool start_skipped : 1;     // start was skipped by interrupt
     
@@ -340,8 +347,6 @@ class service_record
     // Release console (console must be currently held by this service)
     void release_console() noexcept;
     
-    bool do_auto_restart() noexcept;
-
     // Started state reached
     bool process_started() noexcept;
 
@@ -513,6 +518,7 @@ class service_record
     
     void start(bool activate = true) noexcept;  // start the service
     void stop(bool bring_down = true) noexcept;   // stop the service
+    void restart() noexcept; // restart the service
     
     void forced_stop() noexcept; // force-stop this service and all dependents
     
@@ -824,7 +830,7 @@ class service_set
                 auto next = prop_queue.pop_front();
                 next->do_propagation();
             }
-            while (! stop_queue.is_empty()) {
+            if (! stop_queue.is_empty()) {
                 auto next = stop_queue.pop_front();
                 next->execute_transition();
             }
index 2f12b205755c12dd29dfca81e9e5354df72c4279..150f34d664d94f06cb840610d95359dcface6041 100644 (file)
@@ -186,7 +186,6 @@ void process_service::handle_exit_status(bp_sys::exit_status exit_status) noexce
 {
     bool did_exit = exit_status.did_exit();
     bool was_signalled = exit_status.was_signalled();
-    restarting = false;
     auto service_state = get_state();
 
     if (notification_fd != -1) {
@@ -281,7 +280,7 @@ void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcep
     // This may be a "smooth recovery" where we are restarting the process while leaving the
     // service in the STARTED state.
     if (restarting && service_state == service_state_t::STARTED) {
-        restarting = false;
+        //restarting = false;
         bool need_stop = false;
         if ((did_exit && exit_status.get_exit_status() != 0) || was_signalled) {
             need_stop = true;
@@ -313,7 +312,7 @@ void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcep
         return;
     }
 
-    restarting = false;
+    //restarting = false;
     if (service_state == service_state_t::STARTING) {
         // POSIX requires that if the process exited clearly with a status code of 0,
         // the exit status value will be 0:
@@ -350,10 +349,6 @@ void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcep
             do_smooth_recovery();
             return;
         }
-        if (! do_auto_restart() && start_explicit) {
-            start_explicit = false;
-            release(false);
-        }
         stop_reason = stopped_reason_t::TERMINATED;
         forced_stop();
         stop_dependents();
index b516c4421b4c3ce57a20ceca8d607c9422a4a202..88ec6c698355de7cf7342ebee62fb9c7de3caf12 100644 (file)
@@ -50,8 +50,28 @@ void service_record::stopped() noexcept
 
     force_stop = false;
 
-    bool will_restart = (desired_state == service_state_t::STARTED)
-            && !services->is_shutting_down();
+    restarting |= auto_restart;
+    bool will_restart = restarting && required_by > 0;
+    restarting = false;
+
+    // If we won't restart, break soft dependencies now
+    if (! will_restart) {
+        for (auto dept : dependents) {
+            if (! dept->is_hard()) {
+                // waits-for or soft dependency:
+                if (dept->waiting_on) {
+                    dept->waiting_on = false;
+                    dept->get_from()->dependency_started();
+                }
+                if (dept->holding_acq) {
+                    dept->holding_acq = false;
+                    // release without issuing stop, since we're called only when this
+                    // service is already stopped/stopping:
+                    release(false);
+                }
+            }
+        }
+    }
 
     for (auto & dependency : depends_on) {
         // we signal dependencies in case they are waiting for us to stop:
@@ -107,14 +127,6 @@ void service_record::stopped() noexcept
     notify_listeners(service_event_t::STOPPED);
 }
 
-bool service_record::do_auto_restart() noexcept
-{
-    if (auto_restart) {
-        return !services->is_shutting_down();
-    }
-    return false;
-}
-
 void service_record::require() noexcept
 {
     if (required_by++ == 0) {
@@ -134,9 +146,11 @@ void service_record::release(bool issue_stop) noexcept
 
         // Can stop, and can release dependencies now. We don't need to issue a release if
         // the require was pending though:
-        prop_release = !prop_require;
-        prop_require = false;
-        services->add_prop_queue(this);
+        if (service_state != service_state_t::STOPPED && service_state != service_state_t::STOPPING) {
+            prop_release = !prop_require;
+            prop_require = false;
+            services->add_prop_queue(this);
+        }
 
         if (service_state == service_state_t::STOPPED) {
             services->service_inactive(this);
@@ -167,8 +181,6 @@ void service_record::start(bool activate) noexcept
         require();
         start_explicit = true;
     }
-    
-    if (desired_state == service_state_t::STARTED && service_state != service_state_t::STOPPED) return;
 
     bool was_active = service_state != service_state_t::STOPPED || desired_state != service_state_t::STOPPED;
     desired_state = service_state_t::STARTED;
@@ -176,9 +188,15 @@ void service_record::start(bool activate) noexcept
     if (service_state != service_state_t::STOPPED) {
         // We're already starting/started, or we are stopping and need to wait for
         // that the complete.
-        if (service_state != service_state_t::STOPPING || ! can_interrupt_stop()) {
+        if (service_state != service_state_t::STOPPING) {
             return;
         }
+
+        if (! can_interrupt_stop()) {
+            restarting = true;
+            return;
+        }
+
         // We're STOPPING, and that can be interrupted. Our dependencies might be STOPPING,
         // but if so they are waiting (for us), so they too can be instantly returned to
         // STARTING state.
@@ -243,6 +261,14 @@ void service_record::execute_transition() noexcept
     else if (service_state == service_state_t::STOPPING) {
         if (stop_check_dependents()) {
             waiting_for_deps = false;
+
+            // A service that does actually stop for any reason should have its explicit activation released, unless
+            // it will restart:
+            if (start_explicit && !auto_restart && !restarting) {
+                start_explicit = false;
+                release(false);
+            }
+
             bring_down();
         }
     }
@@ -322,6 +348,7 @@ void service_record::all_deps_started() noexcept
     }
 
     bool start_success = bring_up();
+    restarting = false;
     if (! start_success) {
         failed_to_start();
     }
@@ -454,30 +481,12 @@ void service_record::stop(bool bring_down) noexcept
 {
     if (start_explicit) {
         start_explicit = false;
-        release();
+        required_by--;
     }
 
     // If our required_by count is 0, we should treat this as a full manual stop regardless
-    if (required_by == 0) bring_down = true;
-
-    // If it's a manual bring-down, we'll also break holds from waits-for dependencies, to avoid
-    // bouncing back up again -- but only if all holds are from waits-for dependencies.
-    if (bring_down) {
-        if (std::all_of(dependents.begin(), dependents.end(), [](service_dep * x) {
-            return x->dep_type == dependency_type::WAITS_FOR || x->dep_type == dependency_type::SOFT; }))
-        {
-            for (auto dept : dependents) {
-                if (dept->waiting_on) {
-                    dept->waiting_on = false;
-                    dept->get_from()->dependency_started();
-                }
-                if (dept->holding_acq) {
-                    dept->holding_acq = false;
-                    // release without issuing stop, since we issue stop if necessary below
-                    release(false);
-                }
-            }
-        }
+    if (required_by == 0) {
+        bring_down = true;
     }
 
     if (bring_down && service_state != service_state_t::STOPPED
@@ -487,14 +496,21 @@ void service_record::stop(bool bring_down) noexcept
     }
 }
 
-void service_record::do_stop() noexcept
+void service_record::restart() noexcept
 {
-    // A service that does actually stop for any reason should have its explicit activation released, unless
-    // it will restart:
-    if (start_explicit && ! do_auto_restart()) {
-        start_explicit = false;
-        release(false);
+    // Re-start without affecting dependency links/activation.
+
+    if (service_state == service_state_t::STARTED) {
+        restarting = true;
+        stop_reason = stopped_reason_t::NORMAL;
+        do_stop();
     }
+}
+
+void service_record::do_stop() noexcept
+{
+    // Called when we should definitely stop. We may need to restart afterwards, but we
+    // won't know that for sure until the execution transition.
 
     bool all_deps_stopped = stop_dependents();
 
@@ -535,6 +551,11 @@ void service_record::do_stop() noexcept
 
     if (pinned_started) return;
 
+    if (required_by == 0) {
+        prop_release = true;
+        services->add_prop_queue(this);
+    }
+
     service_state = service_state_t::STOPPING;
     waiting_for_deps = true;
     if (all_deps_stopped) {
@@ -546,12 +567,12 @@ bool service_record::stop_check_dependents() noexcept
 {
     bool all_deps_stopped = true;
     for (auto dept : dependents) {
-        if (dept->dep_type == dependency_type::REGULAR && ! dept->get_from()->is_stopped()) {
+        if (dept->is_hard() && dept->holding_acq) {
             all_deps_stopped = false;
             break;
         }
     }
-    
+
     return all_deps_stopped;
 }
 
@@ -559,9 +580,7 @@ bool service_record::stop_dependents() noexcept
 {
     bool all_deps_stopped = true;
     for (auto dept : dependents) {
-        if (dept->dep_type == dependency_type::REGULAR ||
-                (dept->dep_type == dependency_type::MILESTONE &&
-                dept->get_from()->service_state != service_state_t::STARTED)) {
+        if (dept->is_hard() && dept->holding_acq) {
             if (! dept->get_from()->is_stopped()) {
                 // Note we check *first* since if the dependent service is not stopped,
                 // 1. We will issue a stop to it shortly and
@@ -578,19 +597,6 @@ bool service_record::stop_dependents() noexcept
             dept->get_from()->prop_stop = true;
             services->add_prop_queue(dept->get_from());
         }
-        else {
-            // waits-for or soft dependency:
-            if (dept->waiting_on) {
-                dept->waiting_on = false;
-                dept->get_from()->dependency_started();
-            }
-            if (dept->holding_acq && !auto_restart) {
-                dept->holding_acq = false;
-                // release without issuing stop, since we should be called only when this
-                // service is already stopped/stopping:
-                release(false);
-            }
-        }
     }
 
     return all_deps_stopped;
@@ -607,6 +613,19 @@ void service_record::unpin() noexcept
 {
     if (pinned_started) {
         pinned_started = false;
+
+        for (auto &dep : depends_on) {
+            if (dep.is_hard()) {
+                if (dep.get_to()->get_state() != service_state_t::STARTED) {
+                    desired_state = service_state_t::STOPPED;
+                }
+            }
+            else if (dep.holding_acq) {
+                dep.holding_acq = false;
+                dep.get_to()->release();
+            }
+        }
+
         if (desired_state == service_state_t::STOPPED || force_stop) {
             do_stop();
             services->process_queues();
index cb36ede26d01363c5fbd4a218ce4d44ac4115648..395ec3e05e0365f04a132d3657a4d29d87f4b43c 100644 (file)
@@ -165,7 +165,6 @@ void test_proc_unexpected_term()
     assert(p.get_state() == service_state_t::STARTED);
 
     base_process_service_test::handle_exit(&p, 0);
-    sset.process_queues();
 
     assert(p.get_state() == service_state_t::STOPPED);
     assert(p.get_stop_reason() == stopped_reason_t::TERMINATED);
@@ -975,6 +974,7 @@ void test_waitsfor_restart()
     assert(tp.get_state() == service_state_t::STARTING);
     assert(p.get_state() == service_state_t::STOPPING);
 
+    // p terminates (finishes stopping). Then it should re-start...
     base_process_service_test::handle_signal_exit(&p, SIGTERM);
     sset.process_queues();
 
index dd1510d5d66c3930bfcc6e2dc2e4b86bc9c9b0f5..a0f45bcfd7aada537f0dbf1d02b180733d3cb521 100644 (file)
@@ -213,7 +213,7 @@ void test_pin1()
     // s3 should remain started due to pin:
     assert(s3->get_state() == service_state_t::STARTED);
     assert(s2->get_state() == service_state_t::STOPPING);
-    assert(s1->get_state() == service_state_t::STOPPING);
+    assert(s1->get_state() == service_state_t::STARTED);
 
     // If we now unpin, s3 should stop:
     s3->unpin();
@@ -223,8 +223,7 @@ void test_pin1()
     assert(s1->get_state() == service_state_t::STOPPED);
 }
 
-// Test that STOPPING dependency of pinned service returns to STARTED when the pinned service has
-// a start issued.
+// Test that issuing a stop to a pinned-started service does not stop the service or its dependencies.
 void test_pin2()
 {
     service_set sset;
@@ -251,15 +250,7 @@ void test_pin2()
     s3->stop(true);
     sset.process_queues();
 
-    // s3 should remain started due to pin, but s1 and s2 are released and go STOPPING:
-    assert(s3->get_state() == service_state_t::STARTED);
-    assert(s2->get_state() == service_state_t::STOPPING);
-    assert(s1->get_state() == service_state_t::STOPPING);
-
-    // If we now issue start, STOPPING should revert to STARTED:
-    s3->start(true);
-    sset.process_queues();
-
+    // s3 should remain started due to pin, s1 and s2 not released:
     assert(s3->get_state() == service_state_t::STARTED);
     assert(s2->get_state() == service_state_t::STARTED);
     assert(s1->get_state() == service_state_t::STARTED);
@@ -297,7 +288,7 @@ void test_pin3()
     // s3 should remain started due to pin, but s1 and s2 are released and go STOPPING:
     assert(s3->get_state() == service_state_t::STARTED);
     assert(s2->get_state() == service_state_t::STOPPING);
-    assert(s1->get_state() == service_state_t::STOPPING);
+    assert(s1->get_state() == service_state_t::STARTED);
 
     // If we now issue start, s2 still needs to stop (due to force stop):
     s3->start(true);
@@ -323,7 +314,7 @@ void test_pin4()
     service_record *s1 = new service_record(&sset, "test-service-1", service_type_t::INTERNAL, {});
     sset.add_service(s1);
 
-    // Pin s3:
+    // Pin s1:
     s1->pin_start();
 
     // Start the service:
@@ -331,8 +322,9 @@ void test_pin4()
 
     assert(s1->get_state() == service_state_t::STARTED);
 
-    // Issue stop:
+    // Issue forced stop:
     s1->stop(true);
+    s1->forced_stop();
     sset.process_queues();
 
     // s3 should remain started:
@@ -552,6 +544,91 @@ void test12()
     assert(sset.count_active_services() == 0);
 }
 
+// Tests for "restart" functionality.
+void test13()
+{
+    service_set sset;
+
+    test_service *s1 = new test_service(&sset, "test-service-1", service_type_t::INTERNAL, {});
+    test_service *s2 = new test_service(&sset, "test-service-2", service_type_t::INTERNAL, {{s1, WAITS}});
+    test_service *s3 = new test_service(&sset, "test-service-3", service_type_t::INTERNAL, {{s2, REG}});
+
+    sset.add_service(s1);
+    sset.add_service(s2);
+    sset.add_service(s3);
+
+    // Start all services via s3
+    sset.start_service(s3);
+    s1->started();
+    sset.process_queues();
+    s2->started();
+    sset.process_queues();
+    s3->started();
+    sset.process_queues();
+
+    assert(s3->get_state() == service_state_t::STARTED);
+    assert(s2->get_state() == service_state_t::STARTED);
+    assert(s1->get_state() == service_state_t::STARTED);
+
+    s1->restart();
+    s1->forced_stop();
+    sset.process_queues();
+
+    assert(s3->get_state() == service_state_t::STARTED);
+    assert(s2->get_state() == service_state_t::STARTED);
+    assert(s1->get_state() == service_state_t::STARTING);
+
+    s1->started();
+    sset.process_queues();
+
+    assert(s3->get_state() == service_state_t::STARTED);
+    assert(s2->get_state() == service_state_t::STARTED);
+    assert(s1->get_state() == service_state_t::STARTED);
+}
+
+// Make sure a service only restarts once (i.e. restart flag doesn't get stuck)
+void test14()
+{
+    service_set sset;
+
+    test_service *s1 = new test_service(&sset, "test-service-1", service_type_t::INTERNAL, {});
+    test_service *s2 = new test_service(&sset, "test-service-2", service_type_t::INTERNAL, {{s1, WAITS}});
+
+    sset.add_service(s1);
+    sset.add_service(s2);
+
+    // Start all services via s3
+    sset.start_service(s2);
+    s1->started();
+    sset.process_queues();
+    s2->started();
+    sset.process_queues();
+
+    assert(s2->get_state() == service_state_t::STARTED);
+    assert(s1->get_state() == service_state_t::STARTED);
+
+    s1->restart();
+    s1->forced_stop();
+    sset.process_queues();
+
+    assert(s2->get_state() == service_state_t::STARTED);
+    assert(s1->get_state() == service_state_t::STARTING);
+
+    s1->started();
+    sset.process_queues();
+
+    assert(s2->get_state() == service_state_t::STARTED);
+    assert(s1->get_state() == service_state_t::STARTED);
+
+    // Ok, we restarted s1. Now stop it:
+
+    s1->stop(true);
+    sset.process_queues();
+
+    assert(s2->get_state() == service_state_t::STARTED);
+    assert(s1->get_state() == service_state_t::STOPPED); // didn't restart
+}
+
 
 #define RUN_TEST(name, spacing) \
     std::cout << #name "..." spacing << std::flush; \
@@ -575,4 +652,6 @@ int main(int argc, char **argv)
     RUN_TEST(test10, "                    ");
     RUN_TEST(test11, "                    ");
     RUN_TEST(test12, "                    ");
+    RUN_TEST(test13, "                    ");
+    RUN_TEST(test14, "                    ");
 }