From: Davin McCall Date: Sun, 4 Aug 2019 04:23:15 +0000 (+1000) Subject: Re-work state propogation/transition logic. X-Git-Tag: v0.7.0~9 X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=dc772cfe9b6661a0d0e5db888778b8167e702331;p=oweals%2Fdinit.git Re-work state propogation/transition logic. This mostly keeps existing behaviour, hopefully making it more consistent and predictable in other cases, and simplifies the code. --- diff --git a/TODO b/TODO index fee35b1..2b28abe 100644 --- a/TODO +++ b/TODO @@ -12,8 +12,10 @@ For version 0.7.0: For version 0.8.0: ------------------ * Get rid of "floating" service state: - - soft dependencies should always be re-attached when the dependency starts. - - "dinitctl wake" should fail (report error) if there are no running dependents. + - soft dependencies should remain attached when a dependency restarts and removed + only if it stops without restarting + - "dinitctl wake" should fail (report error) if there are no running dependents and + should re-attach soft dependencies. - if a service starts and required-by count is 0 it should always immediately stop. - update man pages accordingly. * Easy way to reload service description (including if service is running, where possible). diff --git a/src/baseproc-service.cc b/src/baseproc-service.cc index 4ff1896..32c49d5 100644 --- a/src/baseproc-service.cc +++ b/src/baseproc-service.cc @@ -375,10 +375,6 @@ void base_process_service::timer_expired() noexcept void base_process_service::emergency_stop() noexcept { - if (! do_auto_restart() && start_explicit) { - start_explicit = false; - release(false); - } forced_stop(); stop_dependents(); } diff --git a/src/control.cc b/src/control.cc index e4eb578..3b72a85 100644 --- a/src/control.cc +++ b/src/control.cc @@ -223,7 +223,7 @@ bool control_conn_t::process_start_stop(int pktType) } // 1 byte: packet type - // 1 byte: flags eg pin in requested state (0 = no pin, 1 = pin) + // 1 byte: flags eg. pin in requested state (0 = no pin, 1 = pin) // 4 bytes: service handle bool do_pin = ((rbuf[1] & 1) == 1); @@ -259,7 +259,6 @@ bool control_conn_t::process_start_stop(int pktType) // force service to stop bool gentle = ((rbuf[1] & 2) == 2); bool do_restart = ((rbuf[1] & 4) == 4); - bool is_active = service->is_marked_active(); if (do_restart && services->is_shutting_down()) { ack_buf[0] = DINIT_RP_NAK; break; @@ -275,16 +274,18 @@ bool control_conn_t::process_start_stop(int pktType) goto clear_out; } } - if (do_pin) service->pin_stop(); - service->stop(true); - service->forced_stop(); - services->process_queues(); - service_state_t wanted_state = service_state_t::STOPPED; + service_state_t wanted_state; if (do_restart) { - service->start(is_active); + service->restart(); // TODO XXX check return, reply NAK if fail wanted_state = service_state_t::STARTED; - services->process_queues(); } + else { + if (do_pin) service->pin_stop(); + service->stop(true); + wanted_state = service_state_t::STOPPED; + } + service->forced_stop(); + services->process_queues(); if (service->get_state() == wanted_state) ack_buf[0] = DINIT_RP_ALREADYSS; break; } diff --git a/src/includes/service.h b/src/includes/service.h index 64ad50d..f1d6244 100644 --- a/src/includes/service.h +++ b/src/includes/service.h @@ -152,6 +152,13 @@ class service_dep const dependency_type dep_type; + // Check if the dependency is a hard dependency (including milestone still waiting). + bool is_hard() + { + return dep_type == dependency_type::REGULAR + || (dep_type == dependency_type::MILESTONE && waiting_on); + } + service_dep(service_record * from, service_record * to, dependency_type dep_type_p) noexcept : from(from), to(to), waiting_on(false), holding_acq(false), dep_type(dep_type_p) { } @@ -230,7 +237,7 @@ class service_record bool prop_start : 1; bool prop_stop : 1; - bool restarting : 1; // re-starting after unexpected termination + bool restarting : 1; // re-start after stopping bool start_failed : 1; // failed to start (reset when begins starting) bool start_skipped : 1; // start was skipped by interrupt @@ -340,8 +347,6 @@ class service_record // Release console (console must be currently held by this service) void release_console() noexcept; - bool do_auto_restart() noexcept; - // Started state reached bool process_started() noexcept; @@ -513,6 +518,7 @@ class service_record void start(bool activate = true) noexcept; // start the service void stop(bool bring_down = true) noexcept; // stop the service + void restart() noexcept; // restart the service void forced_stop() noexcept; // force-stop this service and all dependents @@ -824,7 +830,7 @@ class service_set auto next = prop_queue.pop_front(); next->do_propagation(); } - while (! stop_queue.is_empty()) { + if (! stop_queue.is_empty()) { auto next = stop_queue.pop_front(); next->execute_transition(); } diff --git a/src/proc-service.cc b/src/proc-service.cc index 2f12b20..150f34d 100644 --- a/src/proc-service.cc +++ b/src/proc-service.cc @@ -186,7 +186,6 @@ void process_service::handle_exit_status(bp_sys::exit_status exit_status) noexce { bool did_exit = exit_status.did_exit(); bool was_signalled = exit_status.was_signalled(); - restarting = false; auto service_state = get_state(); if (notification_fd != -1) { @@ -281,7 +280,7 @@ void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcep // This may be a "smooth recovery" where we are restarting the process while leaving the // service in the STARTED state. if (restarting && service_state == service_state_t::STARTED) { - restarting = false; + //restarting = false; bool need_stop = false; if ((did_exit && exit_status.get_exit_status() != 0) || was_signalled) { need_stop = true; @@ -313,7 +312,7 @@ void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcep return; } - restarting = false; + //restarting = false; if (service_state == service_state_t::STARTING) { // POSIX requires that if the process exited clearly with a status code of 0, // the exit status value will be 0: @@ -350,10 +349,6 @@ void bgproc_service::handle_exit_status(bp_sys::exit_status exit_status) noexcep do_smooth_recovery(); return; } - if (! do_auto_restart() && start_explicit) { - start_explicit = false; - release(false); - } stop_reason = stopped_reason_t::TERMINATED; forced_stop(); stop_dependents(); diff --git a/src/service.cc b/src/service.cc index b516c44..88ec6c6 100644 --- a/src/service.cc +++ b/src/service.cc @@ -50,8 +50,28 @@ void service_record::stopped() noexcept force_stop = false; - bool will_restart = (desired_state == service_state_t::STARTED) - && !services->is_shutting_down(); + restarting |= auto_restart; + bool will_restart = restarting && required_by > 0; + restarting = false; + + // If we won't restart, break soft dependencies now + if (! will_restart) { + for (auto dept : dependents) { + if (! dept->is_hard()) { + // waits-for or soft dependency: + if (dept->waiting_on) { + dept->waiting_on = false; + dept->get_from()->dependency_started(); + } + if (dept->holding_acq) { + dept->holding_acq = false; + // release without issuing stop, since we're called only when this + // service is already stopped/stopping: + release(false); + } + } + } + } for (auto & dependency : depends_on) { // we signal dependencies in case they are waiting for us to stop: @@ -107,14 +127,6 @@ void service_record::stopped() noexcept notify_listeners(service_event_t::STOPPED); } -bool service_record::do_auto_restart() noexcept -{ - if (auto_restart) { - return !services->is_shutting_down(); - } - return false; -} - void service_record::require() noexcept { if (required_by++ == 0) { @@ -134,9 +146,11 @@ void service_record::release(bool issue_stop) noexcept // Can stop, and can release dependencies now. We don't need to issue a release if // the require was pending though: - prop_release = !prop_require; - prop_require = false; - services->add_prop_queue(this); + if (service_state != service_state_t::STOPPED && service_state != service_state_t::STOPPING) { + prop_release = !prop_require; + prop_require = false; + services->add_prop_queue(this); + } if (service_state == service_state_t::STOPPED) { services->service_inactive(this); @@ -167,8 +181,6 @@ void service_record::start(bool activate) noexcept require(); start_explicit = true; } - - if (desired_state == service_state_t::STARTED && service_state != service_state_t::STOPPED) return; bool was_active = service_state != service_state_t::STOPPED || desired_state != service_state_t::STOPPED; desired_state = service_state_t::STARTED; @@ -176,9 +188,15 @@ void service_record::start(bool activate) noexcept if (service_state != service_state_t::STOPPED) { // We're already starting/started, or we are stopping and need to wait for // that the complete. - if (service_state != service_state_t::STOPPING || ! can_interrupt_stop()) { + if (service_state != service_state_t::STOPPING) { return; } + + if (! can_interrupt_stop()) { + restarting = true; + return; + } + // We're STOPPING, and that can be interrupted. Our dependencies might be STOPPING, // but if so they are waiting (for us), so they too can be instantly returned to // STARTING state. @@ -243,6 +261,14 @@ void service_record::execute_transition() noexcept else if (service_state == service_state_t::STOPPING) { if (stop_check_dependents()) { waiting_for_deps = false; + + // A service that does actually stop for any reason should have its explicit activation released, unless + // it will restart: + if (start_explicit && !auto_restart && !restarting) { + start_explicit = false; + release(false); + } + bring_down(); } } @@ -322,6 +348,7 @@ void service_record::all_deps_started() noexcept } bool start_success = bring_up(); + restarting = false; if (! start_success) { failed_to_start(); } @@ -454,30 +481,12 @@ void service_record::stop(bool bring_down) noexcept { if (start_explicit) { start_explicit = false; - release(); + required_by--; } // If our required_by count is 0, we should treat this as a full manual stop regardless - if (required_by == 0) bring_down = true; - - // If it's a manual bring-down, we'll also break holds from waits-for dependencies, to avoid - // bouncing back up again -- but only if all holds are from waits-for dependencies. - if (bring_down) { - if (std::all_of(dependents.begin(), dependents.end(), [](service_dep * x) { - return x->dep_type == dependency_type::WAITS_FOR || x->dep_type == dependency_type::SOFT; })) - { - for (auto dept : dependents) { - if (dept->waiting_on) { - dept->waiting_on = false; - dept->get_from()->dependency_started(); - } - if (dept->holding_acq) { - dept->holding_acq = false; - // release without issuing stop, since we issue stop if necessary below - release(false); - } - } - } + if (required_by == 0) { + bring_down = true; } if (bring_down && service_state != service_state_t::STOPPED @@ -487,14 +496,21 @@ void service_record::stop(bool bring_down) noexcept } } -void service_record::do_stop() noexcept +void service_record::restart() noexcept { - // A service that does actually stop for any reason should have its explicit activation released, unless - // it will restart: - if (start_explicit && ! do_auto_restart()) { - start_explicit = false; - release(false); + // Re-start without affecting dependency links/activation. + + if (service_state == service_state_t::STARTED) { + restarting = true; + stop_reason = stopped_reason_t::NORMAL; + do_stop(); } +} + +void service_record::do_stop() noexcept +{ + // Called when we should definitely stop. We may need to restart afterwards, but we + // won't know that for sure until the execution transition. bool all_deps_stopped = stop_dependents(); @@ -535,6 +551,11 @@ void service_record::do_stop() noexcept if (pinned_started) return; + if (required_by == 0) { + prop_release = true; + services->add_prop_queue(this); + } + service_state = service_state_t::STOPPING; waiting_for_deps = true; if (all_deps_stopped) { @@ -546,12 +567,12 @@ bool service_record::stop_check_dependents() noexcept { bool all_deps_stopped = true; for (auto dept : dependents) { - if (dept->dep_type == dependency_type::REGULAR && ! dept->get_from()->is_stopped()) { + if (dept->is_hard() && dept->holding_acq) { all_deps_stopped = false; break; } } - + return all_deps_stopped; } @@ -559,9 +580,7 @@ bool service_record::stop_dependents() noexcept { bool all_deps_stopped = true; for (auto dept : dependents) { - if (dept->dep_type == dependency_type::REGULAR || - (dept->dep_type == dependency_type::MILESTONE && - dept->get_from()->service_state != service_state_t::STARTED)) { + if (dept->is_hard() && dept->holding_acq) { if (! dept->get_from()->is_stopped()) { // Note we check *first* since if the dependent service is not stopped, // 1. We will issue a stop to it shortly and @@ -578,19 +597,6 @@ bool service_record::stop_dependents() noexcept dept->get_from()->prop_stop = true; services->add_prop_queue(dept->get_from()); } - else { - // waits-for or soft dependency: - if (dept->waiting_on) { - dept->waiting_on = false; - dept->get_from()->dependency_started(); - } - if (dept->holding_acq && !auto_restart) { - dept->holding_acq = false; - // release without issuing stop, since we should be called only when this - // service is already stopped/stopping: - release(false); - } - } } return all_deps_stopped; @@ -607,6 +613,19 @@ void service_record::unpin() noexcept { if (pinned_started) { pinned_started = false; + + for (auto &dep : depends_on) { + if (dep.is_hard()) { + if (dep.get_to()->get_state() != service_state_t::STARTED) { + desired_state = service_state_t::STOPPED; + } + } + else if (dep.holding_acq) { + dep.holding_acq = false; + dep.get_to()->release(); + } + } + if (desired_state == service_state_t::STOPPED || force_stop) { do_stop(); services->process_queues(); diff --git a/src/tests/proctests.cc b/src/tests/proctests.cc index cb36ede..395ec3e 100644 --- a/src/tests/proctests.cc +++ b/src/tests/proctests.cc @@ -165,7 +165,6 @@ void test_proc_unexpected_term() assert(p.get_state() == service_state_t::STARTED); base_process_service_test::handle_exit(&p, 0); - sset.process_queues(); assert(p.get_state() == service_state_t::STOPPED); assert(p.get_stop_reason() == stopped_reason_t::TERMINATED); @@ -975,6 +974,7 @@ void test_waitsfor_restart() assert(tp.get_state() == service_state_t::STARTING); assert(p.get_state() == service_state_t::STOPPING); + // p terminates (finishes stopping). Then it should re-start... base_process_service_test::handle_signal_exit(&p, SIGTERM); sset.process_queues(); diff --git a/src/tests/tests.cc b/src/tests/tests.cc index dd1510d..a0f45bc 100644 --- a/src/tests/tests.cc +++ b/src/tests/tests.cc @@ -213,7 +213,7 @@ void test_pin1() // s3 should remain started due to pin: assert(s3->get_state() == service_state_t::STARTED); assert(s2->get_state() == service_state_t::STOPPING); - assert(s1->get_state() == service_state_t::STOPPING); + assert(s1->get_state() == service_state_t::STARTED); // If we now unpin, s3 should stop: s3->unpin(); @@ -223,8 +223,7 @@ void test_pin1() assert(s1->get_state() == service_state_t::STOPPED); } -// Test that STOPPING dependency of pinned service returns to STARTED when the pinned service has -// a start issued. +// Test that issuing a stop to a pinned-started service does not stop the service or its dependencies. void test_pin2() { service_set sset; @@ -251,15 +250,7 @@ void test_pin2() s3->stop(true); sset.process_queues(); - // s3 should remain started due to pin, but s1 and s2 are released and go STOPPING: - assert(s3->get_state() == service_state_t::STARTED); - assert(s2->get_state() == service_state_t::STOPPING); - assert(s1->get_state() == service_state_t::STOPPING); - - // If we now issue start, STOPPING should revert to STARTED: - s3->start(true); - sset.process_queues(); - + // s3 should remain started due to pin, s1 and s2 not released: assert(s3->get_state() == service_state_t::STARTED); assert(s2->get_state() == service_state_t::STARTED); assert(s1->get_state() == service_state_t::STARTED); @@ -297,7 +288,7 @@ void test_pin3() // s3 should remain started due to pin, but s1 and s2 are released and go STOPPING: assert(s3->get_state() == service_state_t::STARTED); assert(s2->get_state() == service_state_t::STOPPING); - assert(s1->get_state() == service_state_t::STOPPING); + assert(s1->get_state() == service_state_t::STARTED); // If we now issue start, s2 still needs to stop (due to force stop): s3->start(true); @@ -323,7 +314,7 @@ void test_pin4() service_record *s1 = new service_record(&sset, "test-service-1", service_type_t::INTERNAL, {}); sset.add_service(s1); - // Pin s3: + // Pin s1: s1->pin_start(); // Start the service: @@ -331,8 +322,9 @@ void test_pin4() assert(s1->get_state() == service_state_t::STARTED); - // Issue stop: + // Issue forced stop: s1->stop(true); + s1->forced_stop(); sset.process_queues(); // s3 should remain started: @@ -552,6 +544,91 @@ void test12() assert(sset.count_active_services() == 0); } +// Tests for "restart" functionality. +void test13() +{ + service_set sset; + + test_service *s1 = new test_service(&sset, "test-service-1", service_type_t::INTERNAL, {}); + test_service *s2 = new test_service(&sset, "test-service-2", service_type_t::INTERNAL, {{s1, WAITS}}); + test_service *s3 = new test_service(&sset, "test-service-3", service_type_t::INTERNAL, {{s2, REG}}); + + sset.add_service(s1); + sset.add_service(s2); + sset.add_service(s3); + + // Start all services via s3 + sset.start_service(s3); + s1->started(); + sset.process_queues(); + s2->started(); + sset.process_queues(); + s3->started(); + sset.process_queues(); + + assert(s3->get_state() == service_state_t::STARTED); + assert(s2->get_state() == service_state_t::STARTED); + assert(s1->get_state() == service_state_t::STARTED); + + s1->restart(); + s1->forced_stop(); + sset.process_queues(); + + assert(s3->get_state() == service_state_t::STARTED); + assert(s2->get_state() == service_state_t::STARTED); + assert(s1->get_state() == service_state_t::STARTING); + + s1->started(); + sset.process_queues(); + + assert(s3->get_state() == service_state_t::STARTED); + assert(s2->get_state() == service_state_t::STARTED); + assert(s1->get_state() == service_state_t::STARTED); +} + +// Make sure a service only restarts once (i.e. restart flag doesn't get stuck) +void test14() +{ + service_set sset; + + test_service *s1 = new test_service(&sset, "test-service-1", service_type_t::INTERNAL, {}); + test_service *s2 = new test_service(&sset, "test-service-2", service_type_t::INTERNAL, {{s1, WAITS}}); + + sset.add_service(s1); + sset.add_service(s2); + + // Start all services via s3 + sset.start_service(s2); + s1->started(); + sset.process_queues(); + s2->started(); + sset.process_queues(); + + assert(s2->get_state() == service_state_t::STARTED); + assert(s1->get_state() == service_state_t::STARTED); + + s1->restart(); + s1->forced_stop(); + sset.process_queues(); + + assert(s2->get_state() == service_state_t::STARTED); + assert(s1->get_state() == service_state_t::STARTING); + + s1->started(); + sset.process_queues(); + + assert(s2->get_state() == service_state_t::STARTED); + assert(s1->get_state() == service_state_t::STARTED); + + // Ok, we restarted s1. Now stop it: + + s1->stop(true); + sset.process_queues(); + + assert(s2->get_state() == service_state_t::STARTED); + assert(s1->get_state() == service_state_t::STOPPED); // didn't restart +} + #define RUN_TEST(name, spacing) \ std::cout << #name "..." spacing << std::flush; \ @@ -575,4 +652,6 @@ int main(int argc, char **argv) RUN_TEST(test10, " "); RUN_TEST(test11, " "); RUN_TEST(test12, " "); + RUN_TEST(test13, " "); + RUN_TEST(test14, " "); }