From: Christian Grothoff Date: Mon, 5 Oct 2009 09:56:25 +0000 (+0000) Subject: improving ARM API X-Git-Tag: initial-import-from-subversion-38251~23385 X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=d75fb1880a887a8f5339c5e8cf5e9d2b8755fdad;p=oweals%2Fgnunet.git improving ARM API --- diff --git a/BUGS b/BUGS index 31f791f72..affae73e8 100644 --- a/BUGS +++ b/BUGS @@ -92,8 +92,6 @@ sane end-user should care about this codebase yet anyway. - implement exponential back-off for service restarts - better tracking of which config changes actually need to cause process restarts by ARM. - have way to specify dependencies between services (to manage ARM restarts better) - - client-API is inefficient since it opens a TCP connection per service that is started - (instead of re-using connections). * CORE: - code currently notifies clients about "encrypted" connections being up well before we get the encrypted PONG; sometimes this may be OK (for topology killing diff --git a/src/arm/arm_api.c b/src/arm/arm_api.c index f0ab8d189..7f2274958 100644 --- a/src/arm/arm_api.c +++ b/src/arm/arm_api.c @@ -32,85 +32,200 @@ #include "gnunet_server_lib.h" #include "arm.h" +/** + * How often do we re-try transmitting requests to ARM before + * giving up? Note that if we succeeded transmitting a request + * but failed to read a response, we do NOT re-try (since that + * might result in ARM getting a request twice). + */ +#define MAX_ATTEMPTS 4 /** - * FIXME: document. + * Minimum delay between attempts to talk to ARM. */ -struct ArmContext +#define MIN_RETRY_DELAY GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 100) + + +/** + * How long are we willing to wait for a service operation during the multi-operation + * request processing? + */ +#define MULTI_TIMEOUT GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS, 5) + + +/** + * Handle for interacting with ARM. + */ +struct GNUNET_ARM_Handle { /** - * FIXME: document. + * Our connection to the ARM service. 
+ */ + struct GNUNET_CLIENT_Connection *client; + + /** + * The configuration that we are using. + */ + const struct GNUNET_CONFIGURATION_Handle *cfg; + + /** + * Scheduler to use. + */ + struct GNUNET_SCHEDULER_Handle *sched; + +}; + + +/** + * Setup a context for communicating with ARM. Note that this + * can be done even if the ARM service is not yet running. + * + * @param cfg configuration to use (needed to contact ARM; + * the ARM service may internally use a different + * configuration to determine how to start the service). + * @param sched scheduler to use + * @param service service that *this* process is implementing/providing, can be NULL + * @return context to use for further ARM operations, NULL on error + */ +struct GNUNET_ARM_Handle * +GNUNET_ARM_connect (const struct GNUNET_CONFIGURATION_Handle *cfg, + struct GNUNET_SCHEDULER_Handle *sched, + const char *service) +{ + struct GNUNET_ARM_Handle *ret; + struct GNUNET_CLIENT_Connection *client; + + client = GNUNET_CLIENT_connect (sched, "arm", cfg); + if (client == NULL) + return NULL; + ret = GNUNET_malloc (sizeof (struct GNUNET_ARM_Handle)); + ret->cfg = cfg; + ret->sched = sched; + ret->client = client; + return ret; +} + + +/** + * Disconnect from the ARM service. + * + * @param h the handle that was being used + */ +void +GNUNET_ARM_disconnect (struct GNUNET_ARM_Handle *h) +{ + if (h->client != NULL) + GNUNET_CLIENT_disconnect (h->client); + GNUNET_free (h); +} + + +/** + * Internal state for a request with ARM. + */ +struct RequestContext +{ + + /** + * Pointer to our handle with ARM. + */ + struct GNUNET_ARM_Handle *h; + + /** + * Function to call with a status code for the requested operation. */ GNUNET_ARM_Callback callback; /** - * FIXME: document. + * Closure for "callback". */ void *cls; /** - * FIXME: document. + * The service that is being manipulated. Do not free. */ - char *service_name; + const char *service_name; /** - * FIXME: document. + * Timeout for the operation. 
*/ - struct GNUNET_CLIENT_Connection *client; + struct GNUNET_TIME_Absolute timeout; /** - * FIXME: document. + * Length of service_name plus one. */ - const struct GNUNET_CONFIGURATION_Handle *cfg; + size_t slen; /** - * FIXME: document. + * Number of attempts left for transmitting the request to ARM. + * We may fail the first time (say because ARM is not yet up), + * in which case we wait a bit and re-try (timeout permitting). */ - struct GNUNET_TIME_Absolute timeout; + unsigned int attempts_left; /** - * FIXME: document. + * Type of the request expressed as a message type (start or stop). */ uint16_t type; + }; /** - * FIXME: document. + * A client specifically requested starting of ARM itself. + * This function is called with information about whether + * or not ARM is running; if it is, report success. If + * it is not, start the ARM process. + * + * @param cls the context for the request that we will report on (struct RequestContext*) + * @param tc why were we called (reason says if ARM is running) */ static void -arm_service_report (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) +arm_service_report (void *cls, + const struct GNUNET_SCHEDULER_TaskContext *tc) { - struct ArmContext *pos = cls; + struct RequestContext *pos = cls; pid_t pid; char *binary; char *config; if (0 != (tc->reason & GNUNET_SCHEDULER_REASON_PREREQ_DONE)) { + /* arm is running! */ if (pos->callback != NULL) pos->callback (pos->cls, GNUNET_YES); GNUNET_free (pos); return; } - binary = NULL; - config = NULL; + /* FIXME: should we check that HOSTNAME for 'arm' is localhost? 
*/ /* start service */ - if ((GNUNET_OK != - GNUNET_CONFIGURATION_get_value_string (pos->cfg, - "arm", - "BINARY", - &binary)) || - (GNUNET_OK != - GNUNET_CONFIGURATION_get_value_filename (pos->cfg, - "arm", "CONFIG", &config))) + if (GNUNET_OK != + GNUNET_CONFIGURATION_get_value_string (pos->h->cfg, + "arm", + "BINARY", + &binary)) + { + GNUNET_log (GNUNET_ERROR_TYPE_WARNING, + _("Configuration failes to specify option `%s' in section `%s'!\n"), + "BINARY", + "arm"); + if (pos->callback != NULL) + pos->callback (pos->cls, GNUNET_SYSERR); + GNUNET_free (pos); + return; + } + if (GNUNET_OK != + GNUNET_CONFIGURATION_get_value_filename (pos->h->cfg, + "arm", "CONFIG", &config)) { GNUNET_log (GNUNET_ERROR_TYPE_WARNING, - _("Configuration file or binary for ARM not known!\n")); + _("Configuration fails to specify option `%s' in section `%s'!\n"), + "CONFIG", + "arm"); if (pos->callback != NULL) pos->callback (pos->cls, GNUNET_SYSERR); - GNUNET_free_non_null (binary); + GNUNET_free (binary); GNUNET_free (pos); return; } @@ -128,24 +243,34 @@ arm_service_report (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) GNUNET_free (pos); return; } - /* FIXME: consider checking again to see if it worked!? */ if (pos->callback != NULL) pos->callback (pos->cls, GNUNET_YES); GNUNET_free (pos); } +/** + * Process a response from ARM to a request for a change in service + * status. 
+ * + * @param cls the request context + * @param msg the response + */ static void handle_response (void *cls, const struct GNUNET_MessageHeader *msg) { - struct ArmContext *sc = cls; + struct RequestContext *sc = cls; int ret; if (msg == NULL) { GNUNET_log (GNUNET_ERROR_TYPE_WARNING, _("Error receiving response from ARM service\n")); - GNUNET_CLIENT_disconnect (sc->client); + GNUNET_CLIENT_disconnect (sc->h->client); + sc->h->client = GNUNET_CLIENT_connect (sc->h->sched, + "arm", + sc->h->cfg); + GNUNET_assert (NULL != sc->h->client); if (sc->callback != NULL) sc->callback (sc->cls, GNUNET_SYSERR); GNUNET_free (sc); @@ -170,73 +295,144 @@ handle_response (void *cls, const struct GNUNET_MessageHeader *msg) GNUNET_break (0); ret = GNUNET_SYSERR; } - GNUNET_CLIENT_disconnect (sc->client); if (sc->callback != NULL) sc->callback (sc->cls, ret); GNUNET_free (sc); } +/** + * We've failed to transmit the request to the ARM service. + * Report our failure and clean up the state. + * + * @param sctx the state of the (now failed) request + */ +static void +report_transmit_failure (struct RequestContext *sctx) +{ + GNUNET_log (GNUNET_ERROR_TYPE_WARNING, + _("Error while trying to transmit to ARM service\n")); + if (sctx->callback != NULL) + sctx->callback (sctx->cls, GNUNET_SYSERR); + GNUNET_free (sctx); +} + + +/** + * Transmit a request for a service status change to the + * ARM service. + * + * @param cls the "struct RequestContext" identifying the request + * @param size how many bytes are available in buf + * @param buf where to write the request, NULL on error + * @return number of bytes written to buf + */ +static size_t +send_service_msg (void *cls, size_t size, void *buf); + + +/** + * We've failed to transmit the request to the ARM service but + * are now going to try again. 
+ * + * @param cls state of the request + * @param tc task context (unused) + */ +static void +retry_request (void *cls, + const struct GNUNET_SCHEDULER_TaskContext *tc) +{ + struct RequestContext *sctx = cls; + + if (NULL == + GNUNET_CLIENT_notify_transmit_ready (sctx->h->client, + sctx->slen + + sizeof (struct + GNUNET_MessageHeader), + GNUNET_TIME_absolute_get_remaining (sctx->timeout), + &send_service_msg, + sctx)) + { + report_transmit_failure (sctx); + return; + } +} + + +/** + * Transmit a request for a service status change to the + * ARM service. + * + * @param cls the "struct RequestContext" identifying the request + * @param size how many bytes are available in buf + * @param buf where to write the request, NULL on error + * @return number of bytes written to buf + */ static size_t send_service_msg (void *cls, size_t size, void *buf) { - struct ArmContext *sctx = cls; + struct RequestContext *sctx = cls; struct GNUNET_MessageHeader *msg; - size_t slen; + struct GNUNET_TIME_Relative rem; if (buf == NULL) { - GNUNET_log (GNUNET_ERROR_TYPE_WARNING, - _("Error while trying to transmit to ARM service\n")); - GNUNET_CLIENT_disconnect (sctx->client); - if (sctx->callback != NULL) - sctx->callback (sctx->cls, GNUNET_SYSERR); - GNUNET_free (sctx->service_name); - GNUNET_free (sctx); + GNUNET_CLIENT_disconnect (sctx->h->client); + sctx->h->client = GNUNET_CLIENT_connect (sctx->h->sched, + "arm", + sctx->h->cfg); + GNUNET_assert (sctx->h->client != NULL); + rem = GNUNET_TIME_absolute_get_remaining (sctx->timeout); + if ( (sctx->attempts_left-- > 0) && + (rem.value > 0) ) + { + GNUNET_SCHEDULER_add_delayed (sctx->h->sched, + GNUNET_NO, + GNUNET_SCHEDULER_PRIORITY_KEEP, + GNUNET_SCHEDULER_NO_TASK, + GNUNET_TIME_relative_min (MIN_RETRY_DELAY, + rem), + &retry_request, + sctx); + return 0; + } + report_transmit_failure (sctx); return 0; } #if DEBUG_ARM GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, _("Transmitting service request to ARM.\n")); #endif - slen = strlen 
(sctx->service_name) + 1; - GNUNET_assert (size >= slen); + GNUNET_assert (size >= sctx->slen); msg = buf; - msg->size = htons (sizeof (struct GNUNET_MessageHeader) + slen); + msg->size = htons (sizeof (struct GNUNET_MessageHeader) + sctx->slen); msg->type = htons (sctx->type); - memcpy (&msg[1], sctx->service_name, slen); - GNUNET_free (sctx->service_name); - sctx->service_name = NULL; - GNUNET_CLIENT_receive (sctx->client, + memcpy (&msg[1], sctx->service_name, sctx->slen); + GNUNET_CLIENT_receive (sctx->h->client, &handle_response, sctx, GNUNET_TIME_absolute_get_remaining (sctx->timeout)); - return slen + sizeof (struct GNUNET_MessageHeader); + return sctx->slen + sizeof (struct GNUNET_MessageHeader); } /** * Start or stop a service. * + * @param h handle to ARM * @param service_name name of the service - * @param cfg configuration to use (needed to contact ARM; - * the ARM service may internally use a different - * configuration to determine how to start the service). - * @param sched scheduler to use * @param timeout how long to wait before failing for good * @param cb callback to invoke when service is ready * @param cb_cls closure for callback * @param type type of the request */ static void -change_service (const char *service_name, - const struct GNUNET_CONFIGURATION_Handle *cfg, - struct GNUNET_SCHEDULER_Handle *sched, +change_service (struct GNUNET_ARM_Handle *h, + const char *service_name, struct GNUNET_TIME_Relative timeout, GNUNET_ARM_Callback cb, void *cb_cls, uint16_t type) { - struct GNUNET_CLIENT_Connection *client; - struct ArmContext *sctx; + struct RequestContext *sctx; size_t slen; slen = strlen (service_name) + 1; @@ -248,128 +444,248 @@ change_service (const char *service_name, cb (cb_cls, GNUNET_NO); return; } - client = GNUNET_CLIENT_connect (sched, "arm", cfg); - if (client == NULL) - { - GNUNET_log (GNUNET_ERROR_TYPE_WARNING, - _("Failed to connect to ARM service\n")); - if (cb != NULL) - cb (cb_cls, GNUNET_SYSERR); - return; - } #if 
DEBUG_ARM GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, _("ARM requests starting of service `%s'.\n"), service_name); #endif - sctx = GNUNET_malloc (sizeof (struct ArmContext)); + sctx = GNUNET_malloc (sizeof (struct RequestContext) + slen); + sctx->h = h; sctx->callback = cb; sctx->cls = cb_cls; - sctx->client = client; - sctx->service_name = GNUNET_strdup (service_name); + sctx->service_name = (const char*) &sctx[1]; + memcpy (&sctx[1], + service_name, + slen); sctx->timeout = GNUNET_TIME_relative_to_absolute (timeout); + sctx->slen = slen; + sctx->attempts_left = MAX_ATTEMPTS; sctx->type = type; - if (NULL == - GNUNET_CLIENT_notify_transmit_ready (client, - slen + - sizeof (struct - GNUNET_MessageHeader), - timeout, &send_service_msg, sctx)) - { - GNUNET_log (GNUNET_ERROR_TYPE_WARNING, - _("Failed to transmit request to ARM service\n")); - GNUNET_free (sctx->service_name); - GNUNET_free (sctx); - if (cb != NULL) - cb (cb_cls, GNUNET_SYSERR); - GNUNET_CLIENT_disconnect (client); - return; - } + retry_request (sctx, NULL); } /** * Start a service. * + * @param h handle to ARM * @param service_name name of the service - * @param cfg configuration to use (needed to contact ARM; - * the ARM service may internally use a different - * configuration to determine how to start the service). 
- * @param sched scheduler to use * @param timeout how long to wait before failing for good * @param cb callback to invoke when service is ready * @param cb_cls closure for callback */ void -GNUNET_ARM_start_service (const char *service_name, - const struct GNUNET_CONFIGURATION_Handle *cfg, - struct GNUNET_SCHEDULER_Handle *sched, +GNUNET_ARM_start_service (struct GNUNET_ARM_Handle *h, + const char *service_name, struct GNUNET_TIME_Relative timeout, GNUNET_ARM_Callback cb, void *cb_cls) { - struct ArmContext *sctx; + struct RequestContext *sctx; GNUNET_log (GNUNET_ERROR_TYPE_INFO, _("Starting service `%s'\n"), service_name); if (0 == strcmp ("arm", service_name)) { - sctx = GNUNET_malloc (sizeof (struct ArmContext)); + sctx = GNUNET_malloc (sizeof (struct RequestContext)); + sctx->h = h; sctx->callback = cb; sctx->cls = cb_cls; - sctx->cfg = cfg; - GNUNET_CLIENT_service_test (sched, + sctx->timeout = GNUNET_TIME_relative_to_absolute (timeout); + GNUNET_CLIENT_service_test (h->sched, "arm", - cfg, timeout, &arm_service_report, sctx); + h->cfg, timeout, &arm_service_report, sctx); return; } - change_service (service_name, - cfg, - sched, timeout, cb, cb_cls, GNUNET_MESSAGE_TYPE_ARM_START); + change_service (h, service_name, timeout, cb, cb_cls, GNUNET_MESSAGE_TYPE_ARM_START); } - - /** * Stop a service. * + * @param h handle to ARM * @param service_name name of the service - * @param cfg configuration to use (needed to contact ARM; - * the ARM service may internally use a different - * configuration to determine how to start the service). 
- * @param sched scheduler to use * @param timeout how long to wait before failing for good * @param cb callback to invoke when service is ready * @param cb_cls closure for callback */ void -GNUNET_ARM_stop_service (const char *service_name, - const struct GNUNET_CONFIGURATION_Handle *cfg, - struct GNUNET_SCHEDULER_Handle *sched, +GNUNET_ARM_stop_service (struct GNUNET_ARM_Handle *h, + const char *service_name, struct GNUNET_TIME_Relative timeout, GNUNET_ARM_Callback cb, void *cb_cls) { - struct GNUNET_CLIENT_Connection *client; - GNUNET_log (GNUNET_ERROR_TYPE_INFO, _("Stopping service `%s'\n"), service_name); if (0 == strcmp ("arm", service_name)) { - client = GNUNET_CLIENT_connect (sched, "arm", cfg); - if (client == NULL) - { - if (cb != NULL) - cb (cb_cls, GNUNET_SYSERR); - return; - } - GNUNET_CLIENT_service_shutdown (client); - GNUNET_CLIENT_disconnect (client); + GNUNET_CLIENT_service_shutdown (h->client); if (cb != NULL) cb (cb_cls, GNUNET_NO); return; } - change_service (service_name, - cfg, - sched, timeout, cb, cb_cls, GNUNET_MESSAGE_TYPE_ARM_STOP); + change_service (h, service_name, timeout, cb, cb_cls, GNUNET_MESSAGE_TYPE_ARM_STOP); } + +/** + * Function to call for each service. + * + * @param h handle to ARM + * @param service_name name of the service + * @param timeout how long to wait before failing for good + * @param cb callback to invoke when service is ready + * @param cb_cls closure for callback + */ +typedef void (*ServiceOperation) (struct GNUNET_ARM_Handle *h, + const char *service_name, + struct GNUNET_TIME_Relative timeout, + GNUNET_ARM_Callback cb, void *cb_cls); + + +/** + * Context for starting or stopping multiple services. + */ +struct MultiContext +{ + /** + * NULL-terminated array of services to start or stop. + */ + char **services; + + /** + * Our handle to ARM. + */ + struct GNUNET_ARM_Handle *h; + + /** + * Identifies the operation (start or stop). + */ + ServiceOperation op; + + /** + * Current position in "services". 
+ */ + unsigned int pos; +}; + + +/** + * Run the operation for the next service in the multi-service + * request. + * + * @param cls the "struct MultiContext" that is being processed + * @param success status of the previous operation (ignored) + */ +static void +next_operation (void *cls, + int success) +{ + struct MultiContext *mc = cls; + char *pos; + + if (NULL == (pos = mc->services[mc->pos])) + { + GNUNET_free (mc->services); + GNUNET_ARM_disconnect (mc->h); + GNUNET_free (mc); + return; + } + mc->pos++; + mc->op (mc->h, pos, MULTI_TIMEOUT, &next_operation, mc); + GNUNET_free (pos); +} + + +/** + * Run a multi-service request. + * + * @param cfg configuration to use (needed to contact ARM; + * the ARM service may internally use a different + * configuration to determine how to start the service). + * @param sched scheduler to use + * @param op the operation to perform for each service + * @param va NULL-terminated list of services + */ +static void +run_multi_request (const struct GNUNET_CONFIGURATION_Handle *cfg, + struct GNUNET_SCHEDULER_Handle *sched, + ServiceOperation op, + va_list va) +{ + va_list cp; + unsigned int total; + struct MultiContext *mc; + struct GNUNET_ARM_Handle *h; + const char *c; + + h = GNUNET_ARM_connect (cfg, sched, NULL); + if (NULL == h) + { + GNUNET_log (GNUNET_ERROR_TYPE_WARNING, + _("Error while trying to transmit to ARM service\n")); + return; + } + total = 1; + va_copy (cp, va); + while (NULL != (va_arg (cp, const char*))) total++; + va_end (cp); + mc = GNUNET_malloc (sizeof(struct MultiContext)); + mc->services = GNUNET_malloc (total * sizeof (char*)); + mc->h = h; + mc->op = op; + total = 0; + va_copy (cp, va); + while (NULL != (c = va_arg (cp, const char*))) + mc->services[total++] = GNUNET_strdup (c); + va_end (cp); + next_operation (mc, GNUNET_YES); +} + + +/** + * Start multiple services in the specified order. Convenience + * function. Works asynchronously, failures are not reported. 
+ * + * @param cfg configuration to use (needed to contact ARM; + * the ARM service may internally use a different + * configuration to determine how to start the service). + * @param sched scheduler to use + * @param ... NULL-terminated list of service names (const char*) + */ +void +GNUNET_ARM_start_services (const struct GNUNET_CONFIGURATION_Handle *cfg, + struct GNUNET_SCHEDULER_Handle *sched, + ...) +{ + va_list ap; + + va_start (ap, sched); + run_multi_request (cfg, sched, &GNUNET_ARM_start_service, ap); + va_end (ap); +} + + +/** + * Stop multiple services in the specified order. Convenience + * function. Works asynchronously, failures are not reported. + * + * @param cfg configuration to use (needed to contact ARM; + * the ARM service may internally use a different + * configuration to determine how to start the service). + * @param sched scheduler to use + * @param ... NULL-terminated list of service names (const char*) + */ +void +GNUNET_ARM_stop_services (const struct GNUNET_CONFIGURATION_Handle *cfg, + struct GNUNET_SCHEDULER_Handle *sched, + ...) +{ + va_list ap; + + va_start (ap, sched); + run_multi_request (cfg, sched, &GNUNET_ARM_stop_service, ap); + va_end (ap); +} + + /* end of arm_api.c */ diff --git a/src/arm/gnunet-arm.c b/src/arm/gnunet-arm.c index 740fd2533..09a1305a5 100644 --- a/src/arm/gnunet-arm.c +++ b/src/arm/gnunet-arm.c @@ -65,7 +65,46 @@ static char *test; */ static int ret; +/** + * Connection with ARM. + */ +static struct GNUNET_ARM_Handle *h; + +/** + * Our scheduler. + */ +static struct GNUNET_SCHEDULER_Handle *sched; + +/** + * Our configuration. + */ +const struct GNUNET_CONFIGURATION_Handle *cfg; + +/** + * Processing stage that we are in. Simple counter. + */ +static unsigned int phase; + +/** + * Main continuation-passing-style loop. Runs the various + * jobs that we've been asked to do in order. 
+ * + * @param cls closure, unused + * @param tc context, unused + */ +static void +cps_loop (void *cls, + const struct GNUNET_SCHEDULER_TaskContext *tc); + + +/** + * Callback invoked with the status of the last operation. Reports to the + * user and then runs the next phase in the FSM. + * + * @param cls pointer to "const char*" identifying service that was manipulated + * @param success GNUNET_OK if service is now running, GNUNET_NO if not, GNUNET_SYSERR on error + */ static void confirm_cb (void *cls, int success) { @@ -83,9 +122,21 @@ confirm_cb (void *cls, int success) _("Error updating service `%s': ARM not running\n"), service); break; } + GNUNET_SCHEDULER_add_continuation (sched, + GNUNET_NO, + &cps_loop, + NULL, + GNUNET_SCHEDULER_REASON_PREREQ_DONE); } +/** + * Function called to confirm that a service is running (or that + * it is not running). + * + * @param cls pointer to "const char*" identifying service that was manipulated + * @param tc reason determines if service is now running + */ static void confirm_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) { @@ -95,6 +146,11 @@ confirm_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) fprintf (stdout, _("Service `%s' is running.\n"), service); else fprintf (stdout, _("Service `%s' is not running.\n"), service); + GNUNET_SCHEDULER_add_continuation (sched, + GNUNET_NO, + &cps_loop, + NULL, + GNUNET_SCHEDULER_REASON_PREREQ_DONE); } @@ -102,40 +158,90 @@ confirm_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) * Main function that will be run by the scheduler. * * @param cls closure - * @param sched the scheduler to use + * @param s the scheduler to use * @param args remaining command-line arguments * @param cfgfile name of the configuration file used (for saving, can be NULL!) 
- * @param cfg configuration + * @param c configuration */ static void run (void *cls, - struct GNUNET_SCHEDULER_Handle *sched, + struct GNUNET_SCHEDULER_Handle *s, char *const *args, const char *cfgfile, - const struct GNUNET_CONFIGURATION_Handle *cfg) + const struct GNUNET_CONFIGURATION_Handle *c) { - if (term != NULL) - { - GNUNET_ARM_stop_service (term, cfg, sched, TIMEOUT, &confirm_cb, term); - } - if (end) - { - GNUNET_ARM_stop_service ("arm", - cfg, sched, TIMEOUT, &confirm_cb, "arm"); - } - if (start) + sched = s; + cfg = c; + h = GNUNET_ARM_connect (cfg, sched, NULL); + if (h == NULL) { - GNUNET_ARM_start_service ("arm", - cfg, sched, TIMEOUT, &confirm_cb, "arm"); + fprintf (stderr, + _("Fatal error initializing ARM API.\n")); + ret = 1; + return; } - if (init != NULL) - { - GNUNET_ARM_start_service (init, cfg, sched, TIMEOUT, &confirm_cb, init); - } - if (test != NULL) + GNUNET_SCHEDULER_add_continuation (sched, + GNUNET_NO, + &cps_loop, + NULL, + GNUNET_SCHEDULER_REASON_PREREQ_DONE); +} + + +/** + * Main continuation-passing-style loop. Runs the various + * jobs that we've been asked to do in order. 
+ * + * @param cls closure, unused + * @param tc context, unused + */ +static void +cps_loop (void *cls, + const struct GNUNET_SCHEDULER_TaskContext *tc) +{ + while (1) { - GNUNET_CLIENT_service_test (sched, - test, cfg, TIMEOUT, &confirm_task, test); + switch (phase++) + { + case 0: + if (term != NULL) + { + GNUNET_ARM_stop_service (h, term, TIMEOUT, &confirm_cb, term); + return; + } + break; + case 1: + if (end) + { + GNUNET_ARM_stop_service (h, "arm", TIMEOUT, &confirm_cb, "arm"); + return; + } + break; + case 2: + if (start) + { + GNUNET_ARM_start_service (h, "arm", TIMEOUT, &confirm_cb, "arm"); + return; + } + break; + case 3: + if (init != NULL) + { + GNUNET_ARM_start_service (h, init, TIMEOUT, &confirm_cb, init); + return; + } + break; + case 4: + if (test != NULL) + { + GNUNET_CLIENT_service_test (sched, test, cfg, TIMEOUT, &confirm_task, test); + return; + } + break; + default: /* last phase */ + GNUNET_ARM_disconnect (h); + return; + } } } diff --git a/src/arm/gnunet-service-arm.c b/src/arm/gnunet-service-arm.c index c7143e3a7..d43adeae2 100644 --- a/src/arm/gnunet-service-arm.c +++ b/src/arm/gnunet-service-arm.c @@ -46,9 +46,15 @@ /** - * Run maintenance every second. + * Run normal maintenance every 2s. */ -#define MAINT_FREQUENCY GNUNET_TIME_UNIT_SECONDS +#define MAINT_FREQUENCY GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS, 2) + +/** + * Run fast maintenance after 100ms. This is used for an extra-job + * that is run to check for a process that we just killed. + */ +#define MAINT_FAST_FREQUENCY GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 100) /** * How long do we wait until we decide that a service @@ -56,8 +62,18 @@ */ #define CHECK_TIMEOUT GNUNET_TIME_UNIT_MINUTES +/** + * List of our services. + */ struct ServiceList; +/** + * Function to call if waitpid informs us that + * a process has died. 
+ * + * @param cls closure + * @param pos entry in the service list of the process that died + */ typedef void (*CleanCallback) (void *cls, struct ServiceList * pos); /** @@ -137,6 +153,24 @@ static struct GNUNET_SCHEDULER_Handle *sched; static char *prefix_command; +/** + * Background task doing maintenance. + * + * @param cls closure, NULL if we need to self-restart + * @param tc context + */ +static void +maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc); + + +/** + * Transmit a status result message. + * + * @param cls pointer to "uint16_t*" with message type + * @param size number of bytes available in buf + * @param buf where to copy the message, NULL on error + * @return number of bytes copied to buf + */ static size_t write_result (void *cls, size_t size, void *buf) { @@ -159,6 +193,9 @@ write_result (void *cls, size_t size, void *buf) * Signal our client that we will start or stop the * service. * + * @param client who is being signalled + * @param name name of the service + * @param result message type to send * @return NULL if it was not found */ static void @@ -188,6 +225,7 @@ signal_result (struct GNUNET_SERVER_Client *client, * Find the process with the given service * name in the given list, remove it and return it. * + * @param name which service entry to look up * @return NULL if it was not found */ static struct ServiceList * @@ -216,6 +254,11 @@ find_name (const char *name) } +/** + * Free an entry in the service list. + * + * @param pos entry to free + */ static void free_entry (struct ServiceList *pos) { @@ -226,8 +269,6 @@ free_entry (struct ServiceList *pos) } - - /** * Actually start the process for the given service. * @@ -335,6 +376,9 @@ start_process (struct ServiceList *sl) /** * Start the specified service. 
+ * + * @param client who is asking for this + * @param servicename name of the service to start */ static void start_service (struct GNUNET_SERVER_Client *client, const char *servicename) @@ -394,6 +438,13 @@ start_service (struct GNUNET_SERVER_Client *client, const char *servicename) } +/** + * Free the given entry in the service list and signal + * the given client that the service is now down. + * + * @param cls pointer to the client ("struct GNUNET_SERVER_Client*") + * @param pos entry for the service + */ static void free_and_signal (void *cls, struct ServiceList *pos) { @@ -409,9 +460,13 @@ free_and_signal (void *cls, struct ServiceList *pos) /** * Stop the specified service. + * + * @param client who is asking for this + * @param servicename name of the service to stop */ static void -stop_service (struct GNUNET_SERVER_Client *client, const char *servicename) +stop_service (struct GNUNET_SERVER_Client *client, + const char *servicename) { struct ServiceList *pos; struct GNUNET_CLIENT_Connection *sc; @@ -445,6 +500,11 @@ stop_service (struct GNUNET_SERVER_Client *client, const char *servicename) pos->kill_continuation = &free_and_signal; pos->kill_continuation_cls = client; GNUNET_SERVER_client_keep (client); + GNUNET_SCHEDULER_add_delayed (sched, + GNUNET_YES, + GNUNET_SCHEDULER_PRIORITY_IDLE, + GNUNET_SCHEDULER_NO_TASK, + MAINT_FAST_FREQUENCY, &maint, NULL); } else { @@ -529,11 +589,10 @@ handle_stop (void *cls, } - /** * Background task doing maintenance. 
* - * @param cls closure + * @param cls closure, NULL if we need to self-restart * @param tc context */ static void @@ -561,11 +620,12 @@ maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) } return; } - GNUNET_SCHEDULER_add_delayed (tc->sched, - GNUNET_YES, - GNUNET_SCHEDULER_PRIORITY_IDLE, - GNUNET_SCHEDULER_NO_TASK, - MAINT_FREQUENCY, &maint, NULL); + if (cls == NULL) + GNUNET_SCHEDULER_add_delayed (tc->sched, + GNUNET_YES, + GNUNET_SCHEDULER_PRIORITY_IDLE, + GNUNET_SCHEDULER_NO_TASK, + MAINT_FREQUENCY, &maint, NULL); /* check for services that died (WAITPID) */ prev = NULL; @@ -656,8 +716,7 @@ maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) /** - * List of handlers for the messages understood by this - * service. + * List of handlers for the messages understood by this service. */ static struct GNUNET_SERVER_MessageHandler handlers[] = { {&handle_start, NULL, GNUNET_MESSAGE_TYPE_ARM_START, 0}, diff --git a/src/arm/test_arm_api.c b/src/arm/test_arm_api.c index fbd90583e..22fa3716e 100644 --- a/src/arm/test_arm_api.c +++ b/src/arm/test_arm_api.c @@ -40,17 +40,28 @@ static struct GNUNET_SCHEDULER_Handle *sched; static const struct GNUNET_CONFIGURATION_Handle *cfg; +static struct GNUNET_ARM_Handle *arm; + static int ok = 1; + +static void +arm_notify_stop (void *cls, int success) +{ + GNUNET_assert (success == GNUNET_NO); +#if START_ARM + GNUNET_ARM_stop_service (arm, "arm", TIMEOUT, NULL, NULL); +#endif +} + + static void dns_notify (void *cls, const struct sockaddr *addr, socklen_t addrlen) { if (addr == NULL) { GNUNET_assert (ok == 0); -#if START_ARM - GNUNET_ARM_stop_service ("arm", cfg, sched, TIMEOUT, NULL, NULL); -#endif + GNUNET_ARM_stop_service (arm, "resolver", TIMEOUT, &arm_notify_stop, NULL); return; } GNUNET_assert (addr != NULL); @@ -62,21 +73,17 @@ static void resolver_notify (void *cls, int success) { GNUNET_assert (success == GNUNET_YES); - sleep (1); /* FIXME: that we need to do this is a problem... 
*/ GNUNET_RESOLVER_ip_get (sched, cfg, "localhost", AF_INET, TIMEOUT, &dns_notify, NULL); } + static void arm_notify (void *cls, int success) { GNUNET_assert (success == GNUNET_YES); -#if START_ARM - sleep (1); /* FIXME: that we need to do this is a problem... */ -#endif - GNUNET_ARM_start_service ("resolver", - cfg, sched, TIMEOUT, &resolver_notify, NULL); + GNUNET_ARM_start_service (arm, "resolver", TIMEOUT, &resolver_notify, NULL); } @@ -89,8 +96,9 @@ task (void *cls, { cfg = c; sched = s; + arm = GNUNET_ARM_connect (cfg, sched, NULL); #if START_ARM - GNUNET_ARM_start_service ("arm", cfg, sched, TIMEOUT, &arm_notify, NULL); + GNUNET_ARM_start_service (arm, "arm", TIMEOUT, &arm_notify, NULL); #else arm_notify (NULL, GNUNET_YES); #endif diff --git a/src/core/test_core_api.c b/src/core/test_core_api.c index 13c1dc48e..d72af8254 100644 --- a/src/core/test_core_api.c +++ b/src/core/test_core_api.c @@ -82,8 +82,8 @@ terminate_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) GNUNET_CORE_disconnect (p2.ch); GNUNET_TRANSPORT_disconnect (p1.th); GNUNET_TRANSPORT_disconnect (p2.th); - GNUNET_ARM_stop_service ("core", p1.cfg, sched, TIMEOUT, NULL, NULL); - GNUNET_ARM_stop_service ("core", p2.cfg, sched, TIMEOUT, NULL, NULL); + GNUNET_ARM_stop_services (p1.cfg, sched, "core", NULL); + GNUNET_ARM_stop_services (p2.cfg, sched, "core", NULL); ok = 0; } @@ -96,8 +96,8 @@ terminate_task_error (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) GNUNET_CORE_disconnect (p2.ch); GNUNET_TRANSPORT_disconnect (p1.th); GNUNET_TRANSPORT_disconnect (p2.th); - GNUNET_ARM_stop_service ("core", p1.cfg, sched, TIMEOUT, NULL, NULL); - GNUNET_ARM_stop_service ("core", p2.cfg, sched, TIMEOUT, NULL, NULL); + GNUNET_ARM_stop_services (p1.cfg, sched, "core", NULL); + GNUNET_ARM_stop_services (p2.cfg, sched, "core", NULL); ok = 42; } @@ -303,7 +303,7 @@ setup_peer (struct PeerContext *p, const char *cfgname) sleep (1); /* allow ARM to start */ #endif GNUNET_assert (GNUNET_OK == 
GNUNET_CONFIGURATION_load (p->cfg, cfgname)); - GNUNET_ARM_start_service ("core", p->cfg, sched, TIMEOUT, NULL, NULL); + GNUNET_ARM_start_services (p->cfg, sched, "core", NULL); p->th = GNUNET_TRANSPORT_connect (sched, p->cfg, p, NULL, NULL, NULL); GNUNET_assert (p->th != NULL); GNUNET_TRANSPORT_get_hello (p->th, TIMEOUT, &process_hello, p); diff --git a/src/core/test_core_api_start_only.c b/src/core/test_core_api_start_only.c index d0a5aedc4..ce2668b89 100644 --- a/src/core/test_core_api_start_only.c +++ b/src/core/test_core_api_start_only.c @@ -147,8 +147,8 @@ init_notify (void *cls, GNUNET_assert (cls == &p2); GNUNET_CORE_disconnect (p1.ch); GNUNET_CORE_disconnect (p2.ch); - GNUNET_ARM_stop_service ("core", p1.cfg, sched, TIMEOUT, NULL, NULL); - GNUNET_ARM_stop_service ("core", p2.cfg, sched, TIMEOUT, NULL, NULL); + GNUNET_ARM_stop_services (p1.cfg, sched, "core", NULL); + GNUNET_ARM_stop_services (p2.cfg, sched, "core", NULL); ok = 0; } @@ -169,7 +169,7 @@ setup_peer (struct PeerContext *p, const char *cfgname) sleep (1); /* allow ARM to start */ #endif GNUNET_assert (GNUNET_OK == GNUNET_CONFIGURATION_load (p->cfg, cfgname)); - GNUNET_ARM_start_service ("core", p->cfg, sched, TIMEOUT, NULL, NULL); + GNUNET_ARM_start_services (p->cfg, sched, "core", NULL); } diff --git a/src/include/gnunet_arm_service.h b/src/include/gnunet_arm_service.h index bbb8cb0f6..8e68cd5ee 100644 --- a/src/include/gnunet_arm_service.h +++ b/src/include/gnunet_arm_service.h @@ -58,45 +58,111 @@ typedef void (*GNUNET_ARM_Callback) (void *cls, int success); /** - * Start a service. + * Handle for interacting with ARM. + */ +struct GNUNET_ARM_Handle; + + +/** + * Setup a context for communicating with ARM. Note that this + * can be done even if the ARM service is not yet running. * - * @param service_name name of the service * @param cfg configuration to use (needed to contact ARM; * the ARM service may internally use a different * configuration to determine how to start the service). 
* @param sched scheduler to use + * @param service service that *this* process is implementing/providing, can be NULL + * @return context to use for further ARM operations, NULL on error + */ +struct GNUNET_ARM_Handle * +GNUNET_ARM_connect (const struct GNUNET_CONFIGURATION_Handle *cfg, + struct GNUNET_SCHEDULER_Handle *sched, + const char *service); + + +/** + * Disconnect from the ARM service. + * + * @param h the handle that was being used + */ +void +GNUNET_ARM_disconnect (struct GNUNET_ARM_Handle *h); + + +/** + * Start a service. Note that this function merely asks ARM to start + * the service and that ARM merely confirms that it forked the + * respective process. The specified callback may thus return before + * the service has started to listen on the server socket and it may + * also be that the service has crashed in the meantime. Clients + * should repeatedly try to connect to the service at the respective + * port (with some delays in between) before assuming that the service + * actually failed to start. Note that if an error is returned to the + * callback, clients obviously should not bother with trying to + * contact the service. + * + * @param h handle to ARM + * @param service_name name of the service * @param timeout how long to wait before failing for good * @param cb callback to invoke when service is ready * @param cb_cls closure for callback */ void -GNUNET_ARM_start_service (const char *service_name, - const struct GNUNET_CONFIGURATION_Handle *cfg, - struct GNUNET_SCHEDULER_Handle *sched, +GNUNET_ARM_start_service (struct GNUNET_ARM_Handle *h, + const char *service_name, struct GNUNET_TIME_Relative timeout, GNUNET_ARM_Callback cb, void *cb_cls); /** - * Stop a service. + * Stop a service. Note that the callback is invoked as soon + * as ARM confirms that it will ask the service to terminate. + * The actual termination may still take some time. 
* + * @param h handle to ARM * @param service_name name of the service - * @param cfg configuration to use (needed to contact ARM; - * the ARM service may internally use a different - * configuration to determine how to start the service). - * @param sched scheduler to use * @param timeout how long to wait before failing for good * @param cb callback to invoke when service is ready * @param cb_cls closure for callback */ void -GNUNET_ARM_stop_service (const char *service_name, - const struct GNUNET_CONFIGURATION_Handle *cfg, - struct GNUNET_SCHEDULER_Handle *sched, +GNUNET_ARM_stop_service (struct GNUNET_ARM_Handle *h, + const char *service_name, struct GNUNET_TIME_Relative timeout, GNUNET_ARM_Callback cb, void *cb_cls); +/** + * Start multiple services in the specified order. Convenience + * function. Works asynchronously, failures are not reported. + * + * @param cfg configuration to use (needed to contact ARM; + * the ARM service may internally use a different + * configuration to determine how to start the service). + * @param sched scheduler to use + * @param ... NULL-terminated list of service names (const char*) + */ +void +GNUNET_ARM_start_services (const struct GNUNET_CONFIGURATION_Handle *cfg, + struct GNUNET_SCHEDULER_Handle *sched, + ...); + + +/** + * Stop multiple services in the specified order. Convenience + * function. Works asynchronously, failures are not reported. + * + * @param cfg configuration to use (needed to contact ARM; + * the ARM service may internally use a different + * configuration to determine how to start the service). + * @param sched scheduler to use + * @param ... 
NULL-terminated list of service names (const char*) + */ +void +GNUNET_ARM_stop_services (const struct GNUNET_CONFIGURATION_Handle *cfg, + struct GNUNET_SCHEDULER_Handle *sched, + ...); + #if 0 /* keep Emacsens' auto-indent happy */ { diff --git a/src/testing/testing.c b/src/testing/testing.c index a82842777..4bdad178f 100644 --- a/src/testing/testing.c +++ b/src/testing/testing.c @@ -49,9 +49,8 @@ #define ARM_START_WAIT GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS, 120) /** - * How many times are we willing to try to - * wait for "scp" or "gnunet-service-arm" to - * complete (waitpid) before giving up? + * How many times are we willing to try to wait for "scp" or + * "gnunet-service-arm" to complete (waitpid) before giving up? */ #define MAX_EXEC_WAIT_RUNS 50 @@ -60,12 +59,39 @@ */ enum StartPhase { + /** + * FIXME. + */ SP_COPYING, + + /** + * FIXME. + */ SP_COPIED, + + /** + * FIXME. + */ SP_START_ARMING, + + /** + * FIXME. + */ SP_START_CORE, + + /** + * FIXME. + */ SP_START_DONE, + + /** + * FIXME. + */ SP_CLEANUP, + + /** + * FIXME. + */ SP_CONFIG_UPDATE }; @@ -839,21 +865,56 @@ void GNUNET_TESTING_daemon_reconfigure (struct GNUNET_TESTING_Daemon *d, d); } - +/** + * FIXME. + */ struct ConnectContext { + /** + * FIXME. + */ struct GNUNET_TESTING_Daemon *d1; + + /** + * FIXME. + */ struct GNUNET_TESTING_Daemon *d2; + + /** + * FIXME. + */ struct GNUNET_TRANSPORT_Handle *d1th; + + /** + * FIXME. + */ struct GNUNET_TRANSPORT_Handle *d2th; + + /** + * When should this operation be complete (or we must trigger + * a timeout). + */ struct GNUNET_TIME_Absolute timeout; + + /** + * Function to call once we are done (or have timed out). + */ GNUNET_TESTING_NotifyCompletion cb; + + /** + * Closure for "cb". + */ void *cb_cls; }; /** * Success, connection is up. Signal client our success.
+ * + * @param cls FIXME + * @param size number of bytes available in buf + * @param buf where to copy the message, NULL on error + * @return number of bytes copied to buf */ static size_t transmit_ready (void *cls, size_t size, void *buf) diff --git a/src/transport/transport_api.c b/src/transport/transport_api.c index b1b8ce445..9a7068e42 100644 --- a/src/transport/transport_api.c +++ b/src/transport/transport_api.c @@ -1458,10 +1458,7 @@ GNUNET_TRANSPORT_connect (struct GNUNET_SCHEDULER_Handle *sched, { struct GNUNET_TRANSPORT_Handle *ret; - GNUNET_ARM_start_service ("peerinfo", - cfg, sched, START_SERVICE_TIMEOUT, NULL, NULL); - GNUNET_ARM_start_service ("transport", - cfg, sched, START_SERVICE_TIMEOUT, NULL, NULL); + GNUNET_ARM_start_services (cfg, sched, "peerinfo", "transport", NULL); ret = GNUNET_malloc (sizeof (struct GNUNET_TRANSPORT_Handle)); ret->sched = sched; ret->cfg = cfg; @@ -1483,12 +1480,8 @@ static void stop_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) { struct GNUNET_TRANSPORT_Handle *handle = cls; - GNUNET_ARM_stop_service ("transport", - handle->cfg, - tc->sched, STOP_SERVICE_TIMEOUT, NULL, NULL); - GNUNET_ARM_stop_service ("peerinfo", - handle->cfg, - tc->sched, STOP_SERVICE_TIMEOUT, NULL, NULL); + + GNUNET_ARM_stop_services (handle->cfg, handle->sched, "transport", "peerinfo", NULL); }