2 This file is part of GNUnet.
3 (C) 2009 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
22 * @file arm/gnunet-service-arm.c
23 * @brief the automated restart manager service
24 * @author Christian Grothoff
27 * - multiple start-stop requests with RC>1 can result
28 * in UP/DOWN signals based on "pending" that are inaccurate...
29 * => have list of clients waiting for a resolution instead of
30 * giving instant (but incorrect) replies
31 * - code could go into restart-loop for a service
32 * if service crashes instantly -- need exponential back-off
33 * - need to test auto-restart code on configuration changes;
34 * - should refine restart code to check if *relevant* parts of the
35 * configuration were changed (anything in the section for the service)
36 * - should have a way to specify dependencies between services and
37 * manage restarts of groups of services
40 #include "gnunet_client_lib.h"
41 #include "gnunet_getopt_lib.h"
42 #include "gnunet_os_lib.h"
43 #include "gnunet_protocols.h"
44 #include "gnunet_service_lib.h"
/* Interval between two runs of the maint task: it is the delay handed to
   GNUNET_SCHEDULER_add_delayed both in run and when maint re-arms itself. */
49 * Run maintenance every second.
51 #define MAINT_FREQUENCY GNUNET_TIME_UNIT_SECONDS
/* NOTE(review): the description below is cut off in this view and no use of
   CHECK_TIMEOUT appears in the visible part of the file -- confirm what the
   timeout is for and whether the macro is still needed. */
54 * How long do we wait until we decide that a service
57 #define CHECK_TIMEOUT GNUNET_TIME_UNIT_MINUTES
/* Signature of the completion hook stored in ServiceList.kill_continuation.
   maint invokes it as kill_continuation (kill_continuation_cls, pos), i.e.
   with the stored closure and the list entry concerned. */
61 typedef void (*CleanCallback) (void *cls, struct ServiceList * pos);
/* Bookkeeping record for one managed service; records are chained into a
   singly linked list through the next pointer.
   NOTE(review): only three member declarations (next, kill_continuation,
   kill_continuation_cls) are visible in this view.  Other code in the file
   also accesses name, binary, config, pid, mtime and rc on these records,
   so their declarations presumably sit on lines not shown here. */
64 * List of our services.
69 * This is a linked list.
71 struct ServiceList *next;
74 * Name of the service.
79 * Name of the binary used.
84 * Name of the configuration file used.
89 * Function to call upon kill completion (waitpid), NULL
90 * if we should simply restart the process.
92 CleanCallback kill_continuation;
95 * Closure for kill_continuation.
97 void *kill_continuation_cls;
100 * Process ID of the child.
105 * Last time the config of this service was
111 * Reference counter (counts how many times we've been
112 * asked to start the service). We only actually stop
113 * it once rc hits zero.
120 * List of running services.
/* Head of the ServiceList chain; the shutdown path of maint loops on it
   with while (NULL != (pos = running)). */
122 static struct ServiceList *running;
/* Configuration handle consulted by the GNUNET_CONFIGURATION_get_value_*
   calls in this file. */
127 static struct GNUNET_CONFIGURATION_Handle *cfg;
/* Scheduler handle; passed to GNUNET_CLIENT_connect in stop_service and to
   GNUNET_SCHEDULER_add_delayed in run. */
132 static struct GNUNET_SCHEDULER_Handle *sched;
135 * Command to prepend to each actual command.
/* start_process duplicates this string into its local prefix buffer
   (apparently as the fallback when the service section has no PREFIX
   option -- the guarding condition is not fully visible); run sets it to an
   empty string on one of its paths. */
137 static char *prefix_command;
/* Fills a transmit buffer with a header-only message.
   The visible statements set msg->size to sizeof (struct
   GNUNET_MessageHeader) and msg->type to *res, both converted with htons,
   and return the number of bytes written (the header size).  One path
   returns 0 without writing anything.
   NOTE(review): the declaration of res and the condition guarding the early
   return are not visible in this view; res is presumably the uint16_t
   allocated by signal_result and received through cls, and the guard
   presumably tests buf for NULL -- confirm.  Also confirm that res is
   released on the early-return path as well as on success. */
141 write_result (void *cls, size_t size, void *buf)
144 struct GNUNET_MessageHeader *msg;
147 return 0; /* error, not much we can do */
/* The buffer offered must be able to hold one complete message header. */
148 GNUNET_assert (size >= sizeof (struct GNUNET_MessageHeader));
/* Header fields are written in network byte order. */
150 msg->size = htons (sizeof (struct GNUNET_MessageHeader));
151 msg->type = htons (*res);
153 return sizeof (struct GNUNET_MessageHeader);
159 * Signal our client that we will start or stop the
162 * @return NULL if it was not found
/* Logs the state being reported and requests transmission of a header-sized
   reply to client through GNUNET_SERVER_notify_transmit_ready, with
   GNUNET_TIME_UNIT_FOREVER_REL as the timeout.
   name is the service being reported on; result is the message type to
   send (callers in this file pass GNUNET_MESSAGE_TYPE_ARM_IS_UP, _IS_DOWN
   or _IS_UNKNOWN).
   NOTE(review): the @return NULL line in the header above duplicates the
   find_name documentation and looks like a copy-paste leftover; the return
   type is not visible here -- confirm and drop it if the function is void.
   NOTE(review): run calls start_service with a NULL client, which ends up
   here; whether NULL is filtered out before the transmit request cannot be
   seen in this view. */
165 signal_result (struct GNUNET_SERVER_Client *client,
166 const char *name, uint16_t result)
/* Only ARM_IS_DOWN is printed as down; every other result value, including
   ARM_IS_UNKNOWN, is logged as up. */
171 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
172 "Telling client that service `%s' is now %s\n",
174 result == GNUNET_MESSAGE_TYPE_ARM_IS_DOWN ? "down" : "up");
/* Heap storage for the result code (filling it in and handing it to the
   transmit request happen on lines not visible here). */
176 res = GNUNET_malloc (sizeof (uint16_t));
178 GNUNET_SERVER_notify_transmit_ready (client,
179 sizeof (struct GNUNET_MessageHeader),
180 GNUNET_TIME_UNIT_FOREVER_REL,
186 * Find the process with the given service
187 * name in the given list, remove it and return it.
189 * @return NULL if it was not found
/* Looks a service up by exact name (strcmp) and unlinks the match from the
   list, so a caller that wants to keep tracking the entry has to link it
   back in itself.
   NOTE(review): the header above speaks of the given list, but the only
   parameter is name; the list walked is presumably the global running list
   (the loop setup is not visible in this view). */
191 static struct ServiceList *
192 find_name (const char *name)
194 struct ServiceList *pos;
195 struct ServiceList *prev;
201 if (0 == strcmp (pos->name, name))
/* Unlink a match that has a predecessor (handling of a match at the head of
   the list is on lines not visible here). */
206 prev->next = pos->next;
/* Releases the strings owned by a list entry.  config and binary go through
   GNUNET_free_non_null, name through plain GNUNET_free.
   NOTE(review): presumably config and binary may be NULL while name must be
   set, and the entry itself is freed on a line not visible here --
   confirm. */
218 free_entry (struct ServiceList *pos)
220 GNUNET_free_non_null (pos->config);
221 GNUNET_free_non_null (pos->binary);
222 GNUNET_free (pos->name);
230 * Actually start the process for the given service.
232 * @param sl identifies service to start
/* Builds an argument vector for the service and spawns it, storing the
   result of GNUNET_OS_start_process_v in sl->pid.
   The vector is filled in this order: the blank-separated tokens of the
   prefix string, sl->binary, -c followed by sl->config, -L DEBUG when the
   DEBUG option of the service section is yes, the blank-separated tokens of
   the OPTIONS string, and a terminating NULL.
   NOTE(review): many lines of this function (declarations, the bodies of
   the counting loops, the splitting of tokens) are not visible in this
   view; the comments below describe only what the visible statements
   show. */
235 start_process (struct ServiceList *sl)
240 unsigned int argv_size;
243 const char *firstarg;
248 GNUNET_CONFIGURATION_get_value_string (cfg,
249 sl->name, "PREFIX", &loprefix))
/* Substitute the global prefix command for the per-service PREFIX
   (apparently when the lookup above does not succeed). */
250 loprefix = GNUNET_strdup (prefix_command);
252 GNUNET_CONFIGURATION_get_value_string (cfg,
253 sl->name, "OPTIONS", &options))
/* Substitute an empty option string for the per-service OPTIONS. */
254 options = GNUNET_strdup ("");
255 use_debug = GNUNET_CONFIGURATION_get_value_yesno (cfg, sl->name, "DEBUG");
257 GNUNET_log (GNUNET_ERROR_TYPE_INFO, _("Starting service `%s'\n"), sl->name);
259 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
260 "Starting service `%s' using binary `%s' and configuration `%s'\n",
261 sl->name, sl->binary, sl->config);
/* First walk over both strings, presumably to determine how many argv
   slots are needed (the loop bodies are not visible here). */
267 while ('\0' != *lopos)
274 while ('\0' != *optpos)
281 argv = GNUNET_malloc (argv_size * sizeof (char *));
/* Second walk over the prefix string: skip blanks, record the start of each
   token in argv, then advance to the end of that token. */
285 while ('\0' != *lopos)
287 while (*lopos == ' ')
293 argv[argv_size++] = lopos;
294 while (('\0' != *lopos) && (' ' != *lopos))
/* NOTE(review): firstarg is set to the binary here; whether an earlier
   assignment (e.g. to the first prefix token) exists is not visible. */
302 firstarg = sl->binary;
303 argv[argv_size++] = sl->binary;
304 argv[argv_size++] = "-c";
305 argv[argv_size++] = sl->config;
306 if (GNUNET_YES == use_debug)
308 argv[argv_size++] = "-L";
309 argv[argv_size++] = "DEBUG";
/* Same tokenisation for the OPTIONS string. */
312 while ('\0' != *optpos)
314 while (*optpos == ' ')
318 argv[argv_size++] = optpos;
319 while (('\0' != *optpos) && (' ' != *optpos))
326 argv[argv_size++] = NULL;
327 sl->pid = GNUNET_OS_start_process_v (firstarg, argv);
/* argv entries point into loprefix and options, so these buffers are
   released only after the process has been started. */
329 GNUNET_free (loprefix);
330 GNUNET_free (options);
335 * Start the specified service.
/* Handles one start request for servicename and reports the outcome to
   client through signal_result:
   - find_name returns an entry (service already running): ARM_IS_UP;
   - the BINARY lookup for the service fails: ARM_IS_DOWN;
   - the configuration file check fails: ARM_IS_DOWN, and the strings
     obtained so far are freed;
   - otherwise a new ServiceList entry is allocated and ARM_IS_UP is sent.
   NOTE(review): the reference-count update, the list insertion and the call
   that launches the process are on lines not visible in this view.
   NOTE(review): run invokes this with client == NULL for the default
   services; confirm that signal_result tolerates that. */
338 start_service (struct GNUNET_SERVER_Client *client, const char *servicename)
340 struct ServiceList *sl;
/* find_name unlinks the entry it returns. */
344 sl = find_name (servicename);
347 /* already running, just increment RC */
348 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
349 _("Service `%s' already running.\n"), servicename);
353 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP);
357 GNUNET_CONFIGURATION_get_value_string (cfg,
358 servicename, "BINARY", &binary))
360 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
361 _("Binary implementing service `%s' not known!\n"),
363 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
367 GNUNET_CONFIGURATION_get_value_filename (cfg,
371 (0 != STAT (config, &sbuf)))
/* NOTE(review): this failure path prints and frees config.  If it can be
   reached because the filename lookup itself failed (the operator joining
   the lookup and the STAT test is not visible), config may not hold a valid
   string at this point -- confirm. */
373 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
374 _("Configuration file `%s' for service `%s' not known!\n"),
375 config, servicename);
376 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
377 GNUNET_free (binary);
378 GNUNET_free (config);
381 sl = GNUNET_malloc (sizeof (struct ServiceList));
382 sl->name = GNUNET_strdup (servicename);
/* Remember the modification time of the configuration file; maint compares
   it against a fresh STAT result to detect changes. */
387 sl->mtime = sbuf.st_mtime;
390 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP);
/* Kill continuation installed by stop_service and invoked by maint through
   pos->kill_continuation.  It logs that the service stopped, tells the
   waiting client that the service is down, completes the client request
   with GNUNET_OK, and releases the client reference that stop_service took
   with GNUNET_SERVER_client_keep.
   cls is the client handle stored in kill_continuation_cls; pos is the list
   entry of the stopped service.
   NOTE(review): whether pos is released here (for instance through
   free_entry) is not visible in this view. */
395 free_and_signal (void *cls, struct ServiceList *pos)
397 struct GNUNET_SERVER_Client *client = cls;
398 /* find_name will remove "pos" from the list! */
399 GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Service `%s' stopped\n", pos->name);
400 signal_result (client, pos->name, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
401 GNUNET_SERVER_receive_done (client, GNUNET_OK);
402 GNUNET_SERVER_client_drop (client);
408 * Stop the specified service.
/* Handles one stop request for servicename on behalf of client.
   Outcomes shown by the visible statements:
   - a kill is already pending for the entry (kill_continuation is set):
     ARM_IS_DOWN is reported right away;
   - the entry has rc > 1: ARM_IS_UP is reported and the client request is
     completed (per the inline comment only the counter is decremented);
   - SIGTERM is sent to pos->pid, free_and_signal is installed as
     kill_continuation with client as its closure, and the client is pinned
     with GNUNET_SERVER_client_keep until that continuation runs;
   - a configuration number lookup (presumably the port) combined with a
     successful GNUNET_CLIENT_connect leads to a shutdown request being sent
     over that connection and ARM_IS_DOWN being reported;
   - the remaining path reports ARM_IS_UNKNOWN.
   NOTE(review): find_name unlinks the entry it returns, and the lines that
   would put it back into the list are not visible here.  In particular,
   confirm that the kill-already-pending path relinks pos and completes the
   client request. */
411 stop_service (struct GNUNET_SERVER_Client *client, const char *servicename)
413 struct ServiceList *pos;
414 struct GNUNET_CLIENT_Connection *sc;
415 unsigned long long port;
417 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
418 "Preparing to stop `%s'\n", servicename);
419 pos = find_name (servicename);
420 if ((pos != NULL) && (pos->kill_continuation != NULL))
422 /* killing already in progress */
423 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
426 if ((pos != NULL) && (pos->rc > 1))
428 /* RC>1, just decrement RC */
432 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP);
433 GNUNET_SERVER_receive_done (client, GNUNET_OK);
/* A failing kill is only logged; the continuation is still installed by the
   visible statements that follow. */
438 if (0 != PLIBC_KILL (pos->pid, SIGTERM))
439 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
442 pos->kill_continuation = &free_and_signal;
443 pos->kill_continuation_cls = client;
/* Matched by GNUNET_SERVER_client_drop in free_and_signal. */
444 GNUNET_SERVER_client_keep (client);
449 GNUNET_CONFIGURATION_get_value_number (cfg,
453 (NULL != (sc = GNUNET_CLIENT_connect (sched, servicename, cfg))) )
455 GNUNET_CLIENT_service_shutdown (sc);
456 GNUNET_CLIENT_disconnect (sc);
457 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
461 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UNKNOWN);
463 GNUNET_SERVER_receive_done (client, GNUNET_OK);
469 * Handle START-message.
471 * @param cls closure (always NULL)
472 * @param client identification of the client
473 * @param message the actual message
474 * @return GNUNET_OK to keep the connection open,
475 * GNUNET_SYSERR to close it (signal serious error)
/* Message handler for ARM_START (see the handlers table).
   The payload following the message header is taken as the service name.
   It must be non-empty and end in a NUL byte; otherwise the request is
   completed with GNUNET_SYSERR.  A valid name is passed to start_service
   and the request is then completed with GNUNET_OK.
   NOTE(review): the @return lines in the header above do not match the
   visible code, which reports the outcome through
   GNUNET_SERVER_receive_done; the return type is not visible here --
   confirm. */
478 handle_start (void *cls,
479 struct GNUNET_SERVER_Client *client,
480 const struct GNUNET_MessageHeader *message)
482 const char *servicename;
/* Payload length = total message size minus the header. */
485 size = ntohs (message->size);
486 size -= sizeof (struct GNUNET_MessageHeader);
/* The name starts directly behind the header. */
487 servicename = (const char *) &message[1];
488 if ((size == 0) || (servicename[size - 1] != '\0'))
491 GNUNET_SERVER_receive_done (client, GNUNET_SYSERR);
494 start_service (client, servicename);
495 GNUNET_SERVER_receive_done (client, GNUNET_OK);
500 * Handle STOP-message.
502 * @param cls closure (always NULL)
503 * @param client identification of the client
504 * @param message the actual message
505 * @return GNUNET_OK to keep the connection open,
506 * GNUNET_SYSERR to close it (signal serious error)
/* Message handler for ARM_STOP (see the handlers table).
   Validation mirrors handle_start: the payload behind the header must be a
   non-empty, NUL-terminated service name, otherwise the request is
   completed with GNUNET_SYSERR.
   Unlike handle_start, no GNUNET_SERVER_receive_done call follows
   stop_service in the visible code; completion of the request is done by
   stop_service itself or later by free_and_signal.
   NOTE(review): the @return lines in the header above do not match the
   visible code; the return type is not visible here -- confirm. */
509 handle_stop (void *cls,
510 struct GNUNET_SERVER_Client *client,
511 const struct GNUNET_MessageHeader *message)
513 const char *servicename;
/* Payload length = total message size minus the header. */
516 size = ntohs (message->size);
517 size -= sizeof (struct GNUNET_MessageHeader);
518 servicename = (const char *) &message[1];
519 if ((size == 0) || (servicename[size - 1] != '\0'))
522 GNUNET_SERVER_receive_done (client, GNUNET_SYSERR);
525 stop_service (client, servicename);
531 * Background task doing maintenance.
/* Periodic maintenance task.
   When the scheduler reports shutdown it signals every tracked service with
   SIGTERM and waits for it.  Otherwise it re-arms itself after
   MAINT_FREQUENCY, checks the status of each tracked child process, runs a
   pending kill_continuation or logs the termination and schedules a
   restart, and finally looks for services whose configuration file is newer
   than the recorded mtime.
   No visible statement reads cls; tc supplies the trigger reason and the
   scheduler handle.
   NOTE(review): large parts of this function (loop setup, list relinking,
   the restart call itself) are on lines not visible in this view; the
   comments below cover only the visible statements. */
537 maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
539 struct ServiceList *pos;
540 struct ServiceList *prev;
541 struct ServiceList *next;
/* Shutdown path: terminate and reap all services. */
547 if (0 != (tc->reason & GNUNET_SCHEDULER_REASON_SHUTDOWN))
549 GNUNET_log (GNUNET_ERROR_TYPE_INFO, _("Stopping all services\n"));
550 while (NULL != (pos = running))
553 if (0 != PLIBC_KILL (pos->pid, SIGTERM))
554 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
555 if (GNUNET_OK != GNUNET_OS_process_wait(pos->pid))
556 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "waitpid");
/* Re-arm the task.  NOTE(review): cfg is passed as the closure here while
   run passes NULL for the first invocation; harmless only as long as cls
   stays unused -- consider making the two calls agree. */
561 GNUNET_SCHEDULER_add_delayed (tc->sched,
563 GNUNET_SCHEDULER_PRIORITY_IDLE,
564 GNUNET_SCHEDULER_NO_PREREQUISITE_TASK,
565 MAINT_FREQUENCY, &maint, cfg);
567 /* check for services that died (WAITPID) */
570 while (NULL != (pos = next))
572 enum GNUNET_OS_ProcessStatusType statusType;
573 unsigned long statusCode;
/* NOTE(review): the condition selecting this first continuation call is
   not visible here. */
578 if (NULL != pos->kill_continuation)
584 pos->kill_continuation (pos->kill_continuation_cls, pos);
/* A failed status query, a GNUNET_NO answer and the STOPPED / RUNNING
   states are grouped together; presumably the entry is then kept as still
   alive (the branch body is not visible). */
588 if ( (GNUNET_SYSERR == (ret = GNUNET_OS_process_status(pos->pid,
591 ( (ret == GNUNET_NO) ||
592 (statusType == GNUNET_OS_PROCESS_STOPPED) ||
593 (statusType == GNUNET_OS_PROCESS_RUNNING) ) )
/* Translate the termination kind into a label and code for the log. */
598 if (statusType == GNUNET_OS_PROCESS_EXITED)
600 statstr = _( /* process termination method */ "exit");
601 statcode = statusCode;
603 else if (statusType == GNUNET_OS_PROCESS_SIGNALED)
605 statstr = _( /* process termination method */ "signal");
606 statcode = statusCode;
610 statstr = _( /* process termination method */ "unknown");
/* A pending continuation means the termination was requested through
   stop_service; hand the entry to it. */
613 if (NULL != pos->kill_continuation)
619 pos->kill_continuation (pos->kill_continuation_cls, pos);
622 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
623 _("Service `%s' terminated with status %s/%d, will try to restart it!\n"),
624 pos->name, statstr, statcode);
625 /* schedule restart */
630 /* check for services that need to be restarted due to
631 configuration changes or because the last restart failed */
635 if ((0 == STAT (pos->config, &sbuf)) && (pos->mtime < sbuf.st_mtime))
/* NOTE(review): BUG -- the format string below contains a %s conversion but
   the call passes no matching argument (pos->name is presumably intended).
   Reading a missing variadic argument is undefined behavior. */
637 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
638 _("Restarting service `%s' due to configuration file change.\n"));
639 if (0 != PLIBC_KILL (pos->pid, SIGTERM))
640 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
644 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
645 _("Restarting service `%s'.\n"), pos->name);
646 /* FIXME: should have some exponentially
647 increasing timer to avoid tight restart loops */
656 * List of handlers for the messages understood by this
/* Dispatch table registered in run through GNUNET_SERVER_add_handlers:
   ARM_START messages go to handle_start, ARM_STOP messages to handle_stop.
   The second slot of each entry (the handler closure, documented as always
   NULL on the handlers) is NULL.
   NOTE(review): the trailing 0 is presumably the expected message size with
   0 standing for variable length, and the terminating entry of the array is
   on a line not visible here -- confirm against the GNUNET_SERVER API. */
659 static struct GNUNET_SERVER_MessageHandler handlers[] = {
660 {&handle_start, NULL, GNUNET_MESSAGE_TYPE_ARM_START, 0},
661 {&handle_stop, NULL, GNUNET_MESSAGE_TYPE_ARM_STOP, 0},
667 * Process arm requests.
670 * @param s scheduler to use
671 * @param server the initialized server
672 * @param c configuration to use
676 struct GNUNET_SCHEDULER_Handle *s,
677 struct GNUNET_SERVER_Handle *server,
678 struct GNUNET_CONFIGURATION_Handle *c)
/* Service initialisation.  Visible steps: log the start-up, read a string
   option and use an empty string for prefix_command on one branch, read the
   list of default services and start each blank-separated name through
   start_service with a NULL client, register the message handlers, and
   schedule the first maint run after MAINT_FREQUENCY at idle priority.
   NOTE(review): the section and option names of both configuration lookups
   and the assignments of the sched and cfg globals are on lines not visible
   in this view. */
680 char *defaultservices;
683 GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Starting...\n");
687 GNUNET_CONFIGURATION_get_value_string (cfg,
691 prefix_command = GNUNET_strdup ("");
692 /* start default services... */
694 GNUNET_CONFIGURATION_get_value_string (cfg,
700 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
701 "Starting default services `%s'\n", defaultservices);
/* strtok modifies defaultservices in place and keeps hidden static state,
   so nothing reached from start_service may call strtok while this loop is
   running. */
703 pos = strtok (defaultservices, " ");
/* No client is waiting for an answer when default services are started. */
706 start_service (NULL, pos);
707 pos = strtok (NULL, " ");
709 GNUNET_free (defaultservices);
714 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
715 "No default services configured.\n");
719 /* process client requests */
720 GNUNET_SERVER_add_handlers (server, handlers);
722 /* manage services */
723 GNUNET_SCHEDULER_add_delayed (sched,
725 GNUNET_SCHEDULER_PRIORITY_IDLE,
726 GNUNET_SCHEDULER_NO_PREREQUISITE_TASK,
727 MAINT_FREQUENCY, &maint, NULL);
732 * The main function for the arm service.
734 * @param argc number of arguments from the command line
735 * @param argv command line arguments
736 * @return 0 ok, 1 on error
/* Program entry point: hands control to GNUNET_SERVICE_run under the service
   name arm, with run as the start-up callback and NULL for the three
   remaining arguments.  The exit status is chosen by the ?: below, 0 or 1.
   NOTE(review): the comparison feeding the ?: is on a line not visible
   here; per the header comment 0 means success. */
739 main (int argc, char *const *argv)
742 GNUNET_SERVICE_run (argc,
743 argv, "arm", &run, NULL, NULL, NULL)) ? 0 : 1;
746 /* end of gnunet-service-arm.c */