2 This file is part of GNUnet.
3 (C) 2009 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
22 * @file arm/gnunet-service-arm.c
23 * @brief the automated restart manager service
24 * @author Christian Grothoff
27 * - multiple start-stop requests with RC>1 can result
28 * in UP/DOWN signals based on "pending" that are inaccurate...
29 * => have list of clients waiting for a resolution instead of
30 * giving instant (but incorrect) replies
31 * - need to test auto-restart code on configuration changes;
32 * - should refine restart code to check if *relevant* parts of the
33 * configuration were changed (anything in the section for the service)
34 * - should have a way to specify dependencies between services and
35 * manage restarts of groups of services
38 #include "gnunet_client_lib.h"
39 #include "gnunet_getopt_lib.h"
40 #include "gnunet_os_lib.h"
41 #include "gnunet_protocols.h"
42 #include "gnunet_service_lib.h"
43 #include "gnunet_signal_lib.h"
48 * Check for configuration file changes every 5s.
50 #define MAINT_FREQUENCY GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS, 5)
53 * Threshold (in ms) after which the exponential backoff is no longer increased; 30 minutes
55 #define EXPONENTIAL_BACKOFF_THRESHOLD (1000 * 60 * 30)
59 * List of our services.
64 * List of our services.
69 * This is a linked list.
71 struct ServiceList *next;
74 * Name of the service.
79 * Name of the binary used.
84 * Name of the configuration file used.
89 * Client to notify upon kill completion (waitpid), NULL
90 * if we should simply restart the process.
92 struct GNUNET_SERVER_Client *killing_client;
95 * Process ID of the child.
100 * Last time the config of this service was
106 * Process exponential backoff time
108 struct GNUNET_TIME_Relative backoff;
111 * Absolute time at which the process is scheduled to restart in case of death
113 struct GNUNET_TIME_Absolute restartAt;
116 * Reference counter (counts how many times we've been
117 * asked to start the service). We only actually stop
118 * it once rc hits zero.
125 * List of running services.
127 static struct ServiceList *running;
132 static const struct GNUNET_CONFIGURATION_Handle *cfg;
137 static struct GNUNET_SCHEDULER_Handle *sched;
140 * Command to prepend to each actual command.
142 static char *prefix_command;
145 * ID of task called whenever we get a SIGCHLD.
147 static GNUNET_SCHEDULER_TaskIdentifier child_death_task;
150 * ID of task called whenever the timeout for restarting a child
153 static GNUNET_SCHEDULER_TaskIdentifier child_restart_task;
156 * Context for our SIGCHLD handler.
158 static struct GNUNET_SIGNAL_Context *shc_chld;
161 * Pipe used to communicate child death (SIGCHLD) from the signal handler.
163 static struct GNUNET_DISK_PipeHandle *sigpipe;
166 * Reading end of the signal pipe.
168 static const struct GNUNET_DISK_FileHandle *pr;
171 * Are we in shutdown mode?
173 static int in_shutdown;
177 * Handle to our server instance. Our server is a bit special in that
178 * its service is not immediately stopped once we get a shutdown
179 * request (since we need to continue service until all of our child
180 * processes are dead). This handle is used to shut down the server
181 * (and thus trigger process termination) once all child processes are
182 * also dead. A special option in the ARM configuration modifies the
183 * behaviour of the service implementation to not do the shutdown
186 static struct GNUNET_SERVER_Handle *server;
190 * If the configuration file changes, restart tasks that depended on that
193 * @param cls closure, NULL if we need to self-restart
197 config_change_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
199 struct ServiceList *pos;
205 /* FIXME: this test for config change is a bit too coarse grained */
206 if ( (0 == STAT (pos->config, &sbuf)) &&
207 (pos->mtime < sbuf.st_mtime) &&
210 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
211 _("Restarting service `%s' due to configuration file change.\n"), pos->name);
212 if (0 != PLIBC_KILL (pos->pid, SIGTERM))
213 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
215 pos->backoff = GNUNET_TIME_UNIT_MILLISECONDS;
224 * Transmit a status result message.
226 * @param cls pointer to "uint16_t*" with message type
227 * @param size number of bytes available in buf
228 * @param buf where to copy the message, NULL on error
229 * @return number of bytes copied to buf
232 write_result (void *cls, size_t size, void *buf)
235 struct GNUNET_MessageHeader *msg;
239 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
240 _("Could not send status result to client\n"));
241 return 0; /* error, not much we can do */
243 GNUNET_assert (size >= sizeof (struct GNUNET_MessageHeader));
245 msg->size = htons (sizeof (struct GNUNET_MessageHeader));
246 msg->type = htons (*res);
248 return sizeof (struct GNUNET_MessageHeader);
254 * Signal our client that we will start or stop the
257 * @param client who is being signalled
258 * @param name name of the service
259 * @param result message type to send
260 * @return NULL if it was not found
263 signal_result (struct GNUNET_SERVER_Client *client,
264 const char *name, uint16_t result)
270 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
272 ("Not sending status result to client: no client known\n"));
276 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
277 "Telling client that service `%s' is now %s\n",
279 result == GNUNET_MESSAGE_TYPE_ARM_IS_DOWN ? "down" : "up");
281 res = GNUNET_malloc (sizeof (uint16_t));
283 GNUNET_SERVER_notify_transmit_ready (client,
284 sizeof (struct GNUNET_MessageHeader),
285 GNUNET_TIME_UNIT_FOREVER_REL,
291 * Find the entry with the given service name in
292 * the service list, remove it from the list and return it.
294 * @param name which service entry to look up
295 * @return NULL if it was not found
297 static struct ServiceList *
298 find_name (const char *name)
300 struct ServiceList *pos;
301 struct ServiceList *prev;
307 if (0 == strcmp (pos->name, name))
312 prev->next = pos->next;
324 * Free an entry in the service list.
326 * @param pos entry to free
329 free_entry (struct ServiceList *pos)
331 GNUNET_free_non_null (pos->config);
332 GNUNET_free_non_null (pos->binary);
333 GNUNET_free (pos->name);
339 * Actually start the process for the given service.
341 * @param sl identifies service to start
344 start_process (struct ServiceList *sl)
349 unsigned int argv_size;
352 const char *firstarg;
357 GNUNET_CONFIGURATION_get_value_string (cfg,
358 sl->name, "PREFIX", &loprefix))
359 loprefix = GNUNET_strdup (prefix_command);
361 GNUNET_CONFIGURATION_get_value_string (cfg,
362 sl->name, "OPTIONS", &options))
363 options = GNUNET_strdup ("");
364 use_debug = GNUNET_CONFIGURATION_get_value_yesno (cfg, sl->name, "DEBUG");
366 GNUNET_log (GNUNET_ERROR_TYPE_INFO, _("Starting service `%s'\n"), sl->name);
368 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
369 "Starting service `%s' using binary `%s' and configuration `%s'\n",
370 sl->name, sl->binary, sl->config);
376 while ('\0' != *lopos)
383 while ('\0' != *optpos)
390 argv = GNUNET_malloc (argv_size * sizeof (char *));
394 while ('\0' != *lopos)
396 while (*lopos == ' ')
402 argv[argv_size++] = lopos;
403 while (('\0' != *lopos) && (' ' != *lopos))
411 firstarg = sl->binary;
412 argv[argv_size++] = sl->binary;
413 argv[argv_size++] = "-c";
414 argv[argv_size++] = sl->config;
415 if (GNUNET_YES == use_debug)
417 argv[argv_size++] = "-L";
418 argv[argv_size++] = "DEBUG";
421 while ('\0' != *optpos)
423 while (*optpos == ' ')
427 argv[argv_size++] = optpos;
428 while (('\0' != *optpos) && (' ' != *optpos))
435 argv[argv_size++] = NULL;
436 sl->pid = GNUNET_OS_start_process_v (firstarg, argv);
437 /* FIXME: should check sl->pid */
439 GNUNET_free (loprefix);
440 GNUNET_free (options);
445 * Start the specified service.
447 * @param client who is asking for this
448 * @param servicename name of the service to start
451 start_service (struct GNUNET_SERVER_Client *client, const char *servicename)
453 struct ServiceList *sl;
458 if (GNUNET_YES == in_shutdown)
460 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
461 _("ARM is shutting down, service `%s' not started.\n"),
463 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
466 sl = find_name (servicename);
469 /* already running, just increment RC */
470 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
471 _("Service `%s' already running.\n"), servicename);
475 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP);
479 GNUNET_CONFIGURATION_get_value_string (cfg,
480 servicename, "BINARY", &binary))
482 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
483 _("Binary implementing service `%s' not known!\n"),
485 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
489 GNUNET_CONFIGURATION_get_value_filename (cfg,
493 (0 != STAT (config, &sbuf)))
495 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
496 _("Configuration file `%s' for service `%s' not known!\n"),
497 config, servicename);
498 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
499 GNUNET_free (binary);
500 GNUNET_free_non_null (config);
503 sl = GNUNET_malloc (sizeof (struct ServiceList));
504 sl->name = GNUNET_strdup (servicename);
509 sl->mtime = sbuf.st_mtime;
510 sl->backoff = GNUNET_TIME_UNIT_MILLISECONDS;
511 sl->restartAt = GNUNET_TIME_UNIT_FOREVER_ABS;
516 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP);
521 * Stop the specified service.
523 * @param client who is asking for this
524 * @param servicename name of the service to stop
527 stop_service (struct GNUNET_SERVER_Client *client, const char *servicename)
529 struct ServiceList *pos;
531 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
532 _("Preparing to stop `%s'\n"), servicename);
533 pos = find_name (servicename);
536 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UNKNOWN);
537 GNUNET_SERVER_receive_done (client, GNUNET_OK);
542 /* RC>1, just decrement RC */
547 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
548 "Service `%s' still used by %u clients, will keep it running!\n",
549 servicename, pos->rc);
551 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP);
552 GNUNET_SERVER_receive_done (client, GNUNET_OK);
556 pos->rc--; /* decrement RC to zero */
557 if (pos->killing_client != NULL)
559 /* killing already in progress */
561 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
562 "Service `%s' is already down\n", servicename);
564 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
565 GNUNET_SERVER_receive_done (client, GNUNET_OK);
569 if (GNUNET_YES == in_shutdown)
572 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
573 "Termination request already sent to `%s' (since ARM is in shutdown).\n",
576 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
577 GNUNET_SERVER_receive_done (client, GNUNET_OK);
581 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
582 "Sending kill signal to service `%s', waiting for process to die.\n",
587 /* process is in delayed restart, simply remove it! */
589 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
590 GNUNET_SERVER_receive_done (client, GNUNET_OK);
594 if (0 != PLIBC_KILL (pos->pid, SIGTERM))
595 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
598 pos->killing_client = client;
599 GNUNET_SERVER_client_keep (client);
604 * Handle START-message.
606 * @param cls closure (always NULL)
607 * @param client identification of the client
608 * @param message the actual message
609 * @return GNUNET_OK to keep the connection open,
610 * GNUNET_SYSERR to close it (signal serious error)
613 handle_start (void *cls,
614 struct GNUNET_SERVER_Client *client,
615 const struct GNUNET_MessageHeader *message)
617 const char *servicename;
620 size = ntohs (message->size);
621 size -= sizeof (struct GNUNET_MessageHeader);
622 servicename = (const char *) &message[1];
623 if ((size == 0) || (servicename[size - 1] != '\0'))
626 GNUNET_SERVER_receive_done (client, GNUNET_SYSERR);
629 start_service (client, servicename);
630 GNUNET_SERVER_receive_done (client, GNUNET_OK);
635 * Handle STOP-message.
637 * @param cls closure (always NULL)
638 * @param client identification of the client
639 * @param message the actual message
640 * @return GNUNET_OK to keep the connection open,
641 * GNUNET_SYSERR to close it (signal serious error)
644 handle_stop (void *cls,
645 struct GNUNET_SERVER_Client *client,
646 const struct GNUNET_MessageHeader *message)
648 const char *servicename;
651 size = ntohs (message->size);
652 size -= sizeof (struct GNUNET_MessageHeader);
653 servicename = (const char *) &message[1];
654 if ((size == 0) || (servicename[size - 1] != '\0'))
657 GNUNET_SERVER_receive_done (client, GNUNET_SYSERR);
660 stop_service (client, servicename);
665 * Remove all entries for tasks that are not running
666 * (pid = 0) from the running list (they will no longer
667 * be restarted since we are shutting down).
672 struct ServiceList *pos;
673 struct ServiceList *next;
674 struct ServiceList *prev;
697 * We are done with everything. Stop remaining
698 * tasks, signal handler and the server.
703 GNUNET_SERVER_destroy (server);
705 GNUNET_SIGNAL_handler_uninstall (shc_chld);
707 GNUNET_SCHEDULER_cancel (sched, child_death_task);
708 child_death_task = GNUNET_SCHEDULER_NO_TASK;
713 * Task run for shutdown.
715 * @param cls closure, NULL if we need to self-restart
719 shutdown_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
721 struct ServiceList *pos;
724 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, _("Stopping all services\n"));
726 in_shutdown = GNUNET_YES;
733 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
734 "Sending SIGTERM to `%s'\n", pos->name);
736 if (0 != PLIBC_KILL (pos->pid, SIGTERM))
737 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
747 * Task run whenever it is time to restart a child that died.
749 * @param cls closure, always NULL
753 delayed_restart_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
755 struct ServiceList *pos;
756 struct GNUNET_TIME_Relative lowestRestartDelay;
758 child_restart_task = GNUNET_SCHEDULER_NO_TASK;
759 if (0 != (tc->reason & GNUNET_SCHEDULER_REASON_SHUTDOWN))
766 lowestRestartDelay = GNUNET_TIME_UNIT_FOREVER_REL;
768 /* check for services that need to be restarted due to
769 configuration changes or because the last restart failed */
773 if ( (pos->pid == 0) &&
774 (GNUNET_YES != in_shutdown) )
776 if (GNUNET_TIME_absolute_get_remaining (pos->restartAt).value == 0)
778 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
779 _("Restarting service `%s'.\n"), pos->name);
785 = GNUNET_TIME_relative_min (lowestRestartDelay,
786 GNUNET_TIME_absolute_get_remaining
792 if (lowestRestartDelay.value != GNUNET_TIME_UNIT_FOREVER_REL.value)
795 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
796 "Will restart process in %llums\n",
797 (unsigned long long) lowestRestartDelay.value);
800 = GNUNET_SCHEDULER_add_delayed (sched,
802 &delayed_restart_task,
809 * Task triggered whenever we receive a SIGCHLD (child
812 * @param cls closure, NULL if we need to self-restart
816 maint_child_death (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
818 struct ServiceList *pos;
819 struct ServiceList *prev;
820 struct ServiceList *next;
825 enum GNUNET_OS_ProcessStatusType statusType;
826 unsigned long statusCode;
828 child_death_task = GNUNET_SCHEDULER_NO_TASK;
829 if (0 == (tc->reason & GNUNET_SCHEDULER_REASON_READ_READY))
832 GNUNET_SCHEDULER_add_read_file (sched, GNUNET_TIME_UNIT_FOREVER_REL, pr,
833 &maint_child_death, NULL);
836 /* consume the signal */
837 GNUNET_break (0 < GNUNET_DISK_file_read (pr, &c, sizeof (c)));
839 /* check for services that died (WAITPID) */
842 while (NULL != (pos = next))
850 if ((GNUNET_SYSERR == (ret = GNUNET_OS_process_status (pos->pid,
853 ( (ret == GNUNET_NO) ||
854 (statusType == GNUNET_OS_PROCESS_STOPPED) ||
855 (statusType == GNUNET_OS_PROCESS_RUNNING)) )
860 if (statusType == GNUNET_OS_PROCESS_EXITED)
862 statstr = _( /* process termination method */ "exit");
863 statcode = statusCode;
865 else if (statusType == GNUNET_OS_PROCESS_SIGNALED)
867 statstr = _( /* process termination method */ "signal");
868 statcode = statusCode;
872 statstr = _( /* process termination method */ "unknown");
876 if (NULL != pos->killing_client)
882 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
883 _("Service `%s' stopped\n"),
885 signal_result (pos->killing_client,
886 pos->name, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
887 GNUNET_SERVER_receive_done (pos->killing_client, GNUNET_OK);
888 GNUNET_SERVER_client_drop (pos->killing_client);
892 if (GNUNET_YES != in_shutdown)
894 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
896 ("Service `%s' terminated with status %s/%d, will try to restart it!\n"),
897 pos->name, statstr, statcode);
898 /* schedule restart */
900 = GNUNET_TIME_relative_to_absolute (pos->backoff);
901 if (pos->backoff.value < EXPONENTIAL_BACKOFF_THRESHOLD)
903 = GNUNET_TIME_relative_multiply (pos->backoff, 2);
904 if (GNUNET_SCHEDULER_NO_TASK != child_restart_task)
905 GNUNET_SCHEDULER_cancel (sched, child_restart_task);
907 = GNUNET_SCHEDULER_add_with_priority (sched,
908 GNUNET_SCHEDULER_PRIORITY_IDLE,
909 &delayed_restart_task,
914 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
915 "Service `%s' terminated with status %s/%d\n",
916 pos->name, statstr, statcode);
922 if ( (running == NULL) &&
925 GNUNET_SERVER_destroy (server);
926 GNUNET_SIGNAL_handler_uninstall (shc_chld);
932 GNUNET_SCHEDULER_add_read_file (sched, GNUNET_TIME_UNIT_FOREVER_REL, pr,
933 &maint_child_death, NULL);
939 * List of handlers for the messages understood by this service.
941 static struct GNUNET_SERVER_MessageHandler handlers[] = {
942 {&handle_start, NULL, GNUNET_MESSAGE_TYPE_ARM_START, 0},
943 {&handle_stop, NULL, GNUNET_MESSAGE_TYPE_ARM_STOP, 0},
948 * Signal handler called for SIGCHLD. Triggers the
949 * respective handler by writing to the trigger pipe.
952 sighandler_child_death ()
957 GNUNET_DISK_file_write (GNUNET_DISK_pipe_handle
958 (sigpipe, GNUNET_DISK_PIPE_END_WRITE), &c,
964 * Process arm requests.
967 * @param s scheduler to use
968 * @param serv the initialized server
969 * @param c configuration to use
973 struct GNUNET_SCHEDULER_Handle *s,
974 struct GNUNET_SERVER_Handle *serv,
975 const struct GNUNET_CONFIGURATION_Handle *c)
977 char *defaultservices;
983 GNUNET_assert (serv != NULL);
984 shc_chld = GNUNET_SIGNAL_handler_install (SIGCHLD, &sighandler_child_death);
985 GNUNET_assert (sigpipe == NULL);
986 sigpipe = GNUNET_DISK_pipe (GNUNET_NO);
987 GNUNET_assert (sigpipe != NULL);
988 pr = GNUNET_DISK_pipe_handle (sigpipe, GNUNET_DISK_PIPE_END_READ);
989 GNUNET_assert (pr != NULL);
990 GNUNET_SERVER_ignore_shutdown (serv, GNUNET_YES);
991 GNUNET_SCHEDULER_add_delayed (sched,
992 GNUNET_TIME_UNIT_FOREVER_REL,
996 GNUNET_SCHEDULER_add_read_file (sched, GNUNET_TIME_UNIT_FOREVER_REL, pr,
997 &maint_child_death, NULL);
1000 GNUNET_CONFIGURATION_get_value_string (cfg,
1004 prefix_command = GNUNET_strdup ("");
1005 /* start default services... */
1007 GNUNET_CONFIGURATION_get_value_string (cfg,
1013 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
1014 "Starting default services `%s'\n", defaultservices);
1016 pos = strtok (defaultservices, " ");
1019 start_service (NULL, pos);
1020 pos = strtok (NULL, " ");
1022 GNUNET_free (defaultservices);
1027 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
1028 "No default services configured.\n");
1032 /* process client requests */
1033 GNUNET_SERVER_add_handlers (server, handlers);
1035 /* manage services */
1036 GNUNET_SCHEDULER_add_with_priority (sched,
1037 GNUNET_SCHEDULER_PRIORITY_IDLE,
1038 &config_change_task, NULL);
1043 * The main function for the arm service.
1045 * @param argc number of arguments from the command line
1046 * @param argv command line arguments
1047 * @return 0 ok, 1 on error
1050 main (int argc, char *const *argv)
1052 return (GNUNET_OK ==
1053 GNUNET_SERVICE_run (argc,
1054 argv, "arm", GNUNET_YES, &run, NULL)) ? 0 : 1;
1057 /* end of gnunet-service-arm.c */