2 This file is part of GNUnet.
3 (C) 2009 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
22 * @file arm/gnunet-service-arm.c
23 * @brief the automated restart manager service
24 * @author Christian Grothoff
27 * - multiple start-stop requests with RC>1 can result
28 * in UP/DOWN signals based on "pending" that are inaccurate...
29 * => have list of clients waiting for a resolution instead of
30 * giving instant (but incorrect) replies
31 * - need to test auto-restart code on configuration changes;
32 * - should refine restart code to check if *relevant* parts of the
33 * configuration were changed (anything in the section for the service)
34 * - should have a way to specify dependencies between services and
35 * manage restarts of groups of services
38 #include "gnunet_client_lib.h"
39 #include "gnunet_getopt_lib.h"
40 #include "gnunet_os_lib.h"
41 #include "gnunet_protocols.h"
42 #include "gnunet_service_lib.h"
43 #include "gnunet_signal_lib.h"
48 * Check for configuration file changes every 5s.
50 #define MAINT_FREQUENCY GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS, 5)
53 * Threshold after which exponential backoff shouldn't increase (in ms); 30m
55 #define EXPONENTIAL_BACKOFF_THRESHOLD (1000 * 60 * 30)
59 * List of our services.
64 * List of our services.
69 * This is a linked list.
71 struct ServiceList *next;
74 * Name of the service.
79 * Name of the binary used.
84 * Name of the configuration file used.
89 * Client to notify upon kill completion (waitpid), NULL
90 * if we should simply restart the process.
92 struct GNUNET_SERVER_Client *killing_client;
95 * Process ID of the child.
100 * Last time the config of this service was
106 * Process exponential backoff time
108 struct GNUNET_TIME_Relative backoff;
111 * Absolute time at which the process is scheduled to restart in case of death
113 struct GNUNET_TIME_Absolute restartAt;
116 * Reference counter (counts how many times we've been
117 * asked to start the service). We only actually stop
118 * it once rc hits zero.
125 * List of running services.
127 static struct ServiceList *running;
132 static const struct GNUNET_CONFIGURATION_Handle *cfg;
137 static struct GNUNET_SCHEDULER_Handle *sched;
140 * Command to prepend to each actual command.
142 static char *prefix_command;
145 * Option to append to each actual command.
147 static char *final_option;
150 * ID of task called whenever we get a SIGCHLD.
152 static GNUNET_SCHEDULER_TaskIdentifier child_death_task;
155 * ID of task called whenever the timeout for restarting a child
158 static GNUNET_SCHEDULER_TaskIdentifier child_restart_task;
161 * Context for our SIGCHLD handler.
163 static struct GNUNET_SIGNAL_Context *shc_chld;
166 * Pipe used to communicate child death (SIGCHLD) via signal.
168 static struct GNUNET_DISK_PipeHandle *sigpipe;
171 * Reading end of the signal pipe.
173 static const struct GNUNET_DISK_FileHandle *pr;
176 * Are we in shutdown mode?
178 static int in_shutdown;
182 * Handle to our server instance. Our server is a bit special in that
183 * its service is not immediately stopped once we get a shutdown
184 * request (since we need to continue service until all of our child
185 * processes are dead). This handle is used to shut down the server
186 * (and thus trigger process termination) once all child processes are
187 * also dead. A special option in the ARM configuration modifies the
188 * behaviour of the service implementation to not do the shutdown
191 static struct GNUNET_SERVER_Handle *server;
195 * If the configuration file changes, restart tasks that depended on that
198 * @param cls closure, NULL if we need to self-restart
/* Scheduler task that restarts services whose configuration file changed.
 *
 * For a service entry `pos`, the visible test stats pos->config and requires
 * the recorded pos->mtime to be older than the file's current st_mtime (a
 * further condition follows on a line that is not visible in this extract).
 * A matching service is sent SIGTERM; a failing kill is logged as a warning.
 *
 * `cls` and `tc` are not referenced in the visible lines.
 *
 * NOTE(review): several lines of this definition (braces, the declaration of
 * `sbuf`, the list iteration) are absent from this extract. */
202 config_change_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
204 struct ServiceList *pos;
210 /* FIXME: this test for config change is a bit too coarse grained */
211 if ( (0 == STAT (pos->config, &sbuf)) &&
212 (pos->mtime < sbuf.st_mtime) &&
/* The format string consumes one `%s'; the service name must be passed,
   otherwise the logger reads a non-existent variadic argument. */
215 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
216 _("Restarting service `%s' due to configuration file change.\n"), pos->name);
217 if (0 != PLIBC_KILL (pos->pid, SIGTERM))
218 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
/* Reset the restart backoff to its initial value (the same value that
   start_service assigns to a fresh entry).  NOTE(review): the line between
   the kill and this assignment is not visible, so whether the reset is
   conditional on the kill succeeding cannot be confirmed here. */
220 pos->backoff = GNUNET_TIME_UNIT_MILLISECONDS;
229 * Transmit a status result message.
231 * @param cls pointer to "uint16_t*" with message type
232 * @param size number of bytes available in buf
233 * @param buf where to copy the message, NULL on error
234 * @return number of bytes copied to buf
/* Write a header-only status message into `buf` and return the number of
 * bytes written (0 on the error path, sizeof (struct GNUNET_MessageHeader)
 * otherwise).  Presumably the callback handed to
 * GNUNET_SERVER_notify_transmit_ready by signal_result -- the argument lines
 * of that call are not visible, TODO confirm.
 *
 * NOTE(review): this extract omits lines of the definition (declaration and
 * assignment of `res` and `msg`, the condition guarding the error path). */
237 write_result (void *cls, size_t size, void *buf)
240 struct GNUNET_MessageHeader *msg;
/* Error path: warn and report that nothing was written. */
244 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
245 _("Could not send status result to client\n"));
246 return 0; /* error, not much we can do */
249 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
250 "Sending status response %u to client\n",
251 (unsigned int) *res);
/* The offered buffer must be able to hold one message header. */
253 GNUNET_assert (size >= sizeof (struct GNUNET_MessageHeader));
/* The reply carries no payload: its size is the header size and its type is
   the status code in *res, both converted with htons. */
255 msg->size = htons (sizeof (struct GNUNET_MessageHeader));
256 msg->type = htons (*res);
258 return sizeof (struct GNUNET_MessageHeader);
264 * Signal our client that we will start or stop the
267 * @param client who is being signalled
268 * @param name name of the service
269 * @param result message type to send
270 * @return NULL if it was not found
/* Queue a status reply of message type `result` about service `name` for
 * `client`.
 *
 * NOTE(review): this extract omits lines of the definition (the test for a
 * missing client, the assignment to *res, the trailing arguments of the
 * transmit request). */
273 signal_result (struct GNUNET_SERVER_Client *client,
274 const char *name, uint16_t result)
/* Logged when there is no client to notify (guarding condition not visible;
   run calls start_service with a NULL client for default services). */
280 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
282 ("Not sending status result to client: no client known\n"));
/* Debug trace only: every type other than ARM_IS_DOWN is printed as "up",
   which includes the ARM_IS_UNKNOWN reply sent by stop_service. */
286 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
287 "Telling client that service `%s' is now %s\n",
289 result == GNUNET_MESSAGE_TYPE_ARM_IS_DOWN ? "down" : "up");
/* Heap copy of the status code.  NOTE(review): where it is filled in and who
   frees it is not visible in this extract. */
291 res = GNUNET_malloc (sizeof (uint16_t));
/* Request a transmission slot of exactly one message header, without a
   timeout. */
293 GNUNET_SERVER_notify_transmit_ready (client,
294 sizeof (struct GNUNET_MessageHeader),
295 GNUNET_TIME_UNIT_FOREVER_REL,
301 * Find the process with the given service
302 * name in the given list, remove it and return it.
304 * @param name which service entry to look up
305 * @return NULL if it was not found
/* Look up a service entry by name; returns a struct ServiceList pointer.
 *
 * NOTE(review): the list traversal, the handling of a match at the head of
 * the list and the return statements are not visible in this extract. */
307 static struct ServiceList *
308 find_name (const char *name)
310 struct ServiceList *pos;
311 struct ServiceList *prev;
/* Entries are matched by exact string comparison of their name. */
317 if (0 == strcmp (pos->name, name))
/* Unlink the matching entry from its predecessor. */
322 prev->next = pos->next;
334 * Free an entry in the service list.
336 * @param pos entry to free
/* Release the strings owned by a service entry.  `config` and `binary` are
 * freed with the NULL-tolerant variant; `name` is freed unconditionally.
 *
 * NOTE(review): whether `pos` itself is freed is not visible in this
 * extract. */
339 free_entry (struct ServiceList *pos)
341 GNUNET_free_non_null (pos->config);
342 GNUNET_free_non_null (pos->binary);
343 GNUNET_free (pos->name);
347 #include "do_start_process.c"
350 * Actually start the process for the given service.
352 * @param sl identifies service to start
/* Launch the process for service `sl` and store the result of
 * do_start_process in sl->pid.
 *
 * The command prefix comes from the service's "PREFIX" option, with
 * `prefix_command` duplicated as the visible alternative; the trailing
 * options come from the service's "OPTIONS" option, with `final_option`
 * duplicated as the visible alternative.  The options string is then
 * rewritten: "{}" is replaced (per the existing comment, with the service
 * name) and "$NAME" tokens are replaced using values looked up in the
 * "PATHS" configuration section.
 *
 * NOTE(review): many lines of this definition (local declarations, the `if`
 * heads of the configuration lookups, the arguments of GNUNET_asprintf and
 * do_start_process) are absent from this extract. */
355 start_process (struct ServiceList *sl)
368 GNUNET_CONFIGURATION_get_value_string (cfg,
369 sl->name, "PREFIX", &loprefix))
370 loprefix = GNUNET_strdup (prefix_command);
372 GNUNET_CONFIGURATION_get_value_string (cfg,
373 sl->name, "OPTIONS", &options))
375 options = GNUNET_strdup (final_option);
/* Substitution is only attempted when the options contain no '%'. */
376 if (NULL == strstr (options, "%"))
378 /* replace '{}' with service name */
379 while (NULL != (optpos = strstr (options, "{}")))
383 GNUNET_asprintf (&optpos,
386 GNUNET_free (options);
389 /* replace '$PATH' with value associated with "PATH" */
390 while (NULL != (optpos = strstr (options, "$")))
/* Skip over the upper-case variable name.  The cast is required: passing a
   negative plain char (any byte >= 0x80 from the configuration) to isupper
   is undefined behaviour. */
393 while (isupper ((unsigned char) *optend)) optend++;
401 GNUNET_CONFIGURATION_get_value_string (cfg, "PATHS",
/* An empty replacement value is the visible alternative for the lookup. */
404 val = GNUNET_strdup ("");
406 GNUNET_asprintf (&optpos,
412 GNUNET_free (options);
418 use_debug = GNUNET_CONFIGURATION_get_value_yesno (cfg, sl->name, "DEBUG");
420 GNUNET_log (GNUNET_ERROR_TYPE_INFO, _("Starting service `%s'\n"), sl->name);
422 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
423 "Starting service `%s' using binary `%s' and configuration `%s'\n",
424 sl->name, sl->binary, sl->config);
/* Two launch variants exist, selected by the service's DEBUG setting; their
   argument lists are not visible here. */
426 if (GNUNET_YES == use_debug)
427 sl->pid = do_start_process (loprefix,
434 sl->pid = do_start_process (loprefix,
439 GNUNET_free (loprefix);
440 GNUNET_free (options);
441 /* FIXME: should check sl->pid */
446 * Start the specified service.
448 * @param client who is asking for this
449 * @param servicename name of the service to start
/* Handle a request to start `servicename` on behalf of `client` and tell
 * the client the outcome through signal_result (ARM_IS_UP or ARM_IS_DOWN).
 * run passes a NULL client for default services.
 *
 * NOTE(review): this extract omits lines of the definition (local
 * declarations, several `if` heads and returns, the assignments between the
 * name and mtime fields, the call that launches the process). */
452 start_service (struct GNUNET_SERVER_Client *client, const char *servicename)
454 struct ServiceList *sl;
/* Refuse new services once shutdown has begun. */
459 if (GNUNET_YES == in_shutdown)
461 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
462 _("ARM is shutting down, service `%s' not started.\n"),
464 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
467 sl = find_name (servicename);
470 /* already running, just increment RC */
471 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
472 _("Service `%s' already running.\n"), servicename);
476 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP);
/* The service's section must name the binary that implements it. */
480 GNUNET_CONFIGURATION_get_value_string (cfg,
481 servicename, "BINARY", &binary))
483 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
484 _("Binary implementing service `%s' not known!\n"),
486 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
/* The configuration file must be resolvable and stat-able; its mtime is
   recorded below in sl->mtime. */
490 GNUNET_CONFIGURATION_get_value_filename (cfg,
494 (0 != STAT (config, &sbuf)))
/* NOTE(review): `config` is logged here even on the failure path; whether it
   is guaranteed non-NULL when the lookup itself fails is not visible. */
496 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
497 _("Configuration file `%s' for service `%s' not known!\n"),
498 config, servicename);
499 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
500 GNUNET_free (binary);
501 GNUNET_free_non_null (config);
/* Build a fresh list entry: minimal backoff, no restart scheduled. */
504 sl = GNUNET_malloc (sizeof (struct ServiceList));
505 sl->name = GNUNET_strdup (servicename);
510 sl->mtime = sbuf.st_mtime;
511 sl->backoff = GNUNET_TIME_UNIT_MILLISECONDS;
512 sl->restartAt = GNUNET_TIME_UNIT_FOREVER_ABS;
517 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP);
522 * Stop the specified service.
524 * @param client who is asking for this
525 * @param servicename name of the service to stop
/* Handle a request to stop `servicename` on behalf of `client`.
 *
 * Every visible early-exit path replies via signal_result and then calls
 * GNUNET_SERVER_receive_done itself.  On the final path the process is sent
 * SIGTERM and the client is remembered in pos->killing_client; the reply and
 * receive_done for that client are issued by maint_child_death once the
 * child has been reaped.
 *
 * NOTE(review): this extract omits lines of the definition (the conditions
 * guarding most branches, returns, list re-insertion). */
528 stop_service (struct GNUNET_SERVER_Client *client, const char *servicename)
530 struct ServiceList *pos;
532 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
533 _("Preparing to stop `%s'\n"), servicename);
534 pos = find_name (servicename);
/* Reply "unknown" -- presumably when find_name found no entry (the test is
   not visible). */
537 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UNKNOWN);
538 GNUNET_SERVER_receive_done (client, GNUNET_OK);
543 /* RC>1, just decrement RC */
548 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
549 "Service `%s' still used by %u clients, will keep it running!\n",
550 servicename, pos->rc);
552 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP);
553 GNUNET_SERVER_receive_done (client, GNUNET_OK);
557 pos->rc--; /* decrement RC to zero */
/* Another client is already waiting for this process to die. */
558 if (pos->killing_client != NULL)
560 /* killing already in progress */
562 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
563 "Service `%s' is already down\n", servicename);
565 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
566 GNUNET_SERVER_receive_done (client, GNUNET_OK);
/* During shutdown shutdown_task has already sent SIGTERM to the services. */
572 if (GNUNET_YES == in_shutdown)
575 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
576 "Termination request already sent to `%s' (since ARM is in shutdown).\n",
579 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
580 GNUNET_SERVER_receive_done (client, GNUNET_OK);
587 /* process is in delayed restart, simply remove it! */
589 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
590 GNUNET_SERVER_receive_done (client, GNUNET_OK);
594 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
595 "Sending kill signal to service `%s', waiting for process to die.\n",
598 if (0 != PLIBC_KILL (pos->pid, SIGTERM))
599 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
/* Defer the reply: keep a reference to the client until the child is
   reaped (maint_child_death drops it again). */
602 pos->killing_client = client;
603 GNUNET_SERVER_client_keep (client);
608 * Handle START-message.
610 * @param cls closure (always NULL)
611 * @param client identification of the client
612 * @param message the actual message
613 * @return GNUNET_OK to keep the connection open,
614 * GNUNET_SYSERR to close it (signal serious error)
/* Server callback for ARM_START messages (registered in `handlers`).
 * The bytes following the message header are interpreted as the service
 * name.
 *
 * NOTE(review): the declaration of `size` and the return after the error
 * path are not visible in this extract. */
617 handle_start (void *cls,
618 struct GNUNET_SERVER_Client *client,
619 const struct GNUNET_MessageHeader *message)
621 const char *servicename;
/* Payload length = total message size minus the header. */
624 size = ntohs (message->size);
625 size -= sizeof (struct GNUNET_MessageHeader);
626 servicename = (const char *) &message[1];
/* Reject an empty payload or one whose last byte is not NUL; only then is it
   safe to treat the payload as a C string. */
627 if ((size == 0) || (servicename[size - 1] != '\0'))
630 GNUNET_SERVER_receive_done (client, GNUNET_SYSERR);
633 start_service (client, servicename);
634 GNUNET_SERVER_receive_done (client, GNUNET_OK);
639 * Handle STOP-message.
641 * @param cls closure (always NULL)
642 * @param client identification of the client
643 * @param message the actual message
644 * @return GNUNET_OK to keep the connection open,
645 * GNUNET_SYSERR to close it (signal serious error)
/* Server callback for ARM_STOP messages (registered in `handlers`).
 * The bytes following the message header are interpreted as the service
 * name.  No GNUNET_SERVER_receive_done is visible after stop_service here;
 * stop_service calls it on its visible early-exit paths, and
 * maint_child_death calls it for a client stored in killing_client.
 *
 * NOTE(review): the declaration of `size` and the return after the error
 * path are not visible in this extract. */
648 handle_stop (void *cls,
649 struct GNUNET_SERVER_Client *client,
650 const struct GNUNET_MessageHeader *message)
652 const char *servicename;
/* Payload length = total message size minus the header. */
655 size = ntohs (message->size);
656 size -= sizeof (struct GNUNET_MessageHeader);
657 servicename = (const char *) &message[1];
/* Reject an empty payload or one whose last byte is not NUL. */
658 if ((size == 0) || (servicename[size - 1] != '\0'))
661 GNUNET_SERVER_receive_done (client, GNUNET_SYSERR);
664 stop_service (client, servicename);
669 * Remove all entries for tasks that are not running
670 * (pid = 0) from the running list (they will no longer
671 * be restarted since we are shutting down).
676 struct ServiceList *pos;
677 struct ServiceList *next;
678 struct ServiceList *prev;
701 * We are done with everything. Stop remaining
702 * tasks, signal handler and the server.
707 GNUNET_SERVER_destroy (server);
709 GNUNET_SIGNAL_handler_uninstall (shc_chld);
711 GNUNET_SCHEDULER_cancel (sched, child_death_task);
712 child_death_task = GNUNET_SCHEDULER_NO_TASK;
717 * Task run for shutdown.
719 * @param cls closure, NULL if we need to self-restart
/* Scheduler task that begins shutdown: sets `in_shutdown` and sends SIGTERM
 * to service processes, logging a warning when a kill fails.
 * `cls` and `tc` are not referenced in the visible lines.
 *
 * NOTE(review): the list iteration and any condition deciding which entries
 * are signalled are not visible in this extract. */
723 shutdown_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
725 struct ServiceList *pos;
728 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, _("Stopping all services\n"));
/* From here on start_service refuses requests and dead children are not
   scheduled for restart (see the in_shutdown tests elsewhere). */
730 in_shutdown = GNUNET_YES;
737 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
738 "Sending SIGTERM to `%s'\n", pos->name);
740 if (0 != PLIBC_KILL (pos->pid, SIGTERM))
741 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
751 * Task run whenever it is time to restart a child that died.
753 * @param cls closure, always NULL
/* Scheduler task that restarts dead services whose restart time has come
 * and re-arms itself for the earliest remaining restart.
 *
 * NOTE(review): this extract omits lines of the definition (the list
 * iteration, the restart call itself, assignment targets of the `=`
 * continuation lines, trailing scheduler arguments). */
757 delayed_restart_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
759 struct ServiceList *pos;
760 struct GNUNET_TIME_Relative lowestRestartDelay;
/* This invocation consumes the pending task id. */
762 child_restart_task = GNUNET_SCHEDULER_NO_TASK;
/* Invoked because the scheduler is shutting down (body of this branch is not
   visible). */
763 if (0 != (tc->reason & GNUNET_SCHEDULER_REASON_SHUTDOWN))
/* FOREVER doubles as the "nothing pending" marker tested further down. */
770 lowestRestartDelay = GNUNET_TIME_UNIT_FOREVER_REL;
772 /* check for services that need to be restarted due to
773 configuration changes or because the last restart failed */
/* Only entries without a running process (pid 0) are considered, and only
   while not shutting down. */
777 if ( (pos->pid == 0) &&
778 (GNUNET_YES != in_shutdown) )
/* Restart time reached. */
780 if (GNUNET_TIME_absolute_get_remaining (pos->restartAt).value == 0)
782 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
783 _("Restarting service `%s'.\n"), pos->name);
/* Otherwise track the smallest remaining delay. */
789 = GNUNET_TIME_relative_min (lowestRestartDelay,
790 GNUNET_TIME_absolute_get_remaining
/* Re-schedule this task only if some entry is still waiting. */
796 if (lowestRestartDelay.value != GNUNET_TIME_UNIT_FOREVER_REL.value)
799 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
800 "Will restart process in %llums\n",
801 (unsigned long long) lowestRestartDelay.value);
804 = GNUNET_SCHEDULER_add_delayed (sched,
806 &delayed_restart_task,
813 * Task triggered whenever we receive a SIGCHLD (child
816 * @param cls closure, NULL if we need to self-restart
/* Scheduler task run when the read end of the signal pipe (`pr`) becomes
 * readable, i.e. after sighandler_child_death wrote to it.  It checks the
 * process status of services, answers clients waiting for a stop, schedules
 * restarts for services that died while not in shutdown, and re-registers
 * itself on the pipe.
 *
 * NOTE(review): this extract omits lines of the definition (several local
 * declarations, list unlinking, branch bodies, assignment targets of the `=`
 * continuation lines, and part of the final shutdown condition). */
820 maint_child_death (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
822 struct ServiceList *pos;
823 struct ServiceList *prev;
824 struct ServiceList *next;
829 enum GNUNET_OS_ProcessStatusType statusType;
830 unsigned long statusCode;
832 child_death_task = GNUNET_SCHEDULER_NO_TASK;
/* Not woken by readable data: just wait on the pipe again. */
833 if (0 == (tc->reason & GNUNET_SCHEDULER_REASON_READ_READY))
836 GNUNET_SCHEDULER_add_read_file (sched, GNUNET_TIME_UNIT_FOREVER_REL, pr,
837 &maint_child_death, NULL);
840 /* consume the signal */
841 GNUNET_break (0 < GNUNET_DISK_file_read (pr, &c, sizeof (c)));
843 /* check for services that died (WAITPID) */
846 while (NULL != (pos = next))
/* Entries whose status query fails, reports GNUNET_NO, or reports
   stopped/running are not treated as dead (branch body not visible). */
854 if ((GNUNET_SYSERR == (ret = GNUNET_OS_process_status (pos->pid,
857 ( (ret == GNUNET_NO) ||
858 (statusType == GNUNET_OS_PROCESS_STOPPED) ||
859 (statusType == GNUNET_OS_PROCESS_RUNNING)) )
/* Translate the termination kind into a label and code for the log lines
   below. */
865 if (statusType == GNUNET_OS_PROCESS_EXITED)
867 statstr = _( /* process termination method */ "exit");
868 statcode = statusCode;
870 else if (statusType == GNUNET_OS_PROCESS_SIGNALED)
872 statstr = _( /* process termination method */ "signal");
873 statcode = statusCode;
877 statstr = _( /* process termination method */ "unknown");
/* A client asked for this stop (see stop_service): send the deferred reply,
   finish its request and release the reference taken with client_keep. */
881 if (NULL != pos->killing_client)
887 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
888 _("Service `%s' stopped\n"),
890 signal_result (pos->killing_client,
891 pos->name, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
892 GNUNET_SERVER_receive_done (pos->killing_client, GNUNET_OK);
893 GNUNET_SERVER_client_drop (pos->killing_client);
/* Unrequested death outside of shutdown: plan a restart. */
897 if (GNUNET_YES != in_shutdown)
899 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
901 ("Service `%s' terminated with status %s/%d, will try to restart it!\n"),
902 pos->name, statstr, statcode);
903 /* schedule restart */
905 = GNUNET_TIME_relative_to_absolute (pos->backoff);
/* Double the backoff until it reaches EXPONENTIAL_BACKOFF_THRESHOLD. */
906 if (pos->backoff.value < EXPONENTIAL_BACKOFF_THRESHOLD)
908 = GNUNET_TIME_relative_multiply (pos->backoff, 2);
/* Replace any pending restart task with a fresh idle-priority run of
   delayed_restart_task, which works out the actual delay. */
909 if (GNUNET_SCHEDULER_NO_TASK != child_restart_task)
910 GNUNET_SCHEDULER_cancel (sched, child_restart_task);
912 = GNUNET_SCHEDULER_add_with_priority (sched,
913 GNUNET_SCHEDULER_PRIORITY_IDLE,
914 &delayed_restart_task,
919 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
920 "Service `%s' terminated with status %s/%d\n",
921 pos->name, statstr, statcode);
/* With no services left (and a further condition that is not visible), tear
   down the server and the SIGCHLD handler. */
927 if ( (running == NULL) &&
930 GNUNET_SERVER_destroy (server);
931 GNUNET_SIGNAL_handler_uninstall (shc_chld);
/* Keep listening for further child deaths. */
937 GNUNET_SCHEDULER_add_read_file (sched, GNUNET_TIME_UNIT_FOREVER_REL, pr,
938 &maint_child_death, NULL);
944 * List of handlers for the messages understood by this service.
/* Dispatch table passed to GNUNET_SERVER_add_handlers in run: ARM_START
 * messages go to handle_start, ARM_STOP messages to handle_stop, both with a
 * NULL closure.  The trailing 0 is presumably the "variable size" marker of
 * struct GNUNET_SERVER_MessageHandler -- TODO confirm against its
 * declaration.
 * NOTE(review): the table terminator is not visible in this extract. */
946 static struct GNUNET_SERVER_MessageHandler handlers[] = {
947 {&handle_start, NULL, GNUNET_MESSAGE_TYPE_ARM_START, 0},
948 {&handle_stop, NULL, GNUNET_MESSAGE_TYPE_ARM_STOP, 0},
953 * Signal handler called for SIGCHLD. Triggers the
954 * respective handler by writing to the trigger pipe.
/* SIGCHLD handler (installed in run through GNUNET_SIGNAL_handler_install).
 * It only writes `c` to the write end of `sigpipe`; the read end `pr` is
 * watched by maint_child_death, which does the real work from the
 * scheduler.
 * NOTE(review): the declaration of `c` and the size argument of the write
 * are not visible in this extract. */
957 sighandler_child_death ()
962 GNUNET_DISK_file_write (GNUNET_DISK_pipe_handle
963 (sigpipe, GNUNET_DISK_PIPE_END_WRITE), &c,
969 * Process arm requests.
972 * @param s scheduler to use
973 * @param serv the initialized server
974 * @param c configuration to use
978 struct GNUNET_SCHEDULER_Handle *s,
979 struct GNUNET_SERVER_Handle *serv,
980 const struct GNUNET_CONFIGURATION_Handle *c)
/* Service initialisation callback (passed as `&run` to GNUNET_SERVICE_run
 * in main): installs the SIGCHLD handler and its signal pipe, registers the
 * scheduler tasks, loads the global command prefix/option, starts the
 * configured default services and registers the message handlers.
 *
 * NOTE(review): the first line of the signature and several body lines
 * (assignments of cfg/sched/server, configuration keys, loop heads, the
 * callback of the first add_delayed call) are absent from this extract. */
982 char *defaultservices;
988 GNUNET_assert (serv != NULL);
989 shc_chld = GNUNET_SIGNAL_handler_install (GNUNET_SIGCHLD, &sighandler_child_death);
/* The signal pipe must be created exactly once; `pr` is its read end. */
990 GNUNET_assert (sigpipe == NULL);
991 sigpipe = GNUNET_DISK_pipe (GNUNET_NO);
992 GNUNET_assert (sigpipe != NULL);
993 pr = GNUNET_DISK_pipe_handle (sigpipe, GNUNET_DISK_PIPE_END_READ);
994 GNUNET_assert (pr != NULL);
/* The server must outlive a shutdown request until all children are dead
   (it is destroyed explicitly elsewhere in this file). */
995 GNUNET_SERVER_ignore_shutdown (serv, GNUNET_YES);
996 GNUNET_SCHEDULER_add_delayed (sched,
997 GNUNET_TIME_UNIT_FOREVER_REL,
/* Wake maint_child_death whenever the SIGCHLD handler writes to the pipe. */
1001 GNUNET_SCHEDULER_add_read_file (sched, GNUNET_TIME_UNIT_FOREVER_REL, pr,
1002 &maint_child_death, NULL);
/* Empty strings are the visible alternatives for the global prefix command
   and final option. */
1005 GNUNET_CONFIGURATION_get_value_string (cfg,
1009 prefix_command = GNUNET_strdup ("");
1011 GNUNET_CONFIGURATION_get_value_string (cfg,
1015 final_option = GNUNET_strdup ("");
1016 /* start default services... */
1018 GNUNET_CONFIGURATION_get_value_string (cfg,
1024 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
1025 "Starting default services `%s'\n", defaultservices);
/* The list is space-separated and tokenised in place; each service is
   started without a client to notify.  NOTE(review): strtok keeps hidden
   static state across the start_service calls in between. */
1027 pos = strtok (defaultservices, " ");
1030 start_service (NULL, pos);
1031 pos = strtok (NULL, " ");
1033 GNUNET_free (defaultservices);
1038 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
1039 "No default services configured.\n");
1043 /* process client requests */
1044 GNUNET_SERVER_add_handlers (server, handlers);
1046 /* manage services */
1047 GNUNET_SCHEDULER_add_with_priority (sched,
1048 GNUNET_SCHEDULER_PRIORITY_IDLE,
1049 &config_change_task, NULL);
1054 * The main function for the arm service.
1056 * @param argc number of arguments from the command line
1057 * @param argv command line arguments
1058 * @return 0 ok, 1 on error
/* Program entry point: runs the "arm" service through GNUNET_SERVICE_run
 * with `run` as its initialisation callback (NULL closure) and maps a
 * GNUNET_OK result to exit code 0, anything else to 1. */
1061 main (int argc, char *const *argv)
1063 return (GNUNET_OK ==
1064 GNUNET_SERVICE_run (argc,
1065 argv, "arm", GNUNET_YES, &run, NULL)) ? 0 : 1;
1068 /* end of gnunet-service-arm.c */