2 This file is part of GNUnet.
3 (C) 2009 Christian Grothoff (and other contributing authors)
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
22 * @file arm/gnunet-service-arm.c
23 * @brief the automated restart manager service
24 * @author Christian Grothoff
27 * - multiple start-stop requests with RC>1 can result
28 * in UP/DOWN signals based on "pending" that are inaccurate...
29 * => have list of clients waiting for a resolution instead of
30 * giving instant (but incorrect) replies
31 * - code could go into restart-loop for a service
32 * if service crashes instantly -- need exponential back-off
33 * - need to test auto-restart code on configuration changes;
34 * - should refine restart code to check if *relevant* parts of the
35 * configuration were changed (anything in the section for the service)
36 * - should have a way to specify dependencies between services and
37 * manage restarts of groups of services
40 #include "gnunet_client_lib.h"
41 #include "gnunet_getopt_lib.h"
42 #include "gnunet_os_lib.h"
43 #include "gnunet_protocols.h"
44 #include "gnunet_service_lib.h"
49 * Run normal maintenance every 2s.
51 #define MAINT_FREQUENCY GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS, 2)
54 * Run fast maintenance after 100ms. This is used for an extra-job
55 * that is run to check for a process that we just killed.
57 #define MAINT_FAST_FREQUENCY GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 100)
60 * How long do we wait until we decide that a service
63 #define CHECK_TIMEOUT GNUNET_TIME_UNIT_MINUTES
66 * List of our services.
71 * Function to call if waitpid informs us that
75 * @param pos entry in the service list of the process that died
77 typedef void (*CleanCallback) (void *cls, struct ServiceList * pos);
80 * List of our services.
85 * This is a linked list.
87 struct ServiceList *next;
90 * Name of the service.
95 * Name of the binary used.
100 * Name of the configuration file used.
105 * Function to call upon kill completion (waitpid), NULL
106 * if we should simply restart the process.
108 CleanCallback kill_continuation;
111 * Closure for kill_continuation.
113 void *kill_continuation_cls;
116 * Process ID of the child.
121 * Last time the config of this service was
127 * Reference counter (counts how many times we've been
128 * asked to start the service). We only actually stop
129 * it once rc hits zero.
136 * List of running services.
138 static struct ServiceList *running;
143 static const struct GNUNET_CONFIGURATION_Handle *cfg;
148 static struct GNUNET_SCHEDULER_Handle *sched;
151 * Command to prepend to each actual command.
153 static char *prefix_command;
157 * Background task doing maintenance.
159 * @param cls closure, NULL if we need to self-restart
163 maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc);
167 * Transmit a status result message.
169 * @param cls pointer to "uint16_t*" with message type
170 * @param size number of bytes available in buf
171 * @param buf where to copy the message, NULL on error
172 * @return number of bytes copied to buf
175 write_result (void *cls, size_t size, void *buf)
178 struct GNUNET_MessageHeader *msg;
181 return 0; /* error, not much we can do */
182 GNUNET_assert (size >= sizeof (struct GNUNET_MessageHeader));
184 msg->size = htons (sizeof (struct GNUNET_MessageHeader));
185 msg->type = htons (*res);
187 return sizeof (struct GNUNET_MessageHeader);
193 * Signal our client that we will start or stop the
196 * @param client who is being signalled
197 * @param name name of the service
198 * @param result message type to send
199 * @return NULL if it was not found
202 signal_result (struct GNUNET_SERVER_Client *client,
203 const char *name, uint16_t result)
210 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
211 "Telling client that service `%s' is now %s\n",
213 result == GNUNET_MESSAGE_TYPE_ARM_IS_DOWN ? "down" : "up");
215 res = GNUNET_malloc (sizeof (uint16_t));
217 GNUNET_SERVER_notify_transmit_ready (client,
218 sizeof (struct GNUNET_MessageHeader),
219 GNUNET_TIME_UNIT_FOREVER_REL,
225 * Find the process with the given service
226 * name in the service list, remove it and return it.
228 * @param name which service entry to look up
229 * @return NULL if it was not found
231 static struct ServiceList *
232 find_name (const char *name)
234 struct ServiceList *pos;
235 struct ServiceList *prev;
241 if (0 == strcmp (pos->name, name))
246 prev->next = pos->next;
258 * Free an entry in the service list.
260 * @param pos entry to free
263 free_entry (struct ServiceList *pos)
265 GNUNET_free_non_null (pos->config);
266 GNUNET_free_non_null (pos->binary);
267 GNUNET_free (pos->name);
273 * Actually start the process for the given service.
275 * @param sl identifies service to start
278 start_process (struct ServiceList *sl)
283 unsigned int argv_size;
286 const char *firstarg;
291 GNUNET_CONFIGURATION_get_value_string (cfg,
292 sl->name, "PREFIX", &loprefix))
293 loprefix = GNUNET_strdup (prefix_command);
295 GNUNET_CONFIGURATION_get_value_string (cfg,
296 sl->name, "OPTIONS", &options))
297 options = GNUNET_strdup ("");
298 use_debug = GNUNET_CONFIGURATION_get_value_yesno (cfg, sl->name, "DEBUG");
300 GNUNET_log (GNUNET_ERROR_TYPE_INFO, _("Starting service `%s'\n"), sl->name);
302 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
303 "Starting service `%s' using binary `%s' and configuration `%s'\n",
304 sl->name, sl->binary, sl->config);
310 while ('\0' != *lopos)
317 while ('\0' != *optpos)
324 argv = GNUNET_malloc (argv_size * sizeof (char *));
328 while ('\0' != *lopos)
330 while (*lopos == ' ')
336 argv[argv_size++] = lopos;
337 while (('\0' != *lopos) && (' ' != *lopos))
345 firstarg = sl->binary;
346 argv[argv_size++] = sl->binary;
347 argv[argv_size++] = "-c";
348 argv[argv_size++] = sl->config;
349 if (GNUNET_YES == use_debug)
351 argv[argv_size++] = "-L";
352 argv[argv_size++] = "DEBUG";
355 while ('\0' != *optpos)
357 while (*optpos == ' ')
361 argv[argv_size++] = optpos;
362 while (('\0' != *optpos) && (' ' != *optpos))
369 argv[argv_size++] = NULL;
370 sl->pid = GNUNET_OS_start_process_v (firstarg, argv);
372 GNUNET_free (loprefix);
373 GNUNET_free (options);
378 * Start the specified service.
380 * @param client who is asking for this
381 * @param servicename name of the service to start
384 start_service (struct GNUNET_SERVER_Client *client, const char *servicename)
386 struct ServiceList *sl;
390 sl = find_name (servicename);
393 /* already running, just increment RC */
394 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
395 _("Service `%s' already running.\n"), servicename);
399 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP);
403 GNUNET_CONFIGURATION_get_value_string (cfg,
404 servicename, "BINARY", &binary))
406 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
407 _("Binary implementing service `%s' not known!\n"),
409 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
413 GNUNET_CONFIGURATION_get_value_filename (cfg,
417 (0 != STAT (config, &sbuf)))
419 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
420 _("Configuration file `%s' for service `%s' not known!\n"),
421 config, servicename);
422 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
423 GNUNET_free (binary);
424 GNUNET_free_non_null (config);
427 sl = GNUNET_malloc (sizeof (struct ServiceList));
428 sl->name = GNUNET_strdup (servicename);
433 sl->mtime = sbuf.st_mtime;
437 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP);
442 * Free the given entry in the service list and signal
443 * the given client that the service is now down.
445 * @param cls pointer to the client ("struct GNUNET_SERVER_Client*")
446 * @param pos entry for the service
449 free_and_signal (void *cls, struct ServiceList *pos)
451 struct GNUNET_SERVER_Client *client = cls;
452 /* find_name will remove "pos" from the list! */
453 GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Service `%s' stopped\n", pos->name);
454 signal_result (client, pos->name, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
455 GNUNET_SERVER_receive_done (client, GNUNET_OK);
456 GNUNET_SERVER_client_drop (client);
462 * Stop the specified service.
464 * @param client who is asking for this
465 * @param servicename name of the service to stop
468 stop_service (struct GNUNET_SERVER_Client *client,
469 const char *servicename)
471 struct ServiceList *pos;
472 struct GNUNET_CLIENT_Connection *sc;
473 unsigned long long port;
475 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
476 _("Preparing to stop `%s'\n"), servicename);
477 pos = find_name (servicename);
478 if ((pos != NULL) && (pos->kill_continuation != NULL))
480 /* killing already in progress */
482 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
483 "Service `%s' is already down\n", servicename);
485 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
488 if ((pos != NULL) && (pos->rc > 1))
490 /* RC>1, just decrement RC */
495 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
496 "Service `%s' still used by %u clients, will keep it running!\n",
500 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP);
501 GNUNET_SERVER_receive_done (client, GNUNET_OK);
507 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
508 "Sending kill signal to service `%s', waiting for process to die.\n",
511 if (0 != PLIBC_KILL (pos->pid, SIGTERM))
512 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
515 pos->kill_continuation = &free_and_signal;
516 pos->kill_continuation_cls = client;
517 GNUNET_SERVER_client_keep (client);
518 GNUNET_SCHEDULER_add_delayed (sched,
520 GNUNET_SCHEDULER_PRIORITY_IDLE,
521 GNUNET_SCHEDULER_NO_TASK,
522 MAINT_FAST_FREQUENCY, &maint, NULL);
527 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
528 "Sending termination request to service `%s'.\n",
532 GNUNET_CONFIGURATION_get_value_number (cfg,
536 (NULL != (sc = GNUNET_CLIENT_connect (sched, servicename, cfg))) )
538 GNUNET_CLIENT_service_shutdown (sc);
539 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
543 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UNKNOWN);
545 GNUNET_SERVER_receive_done (client, GNUNET_OK);
551 * Handle START-message.
553 * @param cls closure (always NULL)
554 * @param client identification of the client
555 * @param message the actual message
556 * @return GNUNET_OK to keep the connection open,
557 * GNUNET_SYSERR to close it (signal serious error)
560 handle_start (void *cls,
561 struct GNUNET_SERVER_Client *client,
562 const struct GNUNET_MessageHeader *message)
564 const char *servicename;
567 size = ntohs (message->size);
568 size -= sizeof (struct GNUNET_MessageHeader);
569 servicename = (const char *) &message[1];
570 if ((size == 0) || (servicename[size - 1] != '\0'))
573 GNUNET_SERVER_receive_done (client, GNUNET_SYSERR);
576 start_service (client, servicename);
577 GNUNET_SERVER_receive_done (client, GNUNET_OK);
582 * Handle STOP-message.
584 * @param cls closure (always NULL)
585 * @param client identification of the client
586 * @param message the actual message
587 * @return GNUNET_OK to keep the connection open,
588 * GNUNET_SYSERR to close it (signal serious error)
591 handle_stop (void *cls,
592 struct GNUNET_SERVER_Client *client,
593 const struct GNUNET_MessageHeader *message)
595 const char *servicename;
598 size = ntohs (message->size);
599 size -= sizeof (struct GNUNET_MessageHeader);
600 servicename = (const char *) &message[1];
601 if ((size == 0) || (servicename[size - 1] != '\0'))
604 GNUNET_SERVER_receive_done (client, GNUNET_SYSERR);
607 stop_service (client, servicename);
612 * Background task doing maintenance.
614 * @param cls closure, NULL if we need to self-restart
618 maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
620 struct ServiceList *pos;
621 struct ServiceList *prev;
622 struct ServiceList *next;
/* on shutdown: send SIGTERM to every entry in "running" and wait for it */
628 if (0 != (tc->reason & GNUNET_SCHEDULER_REASON_SHUTDOWN))
630 GNUNET_log (GNUNET_ERROR_TYPE_INFO, _("Stopping all services\n"));
631 while (NULL != (pos = running))
634 if (0 != PLIBC_KILL (pos->pid, SIGTERM))
635 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
636 if (GNUNET_OK != GNUNET_OS_process_wait(pos->pid))
637 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "waitpid");
/* not shutting down: re-schedule ourselves after MAINT_FREQUENCY */
643 GNUNET_SCHEDULER_add_delayed (tc->sched,
645 GNUNET_SCHEDULER_PRIORITY_IDLE,
646 GNUNET_SCHEDULER_NO_TASK,
647 MAINT_FREQUENCY, &maint, NULL);
649 /* check for services that died (WAITPID) */
652 while (NULL != (pos = next))
654 enum GNUNET_OS_ProcessStatusType statusType;
655 unsigned long statusCode;
660 if (NULL != pos->kill_continuation)
666 pos->kill_continuation (pos->kill_continuation_cls, pos);
670 if ( (GNUNET_SYSERR == (ret = GNUNET_OS_process_status(pos->pid,
673 ( (ret == GNUNET_NO) ||
674 (statusType == GNUNET_OS_PROCESS_STOPPED) ||
675 (statusType == GNUNET_OS_PROCESS_RUNNING) ) )
680 if (statusType == GNUNET_OS_PROCESS_EXITED)
682 statstr = _( /* process termination method */ "exit");
683 statcode = statusCode;
685 else if (statusType == GNUNET_OS_PROCESS_SIGNALED)
687 statstr = _( /* process termination method */ "signal");
688 statcode = statusCode;
692 statstr = _( /* process termination method */ "unknown");
695 if (NULL != pos->kill_continuation)
701 pos->kill_continuation (pos->kill_continuation_cls, pos);
704 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
705 _("Service `%s' terminated with status %s/%d, will try to restart it!\n"),
706 pos->name, statstr, statcode);
707 /* schedule restart */
712 /* check for services that need to be restarted due to
713 configuration changes or because the last restart failed */
717 if ((0 == STAT (pos->config, &sbuf)) && (pos->mtime < sbuf.st_mtime))
/* the format string contains a %s conversion, so the service name must be passed */
719 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
720 _("Restarting service `%s' due to configuration file change.\n"), pos->name);
721 if (0 != PLIBC_KILL (pos->pid, SIGTERM))
722 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
726 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
727 _("Restarting service `%s'.\n"), pos->name);
728 /* FIXME: should have some exponentially
729 increasing timer to avoid tight restart loops */
738 * List of handlers for the messages understood by this service.
740 static struct GNUNET_SERVER_MessageHandler handlers[] = {
741 {&handle_start, NULL, GNUNET_MESSAGE_TYPE_ARM_START, 0},
742 {&handle_stop, NULL, GNUNET_MESSAGE_TYPE_ARM_STOP, 0},
748 * Process arm requests.
751 * @param s scheduler to use
752 * @param server the initialized server
753 * @param c configuration to use
757 struct GNUNET_SCHEDULER_Handle *s,
758 struct GNUNET_SERVER_Handle *server,
759 const struct GNUNET_CONFIGURATION_Handle *c)
761 char *defaultservices;
767 GNUNET_CONFIGURATION_get_value_string (cfg,
771 prefix_command = GNUNET_strdup ("");
772 /* start default services... */
774 GNUNET_CONFIGURATION_get_value_string (cfg,
780 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
781 "Starting default services `%s'\n", defaultservices);
783 pos = strtok (defaultservices, " ");
786 start_service (NULL, pos);
787 pos = strtok (NULL, " ");
789 GNUNET_free (defaultservices);
794 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
795 "No default services configured.\n");
799 /* process client requests */
800 GNUNET_SERVER_add_handlers (server, handlers);
802 /* manage services */
803 GNUNET_SCHEDULER_add_delayed (sched,
805 GNUNET_SCHEDULER_PRIORITY_IDLE,
806 GNUNET_SCHEDULER_NO_TASK,
807 MAINT_FREQUENCY, &maint, NULL);
812 * The main function for the arm service.
814 * @param argc number of arguments from the command line
815 * @param argv command line arguments
816 * @return 0 ok, 1 on error
819 main (int argc, char *const *argv)
822 GNUNET_SERVICE_run (argc,
823 argv, "arm", &run, NULL, NULL, NULL)) ? 0 : 1;
826 /* end of gnunet-service-arm.c */