Smaller steps to keep plugin running
[oweals/gnunet.git] / src / arm / gnunet-service-arm.c
index 9643273cfaafd1ffdaca679921d226f76fc8c4f8..84aad3bfd2cfa3fcc8efbe7bc9f68ab189e28bbe 100644 (file)
@@ -1,10 +1,10 @@
 /*
      This file is part of GNUnet.
-     (C) 2009 Christian Grothoff (and other contributing authors)
+     (C) 2009, 2010 Christian Grothoff (and other contributing authors)
 
      GNUnet is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
-     by the Free Software Foundation; either version 2, or (at your
+     by the Free Software Foundation; either version 3, or (at your
      option) any later version.
 
      GNUnet is distributed in the hope that it will be useful, but
  * @author Christian Grothoff
  *
  * TODO:
- * - multiple start-stop requests with RC>1 can result
- *   in UP/DOWN signals based on "pending" that are inaccurate...
- *   => have list of clients waiting for a resolution instead of
- *      giving instant (but incorrect) replies
- * - code could go into restart-loop for a service
- *   if service crashes instantly -- need exponential back-off
  * - need to test auto-restart code on configuration changes;
  * - should refine restart code to check if *relevant* parts of the
  *   configuration were changed (anything in the section for the service)
 #include "gnunet_os_lib.h"
 #include "gnunet_protocols.h"
 #include "gnunet_service_lib.h"
+#include "gnunet_signal_lib.h"
+#include "gnunet-service-arm.h"
 #include "arm.h"
 
 
 /**
- * Run maintenance every second.
+ * Check for configuration file changes every 5s.
  */
-#define MAINT_FREQUENCY GNUNET_TIME_UNIT_SECONDS
+#define MAINT_FREQUENCY GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS, 5)
 
 /**
- * How long do we wait until we decide that a service
- * did not start?
+ * Threshold after which exponential backoff shouldn't increase (in ms); 30m
  */
-#define CHECK_TIMEOUT GNUNET_TIME_UNIT_MINUTES
+#define EXPONENTIAL_BACKOFF_THRESHOLD (1000 * 60 * 30)
 
+#define DELAY_SHUTDOWN GNUNET_NO
+
+/**
+ * List of our services.
+ */
 struct ServiceList;
 
-typedef void (*CleanCallback) (void *cls, struct ServiceList * pos);
 
 /**
  * List of our services.
@@ -86,15 +85,10 @@ struct ServiceList
   char *config;
 
   /**
-   * Function to call upon kill completion (waitpid), NULL
+   * Client to notify upon kill completion (waitpid), NULL
    * if we should simply restart the process.
    */
-  CleanCallback kill_continuation;
-
-  /**
-   * Closure for kill_continuation.
-   */
-  void *kill_continuation_cls;
+  struct GNUNET_SERVER_Client *killing_client;
 
   /**
    * Process ID of the child.
@@ -108,11 +102,14 @@ struct ServiceList
   time_t mtime;
 
   /**
-   * Reference counter (counts how many times we've been
-   * asked to start the service).  We only actually stop
-   * it once rc hits zero.
+   * Process exponential backoff time 
    */
-  unsigned int rc;
+  struct GNUNET_TIME_Relative backoff;
+
+  /**
+   * Absolute time at which the process is scheduled to restart in case of death 
+   */
+  struct GNUNET_TIME_Absolute restartAt;
 
 };
 
@@ -124,7 +121,7 @@ static struct ServiceList *running;
 /**
  * Our configuration
  */
-static struct GNUNET_CONFIGURATION_Handle *cfg;
+static const struct GNUNET_CONFIGURATION_Handle *cfg;
 
 /**
  * Our scheduler.
@@ -136,7 +133,98 @@ static struct GNUNET_SCHEDULER_Handle *sched;
  */
 static char *prefix_command;
 
+/**
+ * Option to append to each actual command.
+ */
+static char *final_option;
+
+/**
+ * ID of task called whenever we get a SIGCHLD.
+ */
+static GNUNET_SCHEDULER_TaskIdentifier child_death_task;
+
+/**
+ * ID of task called whenever the timeout for restarting a child
+ * expires.
+ */
+static GNUNET_SCHEDULER_TaskIdentifier child_restart_task;
+
+/**
+ * Context for our SIGCHLD handler.
+ */
+static struct GNUNET_SIGNAL_Context *shc_chld;
+
+/**
+ * Pipe used to communicate shutdown via signal.
+ */
+static struct GNUNET_DISK_PipeHandle *sigpipe;
+
+/**
+ * Reading end of the signal pipe.
+ */
+static const struct GNUNET_DISK_FileHandle *pr;
+
+/**
+ * Are we in shutdown mode?
+ */
+static int in_shutdown;
+
+
+/**
+ * Handle to our server instance.  Our server is a bit special in that
+ * its service is not immediately stopped once we get a shutdown
+ * request (since we need to continue service until all of our child
+ * processes are dead).  This handle is used to shut down the server
+ * (and thus trigger process termination) once all child processes are
+ * also dead.  A special option in the ARM configuration modifies the
+ * behaviour of the service implementation to not do the shutdown
+ * immediately.
+ */
+static struct GNUNET_SERVER_Handle *server;
+
+
+/**
+ * Scan the list of running services and send SIGTERM to every service
+ * that currently has a process (pid != 0) and whose configuration file
+ * has a modification time newer than the one recorded for the service.
+ * After a successful signal the service's backoff is reset to
+ * GNUNET_TIME_UNIT_MILLISECONDS.
+ *
+ * @param cls closure (not used by this function)
+ * @param tc scheduler context (not used by this function)
+ */
+static void
+config_change_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
+{
+  struct ServiceList *pos;
+  struct stat sbuf;
+
+  for (pos = running; NULL != pos; pos = pos->next)
+    {
+      /* FIXME: this test for config change is a bit too coarse grained */
+      if ( (0 != STAT (pos->config, &sbuf)) ||
+           (pos->mtime >= sbuf.st_mtime) ||
+           (0 == pos->pid) )
+        continue;
+      /* The format string contains a `%s' conversion, so the service
+         name has to be supplied as its argument. */
+      GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+                  _("Restarting service `%s' due to configuration file change.\n"),
+                  pos->name);
+      if (0 != PLIBC_KILL (pos->pid, SIGTERM))
+        GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
+      else
+        pos->backoff = GNUNET_TIME_UNIT_MILLISECONDS;
+      /* NOTE(review): pos->mtime is not refreshed here, so the same file
+         change would match again on a later run -- confirm intended. */
+    }
+}
 
+
+
+/**
+ * Transmit a status result message.
+ *
+ * @param cls pointer to "uint16_t*" with message type
+ * @param size number of bytes available in buf
+ * @param buf where to copy the message, NULL on error
+ * @return number of bytes copied to buf
+ */
 static size_t
 write_result (void *cls, size_t size, void *buf)
 {
@@ -144,7 +232,16 @@ write_result (void *cls, size_t size, void *buf)
   struct GNUNET_MessageHeader *msg;
 
   if (buf == NULL)
-    return 0;                   /* error, not much we can do */
+    {
+      GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+                 _("Could not send status result to client\n"));
+      return 0;                        /* error, not much we can do */
+    }
+#if DEBUG_ARM
+  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+             "Sending status response %u to client\n",
+             (unsigned int) *res);
+#endif
   GNUNET_assert (size >= sizeof (struct GNUNET_MessageHeader));
   msg = buf;
   msg->size = htons (sizeof (struct GNUNET_MessageHeader));
@@ -159,26 +256,36 @@ write_result (void *cls, size_t size, void *buf)
  * Signal our client that we will start or stop the
  * service.
  *
+ * @param client who is being signalled
+ * @param name name of the service
+ * @param result message type to send
  * (no return value; the function is declared void)
  */
 static void
 signal_result (struct GNUNET_SERVER_Client *client,
-               const char *name, uint16_t result)
+              const char *name, uint16_t result)
 {
   uint16_t *res;
 
+  if (NULL == client)   /* added by this change: with no client there is nobody to notify */
+    {
+      GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+                 _
+                 ("Not sending status result to client: no client known\n"));
+      return;           /* nothing has been allocated or queued on this path */
+    }
 #if DEBUG_ARM
   GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
-              "Telling client that service `%s' is now %s\n",
-              name,
-              result == GNUNET_MESSAGE_TYPE_ARM_IS_DOWN ? "down" : "up");
+             "Telling client that service `%s' is now %s\n",
+             name,      /* 'name' is used for this debug log only */
+             result == GNUNET_MESSAGE_TYPE_ARM_IS_DOWN ? "down" : "up");
 #endif
   res = GNUNET_malloc (sizeof (uint16_t));
   *res = result;
   GNUNET_SERVER_notify_transmit_ready (client,
-                                       sizeof (struct GNUNET_MessageHeader),
-                                       GNUNET_TIME_UNIT_FOREVER_REL,
-                                       &write_result, res);
+                                      sizeof (struct GNUNET_MessageHeader),    /* room for a bare message header */
+                                      GNUNET_TIME_UNIT_FOREVER_REL,
+                                      &write_result, res);     /* the heap copy of 'result' is write_result's closure */
 }
 
 
@@ -186,6 +293,7 @@ signal_result (struct GNUNET_SERVER_Client *client,
  * Find the process with the given service
  * name in the given list, remove it and return it.
  *
+ * @param name which service entry to look up
  * @return NULL if it was not found
  */
 static struct ServiceList *
@@ -199,14 +307,14 @@ find_name (const char *name)
   while (pos != NULL)
     {
       if (0 == strcmp (pos->name, name))
-        {
-          if (prev == NULL)
-            running = pos->next;
-          else
-            prev->next = pos->next;
-          pos->next = NULL;
-          return pos;
-        }
+       {
+         if (prev == NULL)
+           running = pos->next;
+         else
+           prev->next = pos->next;
+         pos->next = NULL;
+         return pos;
+       }
       prev = pos;
       pos = pos->next;
     }
@@ -214,6 +322,11 @@ find_name (const char *name)
 }
 
 
+/**
+ * Free an entry in the service list.
+ *
+ * @param pos entry to free
+ */
 static void
 free_entry (struct ServiceList *pos)
 {
@@ -223,234 +336,260 @@ free_entry (struct ServiceList *pos)
   GNUNET_free (pos);
 }
 
-
-
+#include "do_start_process.c"
 
 /**
  * Actually start the process for the given service.
  *
+ * The command prefix comes from the service's "PREFIX" option, falling
+ * back to prefix_command.  The trailing options come from the service's
+ * "OPTIONS" option; when that is missing, a copy of final_option is used
+ * and, provided it contains no '%', every "{}" in it is replaced by the
+ * service name and every "$NAME" (NAME being a run of upper-case
+ * letters) by the value of NAME in the "PATHS" section, or by the empty
+ * string if that lookup fails.  The result of do_start_process() is
+ * stored in sl->pid without being checked.
+ *
  * @param sl identifies service to start
+ * @param lsocks -1 terminated list of listen sockets to pass (systemd style), or NULL
  */
 static void
-start_process (struct ServiceList *sl)
+start_process (struct ServiceList *sl,
+              const int *lsocks)
 {
   char *loprefix;
   char *options;
-  char **argv;
-  unsigned int argv_size;
-  char *lopos;
   char *optpos;
-  const char *firstarg;
+  char *optend;
+  const char *next;
   int use_debug;
+  char b;
+  char *val;
 
   /* start service */
   if (GNUNET_OK !=
       GNUNET_CONFIGURATION_get_value_string (cfg,
-                                             sl->name, "PREFIX", &loprefix))
+                                            sl->name, "PREFIX", &loprefix))
     loprefix = GNUNET_strdup (prefix_command);
   if (GNUNET_OK !=
       GNUNET_CONFIGURATION_get_value_string (cfg,
-                                             sl->name, "OPTIONS", &options))
-    options = GNUNET_strdup ("");
+                                            sl->name, "OPTIONS", &options))
+    {      
+      options = GNUNET_strdup (final_option);  /* no per-service OPTIONS: start from the global template */
+      if (NULL == strstr (options, "%"))       /* options is used as a format string below, so skip templates containing '%' */
+       {
+         /* replace '{}' with service name */
+         while (NULL != (optpos = strstr (options, "{}")))
+           {
+             optpos[0] = '%';  /* overwrite "{}" in place with "%s" ... */
+             optpos[1] = 's';
+             GNUNET_asprintf (&optpos, /* ... and let the formatter insert sl->name there */
+                              options,
+                              sl->name);
+             GNUNET_free (options);
+             options = optpos;
+           }
+         /* replace '$PATH' with value associated with "PATH" */
+         while (NULL != (optpos = strstr (options, "$")))
+           {
+             optend = optpos + 1;
+             while (isupper ( (unsigned char) *optend)) optend++;      /* advance past the upper-case name */
+             b = *optend;      /* remember the character that ends the name */
+             if ('\0' == b)
+               next = "";
+             else
+               next = optend+1;        /* text after that terminating character */
+             *optend = '\0';   /* temporarily terminate the name so it can be used as option key */
+             if (GNUNET_OK !=
+                 GNUNET_CONFIGURATION_get_value_string (cfg, "PATHS",
+                                                        optpos+1,
+                                                        &val))
+               val = GNUNET_strdup ("");
+             *optpos = '\0';   /* cut options at the '$' so it now holds only the text before it */
+             GNUNET_asprintf (&optpos, /* rebuild as: text before '$', value, terminator, rest */
+                              "%s%s%c%s",
+                              options,
+                              val,
+                              b,
+                              next);
+             GNUNET_free (options);
+             GNUNET_free (val);
+             options = optpos;
+           }
+       }
+    }
   use_debug = GNUNET_CONFIGURATION_get_value_yesno (cfg, sl->name, "DEBUG");
 
-  GNUNET_log (GNUNET_ERROR_TYPE_INFO, _("Starting service `%s'\n"), sl->name);
 #if DEBUG_ARM
   GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
-              "Starting service `%s' using binary `%s' and configuration `%s'\n",
-              sl->name, sl->binary, sl->config);
+             "Starting service `%s' using binary `%s' and configuration `%s'\n",
+             sl->name, sl->binary, sl->config);
 #endif
-  argv_size = 6;
-  if (use_debug)
-    argv_size += 2;
-  lopos = loprefix;
-  while ('\0' != *lopos)
-    {
-      if (*lopos == ' ')
-        argv_size++;
-      lopos++;
-    }
-  optpos = options;
-  while ('\0' != *optpos)
-    {
-      if (*optpos == ' ')
-        argv_size++;
-      optpos++;
-    }
-  firstarg = NULL;
-  argv = GNUNET_malloc (argv_size * sizeof (char *));
-  argv_size = 0;
-  lopos = loprefix;
-
-  while ('\0' != *lopos)
-    {
-      while (*lopos == ' ')
-        lopos++;
-      if (*lopos == '\0')
-        continue;
-      if (argv_size == 0)
-        firstarg = lopos;
-      argv[argv_size++] = lopos;
-      while (('\0' != *lopos) && (' ' != *lopos))
-        lopos++;
-      if ('\0' == *lopos)
-        continue;
-      *lopos = '\0';
-      lopos++;
-    }
-  if (argv_size == 0)
-    firstarg = sl->binary;
-  argv[argv_size++] = sl->binary;
-  argv[argv_size++] = "-c";
-  argv[argv_size++] = sl->config;
   if (GNUNET_YES == use_debug)
-    {
-      argv[argv_size++] = "-L";
-      argv[argv_size++] = "DEBUG";
-    }
-  optpos = options;
-  while ('\0' != *optpos)
-    {
-      while (*optpos == ' ')
-        optpos++;
-      if (*optpos == '\0')
-        continue;
-      argv[argv_size++] = optpos;
-      while (('\0' != *optpos) && (' ' != *optpos))
-        optpos++;
-      if ('\0' == *optpos)
-        continue;
-      *optpos = '\0';
-      optpos++;
-    }
-  argv[argv_size++] = NULL;
-  sl->pid = GNUNET_OS_start_process_v (firstarg, argv);
-  GNUNET_free (argv);
+    sl->pid = do_start_process (lsocks,        /* same call as the else branch plus "-L" "DEBUG" */
+                               loprefix,                               
+                               sl->binary,
+                               "-c", sl->config,
+                               "-L", "DEBUG",
+                               options,
+                               NULL);
+  else
+    sl->pid = do_start_process (lsocks,
+                               loprefix,
+                               sl->binary,
+                               "-c", sl->config,
+                               options,
+                               NULL);
+  GNUNET_log (GNUNET_ERROR_TYPE_INFO, 
+             _("Starting service `%s' (PID: %d)\n"), 
+             sl->name,
+             (int) sl->pid);
   GNUNET_free (loprefix);
   GNUNET_free (options);
+  /* FIXME: should check sl->pid */
 }
 
 
 /**
  * Start the specified service.
+ *
+ * Refuses while in_shutdown is set, and reports a service that already
+ * has an entry in the running list as up without starting anything;
+ * both cases return GNUNET_SYSERR.  Otherwise the "BINARY" and "CONFIG"
+ * options of the service are looked up, a new list entry is created
+ * with a backoff of GNUNET_TIME_UNIT_MILLISECONDS and no restart time,
+ * and the process is started.
+ *
+ * @param client who is asking for this
+ * @param servicename name of the service to start
+ * @param lsocks -1 terminated list of listen sockets to pass (systemd style), or NULL
+ * @return GNUNET_OK if a new entry was created and start_process() was
+ *         called for it, GNUNET_SYSERR otherwise (including when the
+ *         service was already running)
  */
-static void
-start_service (struct GNUNET_SERVER_Client *client, const char *servicename)
+int
+start_service (struct GNUNET_SERVER_Client *client, 
+              const char *servicename,
+              const int *lsocks)
 {
   struct ServiceList *sl;
   char *binary;
   char *config;
   struct stat sbuf;
+
+  if (GNUNET_YES == in_shutdown)       /* no new services once shutdown has begun */
+    {
+      GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+                 _("ARM is shutting down, service `%s' not started.\n"),
+                 servicename);
+      signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
+      return GNUNET_SYSERR;
+    }
   sl = find_name (servicename);
   if (sl != NULL)
     {
-      /* already running, just increment RC */
-      GNUNET_log (GNUNET_ERROR_TYPE_INFO,
-                  _("Service `%s' already running.\n"), servicename);
-      sl->rc++;
+      GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+                 _("Service `%s' already running.\n"), servicename);
       sl->next = running;
       running = sl;
       signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP);
-      return;
+      return GNUNET_SYSERR;    /* the client is told "up", but the caller gets SYSERR because nothing was started */
     }
   if (GNUNET_OK !=
       GNUNET_CONFIGURATION_get_value_string (cfg,
-                                             servicename, "BINARY", &binary))
+                                            servicename, "BINARY", &binary))
     {
       GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
-                  _("Binary implementing service `%s' not known!\n"),
-                  servicename);
+                 _("Binary implementing service `%s' not known!\n"),
+                 servicename);
       signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
-      return;
+      return GNUNET_SYSERR;
     }
   if ((GNUNET_OK !=
        GNUNET_CONFIGURATION_get_value_filename (cfg,
-                                                servicename,
-                                                "CONFIG",
-                                                &config)) ||
+                                               servicename,
+                                               "CONFIG",
+                                               &config)) ||
       (0 != STAT (config, &sbuf)))
     {
       GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
-                  _("Configuration file `%s' for service `%s' not known!\n"),
-                  config, servicename);
+                 _("Configuration file `%s' for service `%s' not known!\n"),
+                 config, servicename);
       signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
       GNUNET_free (binary);
-      GNUNET_free (config);
-      return;
+      GNUNET_free_non_null (config);   /* NOTE(review): config is not initialised in this function; presumably the failed lookup sets it to NULL -- confirm */
+      return GNUNET_SYSERR;
     }
+  (void) stop_listening (servicename); /* result deliberately ignored; stop_listening is defined outside this view */
   sl = GNUNET_malloc (sizeof (struct ServiceList));
   sl->name = GNUNET_strdup (servicename);
   sl->next = running;
-  sl->rc = 1;
   sl->binary = binary;
   sl->config = config;
   sl->mtime = sbuf.st_mtime;
-  running = sl;
-  start_process (sl);
-  signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP);
-}
+  sl->backoff = GNUNET_TIME_UNIT_MILLISECONDS; /* initial restart delay */
+  sl->restartAt = GNUNET_TIME_UNIT_FOREVER_ABS;        /* no restart scheduled yet */
 
-
-static void
-free_and_signal (void *cls, struct ServiceList *pos)
-{
-  struct GNUNET_SERVER_Client *client = cls;
-  /* find_name will remove "pos" from the list! */
-  GNUNET_assert (pos == find_name (pos->name));
-  GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Service `%s' stopped\n", pos->name);
-  signal_result (client, pos->name, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
-  GNUNET_SERVER_receive_done (client, GNUNET_OK);
-  GNUNET_SERVER_client_drop (client);
-  free_entry (pos);
+  running = sl;
+  start_process (sl, lsocks);
+  if (NULL != client)  /* signal_result performs the same NULL check itself; this just avoids its log message */
+    signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP);
+  return GNUNET_OK;
 }
 
 
 /**
  * Stop the specified service.
+ *
+ * Replies immediately (and calls GNUNET_SERVER_receive_done) when the
+ * service has no entry in the running list, is already being killed,
+ * ARM is in shutdown, or the entry has no process (pid == 0).
+ * Otherwise SIGTERM is sent, the client is recorded in killing_client
+ * and a reference to it is kept; no reply is sent from this function
+ * in that case.
+ *
+ * @param client who is asking for this
+ * @param servicename name of the service to stop
  */
 static void
-stop_service (struct GNUNET_SERVER_Client *client, const char *servicename)
+stop_service (struct GNUNET_SERVER_Client *client,
+             const char *servicename)
 {
   struct ServiceList *pos;
-  struct GNUNET_CLIENT_Connection *sc;
 
   GNUNET_log (GNUNET_ERROR_TYPE_INFO,
-              "Preparing to stop `%s'\n", servicename);
+             _("Preparing to stop `%s'\n"), servicename);
   pos = find_name (servicename);
-  if ((pos != NULL) && (pos->kill_continuation != NULL))
+  if (pos == NULL)     /* not in the running list */
     {
-      /* killing already in progress */
-      signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
+      if (GNUNET_OK == stop_listening (servicename))   /* the result selects between the DOWN and UNKNOWN replies */
+       signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
+      else
+       signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UNKNOWN);
+      GNUNET_SERVER_receive_done (client, GNUNET_OK);
       return;
     }
-  if ((pos != NULL) && (pos->rc > 1))
+  if (pos->killing_client != NULL)     /* an earlier stop request is still waiting for the process to die */
     {
-      /* RC>1, just decrement RC */
-      pos->rc--;
+      /* killing already in progress */
+#if DEBUG_ARM
+      GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+                 "Service `%s' is already down\n", servicename);
+#endif
+      signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
+      GNUNET_SERVER_receive_done (client, GNUNET_OK);
       pos->next = running;
       running = pos;
-      signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP);
-      GNUNET_SERVER_receive_done (client, GNUNET_OK);
       return;
     }
-  if (pos != NULL)
+
+  if (GNUNET_YES == in_shutdown)       /* no signal is sent from here while in shutdown */
     {
-      if (0 != PLIBC_KILL (pos->pid, SIGTERM))
-        GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
+#if DEBUG_ARM
+      GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+                 "Termination request already sent to `%s' (since ARM is in shutdown).\n",
+                 servicename);
+#endif
+      signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
+      GNUNET_SERVER_receive_done (client, GNUNET_OK);
       pos->next = running;
       running = pos;
-      pos->kill_continuation = &free_and_signal;
-      pos->kill_continuation_cls = client;
-      GNUNET_SERVER_client_keep (client);
+      return;
     }
-  else
+  if (pos->pid == 0)   /* entry exists but currently has no process */
     {
-      sc = GNUNET_CLIENT_connect (sched, servicename, cfg);
-      GNUNET_CLIENT_service_shutdown (sc);
-      GNUNET_CLIENT_disconnect (sc);
+      /* process is in delayed restart, simply remove it! */
+      free_entry (pos);        /* pos was already unlinked by find_name above */
       signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
       GNUNET_SERVER_receive_done (client, GNUNET_OK);
+      return;
     }
+#if DEBUG_ARM
+  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+             "Sending kill signal to service `%s', waiting for process to die.\n",
+             servicename);
+#endif
+  if (0 != PLIBC_KILL (pos->pid, SIGTERM))
+    GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");   /* a failed kill is only logged; the entry is still marked as being killed */
+  pos->next = running;
+  running = pos;
+  pos->killing_client = client;        /* maint_child_death answers this client once the process has died */
+  GNUNET_SERVER_client_keep (client);  /* reference released there via GNUNET_SERVER_client_drop */
 }
 
 
@@ -465,8 +604,8 @@ stop_service (struct GNUNET_SERVER_Client *client, const char *servicename)
  */
 static void
 handle_start (void *cls,
-              struct GNUNET_SERVER_Client *client,
-              const struct GNUNET_MessageHeader *message)
+             struct GNUNET_SERVER_Client *client,
+             const struct GNUNET_MessageHeader *message)
 {
   const char *servicename;
   uint16_t size;
@@ -480,7 +619,7 @@ handle_start (void *cls,
       GNUNET_SERVER_receive_done (client, GNUNET_SYSERR);
       return;
     }
-  start_service (client, servicename);
+  start_service (client, servicename, NULL);
   GNUNET_SERVER_receive_done (client, GNUNET_OK);
 }
 
@@ -496,8 +635,8 @@ handle_start (void *cls,
  */
 static void
 handle_stop (void *cls,
-             struct GNUNET_SERVER_Client *client,
-             const struct GNUNET_MessageHeader *message)
+            struct GNUNET_SERVER_Client *client,
+            const struct GNUNET_MessageHeader *message)
 {
   const char *servicename;
   uint16_t size;
@@ -515,120 +654,361 @@ handle_stop (void *cls,
 }
 
 
+/**
+ * Drop every entry without a process (pid == 0) from the
+ * running list and free it; entries that still have a
+ * process are left in place, in their original order.
+ */
+static void
+clean_up_running ()
+{
+  struct ServiceList **link;
+  struct ServiceList *entry;
+
+  /* 'link' always points at the pointer that leads to 'entry':
+     first the list head, later the 'next' field of the last
+     entry that was kept. */
+  link = &running;
+  while (NULL != (entry = *link))
+    {
+      if (0 != entry->pid)
+       {
+         link = &entry->next;
+         continue;
+       }
+      /* unlink first, then release the entry */
+      *link = entry->next;
+      free_entry (entry);
+    }
+}
+
 
 /**
- * Background task doing maintenance.
+ * We are done with everything.  Stop remaining 
+ * tasks, signal handler and the server. 
+ *
+ * Destroys the server handle, uninstalls the SIGCHLD handler context
+ * and cancels child_death_task, clearing each global afterwards.
+ * NOTE(review): child_death_task is cancelled unconditionally, so this
+ * presumably must only run while that task is scheduled -- confirm.
+ */
+static void
+do_shutdown ()
+{
+  GNUNET_SERVER_destroy (server);
+  server = NULL;
+  GNUNET_SIGNAL_handler_uninstall (shc_chld);
+  shc_chld = NULL;
+  GNUNET_SCHEDULER_cancel (sched, child_death_task);
+  child_death_task = GNUNET_SCHEDULER_NO_TASK;
+}
+
+#if DELAY_SHUTDOWN
+/**
+ * Placeholder task used to delay arm shutdown: its only
+ * effect when run is to log a warning.
+ *
+ * @param cls closure (unused)
+ * @param tc scheduler context (unused)
+ */
+void
+dummy_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
+{
+  GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Dummy task executing\n");
+}
+#endif
+
+/**
+ * Task run for shutdown.
+ *
+ * Calls stop_listening (NULL), sets in_shutdown, and sends SIGTERM to
+ * every entry of the running list that has a process (pid != 0).
+ * do_shutdown() is called from here only if the running list is
+ * already empty; entries are not removed by this function.
  *
- * @param cls closure
+ * @param cls closure, NULL if we need to self-restart
  * @param tc context
  */
 static void
-maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
+shutdown_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
 {
   struct ServiceList *pos;
-  const char *statstr;
-  int statcode;
-  struct stat sbuf;
 
+#if DEBUG_ARM
+  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, _("Stopping all services\n"));
+#endif
+  stop_listening (NULL);
+  in_shutdown = GNUNET_YES;    /* from here on start_service refuses requests and dead children are not restarted */
+  pos = running;
+  while (NULL != pos)
+    {
+      if (pos->pid != 0)       /* entries without a process have nothing to signal */
+       {
+         GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+                     "Stopping service `%s' (PID: %d)\n",
+                     pos->name,
+                     pos->pid);
+         if (0 != PLIBC_KILL (pos->pid, SIGTERM))
+           GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
+       }
+      pos = pos->next;
+    }
+#if DELAY_SHUTDOWN
+  GNUNET_SCHEDULER_add_delayed(sched, GNUNET_TIME_relative_multiply(GNUNET_TIME_UNIT_SECONDS, 2), &dummy_task, NULL);
+#endif
+  if (running == NULL) /* nothing was ever listed, so there is no child death to wait for */
+    do_shutdown ();
+}
+
+
+/**
+ * Task run whenever it is time to restart a child that died.
+ *
+ * On scheduler shutdown it removes all entries without a process and
+ * calls do_shutdown() if the list is then empty.  Otherwise it restarts
+ * every entry without a process whose restartAt time has been reached
+ * and, if other such entries are still waiting, reschedules itself for
+ * the smallest remaining delay among them.
+ *
+ * @param cls closure, always NULL
+ * @param tc context
+ */
+static void
+delayed_restart_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
+{
+  struct ServiceList *pos;
+  struct GNUNET_TIME_Relative lowestRestartDelay;
+
+  child_restart_task = GNUNET_SCHEDULER_NO_TASK;       /* this run consumes the scheduled task */
   if (0 != (tc->reason & GNUNET_SCHEDULER_REASON_SHUTDOWN))
     {
-      GNUNET_log (GNUNET_ERROR_TYPE_INFO, _("Stopping all services\n"));
-      while (NULL != (pos = running))
-        {
-          running = pos->next;
-          if (0 != PLIBC_KILL (pos->pid, SIGTERM))
-            GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
-          if (GNUNET_OK != GNUNET_OS_process_wait(pos->pid))
-            GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "waitpid");
-          free_entry (pos);
-        }
+      clean_up_running ();     /* entries awaiting a restart will never be restarted now */
+      if (NULL == running)
+       do_shutdown ();
       return;
     }
-  GNUNET_SCHEDULER_add_delayed (tc->sched,
-                                GNUNET_YES,
-                                GNUNET_SCHEDULER_PRIORITY_IDLE,
-                                GNUNET_SCHEDULER_NO_PREREQUISITE_TASK,
-                                MAINT_FREQUENCY, &maint, cfg);
+  lowestRestartDelay = GNUNET_TIME_UNIT_FOREVER_REL;   /* sentinel: no restart pending */
+
+  /* check for services that need to be restarted due to
+     configuration changes or because the last restart failed */
+  pos = running;
+  while (pos != NULL)
+    {
+      if ( (pos->pid == 0) && 
+          (GNUNET_YES != in_shutdown) )
+       {
+         if (GNUNET_TIME_absolute_get_remaining (pos->restartAt).value == 0)   /* restart time reached */
+           {
+             GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+                         _("Restarting service `%s'.\n"), pos->name);
+             start_process (pos, NULL);        /* restarts are made without passing listen sockets */
+           }
+         else
+           {
+             lowestRestartDelay 
+               = GNUNET_TIME_relative_min (lowestRestartDelay,
+                                           GNUNET_TIME_absolute_get_remaining
+                                           (pos->restartAt));
+           }
+       }
+      pos = pos->next;
+    }  
+  if (lowestRestartDelay.value != GNUNET_TIME_UNIT_FOREVER_REL.value)  /* at least one entry is still waiting */
+    {
+#if DEBUG_ARM
+      GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+                 "Will restart process in %llums\n",
+                 (unsigned long long) lowestRestartDelay.value);
+#endif
+      child_restart_task
+       = GNUNET_SCHEDULER_add_delayed (sched,
+                                       lowestRestartDelay,
+                                       &delayed_restart_task,
+                                       NULL);
+    }
+}
+
+
+/**
+ * Task triggered whenever we receive a SIGCHLD (child
+ * process died).  
+ *
+ * @param cls closure, NULL if we need to self-restart
+ * @param tc context
+ */
+static void
+maint_child_death (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
+{
+  struct ServiceList *pos;
+  struct ServiceList *prev;
+  struct ServiceList *next;
+  const char *statstr;
+  int statcode;
+  int ret;
+  char c[16];
+  enum GNUNET_OS_ProcessStatusType statusType;
+  unsigned long statusCode;
+
+  child_death_task = GNUNET_SCHEDULER_NO_TASK;
+  if (0 == (tc->reason & GNUNET_SCHEDULER_REASON_READ_READY))
+    {
+      child_death_task =
+       GNUNET_SCHEDULER_add_read_file (sched, GNUNET_TIME_UNIT_FOREVER_REL, pr,
+                                       &maint_child_death, NULL);
+      return;    
+    }
+  /* consume the signal */
+  GNUNET_break (0 < GNUNET_DISK_file_read (pr, &c, sizeof (c)));
 
   /* check for services that died (WAITPID) */
-  for (pos = running; pos != NULL; pos = pos->next)
+  prev = NULL;
+  next = running;
+  while (NULL != (pos = next))
     {
-      enum GNUNET_OS_ProcessStatusType statusType;
-      unsigned long statusCode;
-
-      if (GNUNET_OS_process_status(pos->pid, &statusType, &statusCode) != GNUNET_OK)
-      {
-        GNUNET_log_strerror(GNUNET_ERROR_TYPE_ERROR, "GNUNET_OS_process_status");
-        continue;
-      }
-
-      if (statusType == GNUNET_OS_PROCESS_STOPPED || statusType == GNUNET_OS_PROCESS_RUNNING)
-        continue;
-      else if (statusType == GNUNET_OS_PROCESS_EXITED)
-        {
-          statstr = _( /* process termination method */ "exit");
-          statcode = statusCode;
-        }
+      next = pos->next;
+      if (pos->pid == 0) 
+       {
+         prev = pos;
+         continue;
+       }
+      if ((GNUNET_SYSERR == (ret = GNUNET_OS_process_status (pos->pid,
+                                                            &statusType,
+                                                            &statusCode))) ||
+         ( (ret == GNUNET_NO) ||
+           (statusType == GNUNET_OS_PROCESS_STOPPED) ||
+           (statusType == GNUNET_OS_PROCESS_RUNNING)) )
+       {
+         prev = pos;
+         continue;
+       }
+
+      if (statusType == GNUNET_OS_PROCESS_EXITED)
+       {
+         statstr = _( /* process termination method */ "exit");
+         statcode = statusCode;
+       }
       else if (statusType == GNUNET_OS_PROCESS_SIGNALED)
-        {
-          statstr = _( /* process termination method */ "signal");
-          statcode = statusCode;
-        }
+       {
+         statstr = _( /* process termination method */ "signal");
+         statcode = statusCode;
+       }
       else
-        {
-          statstr = _( /* process termination method */ "unknown");
-          statcode = 0;
-        }
-      if (NULL != pos->kill_continuation)
-        {
-          pos->kill_continuation (pos->kill_continuation_cls, pos);
-        }
+       {
+         statstr = _( /* process termination method */ "unknown");
+         statcode = 0;
+       }
+      pos->pid = 0;
+      if (NULL != pos->killing_client) 
+       {
+         if (prev == NULL)
+           running = next;
+         else
+           prev->next = next;
+         GNUNET_log (GNUNET_ERROR_TYPE_INFO, 
+                     _("Service `%s' stopped\n"),
+                     pos->name);
+         signal_result (pos->killing_client, 
+                        pos->name, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
+         GNUNET_SERVER_receive_done (pos->killing_client, GNUNET_OK);
+         GNUNET_SERVER_client_drop (pos->killing_client);
+         free_entry (pos);
+         continue;
+       }
+      if (GNUNET_YES != in_shutdown)
+       {
+         GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+                     _
+                     ("Service `%s' terminated with status %s/%d, will try to restart it!\n"),
+                     pos->name, statstr, statcode);
+         /* schedule restart */
+         pos->restartAt
+           = GNUNET_TIME_relative_to_absolute (pos->backoff);
+         if (pos->backoff.value < EXPONENTIAL_BACKOFF_THRESHOLD)
+           pos->backoff 
+             = GNUNET_TIME_relative_multiply (pos->backoff, 2);
+         if (GNUNET_SCHEDULER_NO_TASK != child_restart_task)
+           GNUNET_SCHEDULER_cancel (sched, child_restart_task);
+         child_restart_task 
+           = GNUNET_SCHEDULER_add_with_priority (sched,
+                                                 GNUNET_SCHEDULER_PRIORITY_IDLE,
+                                                 &delayed_restart_task,
+                                                 NULL);
+       }
+#if DEBUG_ARM
       else
-        {
-          GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
-                      _
-                      ("Service `%s' terminated with status %s/%d, will try to restart it!\n"),
-                      pos->name, statstr, statcode);
-          /* schedule restart */
-          pos->pid = 0;
-        }
+       GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+                   "Service `%s' terminated with status %s/%d\n",
+                   pos->name, statstr, statcode);
+#endif
+      prev = pos;
     }
+  if (in_shutdown)
+    clean_up_running ();
+  if ( (running == NULL) &&
+       (in_shutdown) )
+    {
+      GNUNET_SERVER_destroy (server);
+      GNUNET_SIGNAL_handler_uninstall (shc_chld);
+      shc_chld = NULL;
+    }
+  else
+    {
+      child_death_task =
+       GNUNET_SCHEDULER_add_read_file (sched, GNUNET_TIME_UNIT_FOREVER_REL, pr,
+                                       &maint_child_death, NULL);
+    }
+}
 
-  /* check for services that need to be restarted due to
-     configuration changes or because the last restart failed */
-  pos = running;
-  while (pos != NULL)
+
+/**
+ * Transmit callback that writes the shutdown ACK for the client that
+ * requested the shutdown.  On both the success and the failure path it
+ * finishes processing of the client's request and releases the client
+ * reference that handle_shutdown took with GNUNET_SERVER_client_keep.
+ *
+ * @param cls the `struct GNUNET_SERVER_Client' the ACK is meant for
+ * @param size number of bytes available in buf; anything smaller than
+ *        a message header is treated as a failed transmission
+ * @param buf buffer the ACK message header is written to
+ * @return sizeof (struct GNUNET_MessageHeader) if the ACK was written,
+ *         0 if it could not be transmitted
+ */
+static size_t
+transmit_shutdown_ack (void *cls, size_t size, void *buf)
+{
+  struct GNUNET_SERVER_Client *client = cls;
+  struct GNUNET_MessageHeader *msg;
+
+  if (size < sizeof (struct GNUNET_MessageHeader))
     {
-      if ((0 == STAT (pos->config, &sbuf)) && (pos->mtime < sbuf.st_mtime))
-        {
-          GNUNET_log (GNUNET_ERROR_TYPE_INFO,
-                      _
-                      ("Restarting service `%s' due to configuration file change.\n"));
-          if (0 != PLIBC_KILL (pos->pid, SIGTERM))
-            GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
-        }
-      if (pos->pid == 0)
-        {
-          GNUNET_log (GNUNET_ERROR_TYPE_INFO,
-                      _("Restarting service `%s'.\n"), pos->name);
-          /* FIXME: should have some exponentially
-             increasing timer to avoid tight restart loops */
-          start_process (pos);
-        }
-      pos = pos->next;
+      GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+                  _("Failed to transmit shutdown ACK.\n"));
+      GNUNET_SERVER_receive_done (client, GNUNET_SYSERR);
+      /* release the reference here as well; without this the client
+         kept by handle_shutdown is never dropped when the ACK fails */
+      GNUNET_SERVER_client_drop (client);
+      return 0;                 /* client disconnected */
     }
+
+  GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+              _("Transmitting shutdown ACK.\n"));
+
+  /* the ACK is a bare header: type and size in network byte order */
+  msg = (struct GNUNET_MessageHeader *) buf;
+  msg->type = htons (GNUNET_MESSAGE_TYPE_ARM_SHUTDOWN_ACK);
+  msg->size = htons (sizeof (struct GNUNET_MessageHeader));
+  GNUNET_SERVER_receive_done (client, GNUNET_OK);
+  GNUNET_SERVER_client_drop(client);
+  return sizeof (struct GNUNET_MessageHeader);
+}
+
+/**
+ * Handler for SHUTDOWN message.  Queues a shutdown ACK for the
+ * requesting client and then asks the scheduler to shut down.
+ * The request is not marked as done here; transmit_shutdown_ack
+ * calls GNUNET_SERVER_receive_done once it runs.
+ *
+ * @param cls closure (not read by this handler)
+ * @param client identification of the client
+ * @param message the actual message (not inspected by this handler)
+ */
+static void
+handle_shutdown (void *cls,
+                 struct GNUNET_SERVER_Client *client,
+                 const struct GNUNET_MessageHeader *message)
+{
+  /* hold a reference while the ACK is pending; transmit_shutdown_ack
+     drops it after writing the ACK */
+  GNUNET_SERVER_client_keep(client);
+  GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+              _("Initiating shutdown as requested by client.\n"));
+
+  /* request room for exactly one message header, with no timeout;
+     NOTE(review): the returned handle is ignored -- confirm that a
+     failure to queue cannot leave the kept reference dangling */
+  GNUNET_SERVER_notify_transmit_ready (client,
+                                       sizeof(struct GNUNET_MessageHeader),
+                                       GNUNET_TIME_UNIT_FOREVER_REL,
+                                       &transmit_shutdown_ack, client);
+  /* NOTE(review): presumably keeps this connection open while the
+     server winds down so the ACK can still go out -- confirm against
+     GNUNET_SERVER_client_persist_ */
+  GNUNET_SERVER_client_persist_ (client);
+  GNUNET_SCHEDULER_shutdown (sched);
 }
 
 
 /**
- * List of handlers for the messages understood by this
- * service.
+ * Signal handler called for SIGCHLD.  Triggers the
+ * respective handler by writing to the trigger pipe.
+ * One byte is written to the write end of sigpipe; the read end
+ * of that pipe is what maint_child_death is scheduled on, so the
+ * actual child handling happens in that scheduler task.  errno is
+ * saved before and restored after the write.
  */
-static struct GNUNET_SERVER_MessageHandler handlers[] = {
-  {&handle_start, NULL, GNUNET_MESSAGE_TYPE_ARM_START, 0},
-  {&handle_stop, NULL, GNUNET_MESSAGE_TYPE_ARM_STOP, 0},
-  {NULL, NULL, 0, 0}
-};
+static void
+sighandler_child_death ()
+{
+  /* byte that gets written; it is never assigned, so presumably only
+     the arrival of a byte matters to the reader -- confirm */
+  static char c;
+  int old_errno = errno; /* back-up errno */
+  /* exactly one byte must have been written, anything else is flagged */
+  GNUNET_break (1 == 
+               GNUNET_DISK_file_write (GNUNET_DISK_pipe_handle
+                                       (sigpipe, GNUNET_DISK_PIPE_END_WRITE), &c,
+                                       sizeof (c)));
+  errno = old_errno; /* restore errno */
+}
 
 
 /**
@@ -636,63 +1016,96 @@ static struct GNUNET_SERVER_MessageHandler handlers[] = {
  *
  * @param cls closure
  * @param s scheduler to use
- * @param server the initialized server
+ * @param serv the initialized server
  * @param c configuration to use
  */
 static void
 run (void *cls,
      struct GNUNET_SCHEDULER_Handle *s,
-     struct GNUNET_SERVER_Handle *server,
-     struct GNUNET_CONFIGURATION_Handle *c)
+     struct GNUNET_SERVER_Handle *serv,
+     const struct GNUNET_CONFIGURATION_Handle *c)
 {
+  /* message handlers registered with the server below; the last field
+     is sizeof (header) for SHUTDOWN and 0 for START/STOP -- presumably
+     the expected message size with 0 meaning "variable", confirm
+     against struct GNUNET_SERVER_MessageHandler */
+  static const struct GNUNET_SERVER_MessageHandler handlers[] = {
+    {&handle_start, NULL, GNUNET_MESSAGE_TYPE_ARM_START, 0},
+    {&handle_stop, NULL, GNUNET_MESSAGE_TYPE_ARM_STOP, 0},
+    {&handle_shutdown, NULL, GNUNET_MESSAGE_TYPE_ARM_SHUTDOWN,
+     sizeof (struct GNUNET_MessageHeader)},
+    {NULL, NULL, 0, 0}
+  };
   char *defaultservices;
   char *pos;
 
-  GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Starting...\n");
   cfg = c;
   sched = s;
+  server = serv;
+  GNUNET_assert (serv != NULL);
+  /* SIGCHLD plumbing: sighandler_child_death writes into sigpipe and
+     maint_child_death is scheduled on the pipe's read end (pr).
+     NOTE(review): the handler is installed before sigpipe is created,
+     so a SIGCHLD delivered in between would see a NULL pipe --
+     presumably harmless because no child has been started yet; confirm */
+  shc_chld = GNUNET_SIGNAL_handler_install (GNUNET_SIGCHLD, &sighandler_child_death);
+  GNUNET_assert (sigpipe == NULL);
+  sigpipe = GNUNET_DISK_pipe (GNUNET_NO);
+  GNUNET_assert (sigpipe != NULL);
+  pr = GNUNET_DISK_pipe_handle (sigpipe, GNUNET_DISK_PIPE_END_READ);
+  GNUNET_assert (pr != NULL);
+  /* NOTE(review): presumably keeps the server alive across scheduler
+     shutdown; maint_child_death destroys it once shutdown is in
+     progress and no service is left running -- confirm against
+     GNUNET_SERVER_ignore_shutdown */
+  GNUNET_SERVER_ignore_shutdown (serv, GNUNET_YES);
+  /* shutdown_task is queued with an unbounded delay; presumably it
+     only fires when the scheduler is shut down -- confirm */
+  GNUNET_SCHEDULER_add_delayed (sched,
+                               GNUNET_TIME_UNIT_FOREVER_REL,
+                               &shutdown_task,
+                               NULL);
+  child_death_task =
+    GNUNET_SCHEDULER_add_read_file (sched, GNUNET_TIME_UNIT_FOREVER_REL, pr,
+                                   &maint_child_death, NULL);
+
+  /* [ARM] GLOBAL_PREFIX and GLOBAL_POSTFIX fall back to the empty
+     string when they are not configured */
   if (GNUNET_OK !=
       GNUNET_CONFIGURATION_get_value_string (cfg,
-                                             "ARM",
-                                             "GLOBAL_PREFIX",
-                                             &prefix_command))
+                                            "ARM",
+                                            "GLOBAL_PREFIX",
+                                            &prefix_command))
     prefix_command = GNUNET_strdup ("");
+  if (GNUNET_OK !=
+      GNUNET_CONFIGURATION_get_value_string (cfg,
+                                            "ARM",
+                                            "GLOBAL_POSTFIX",
+                                            &final_option))
+    final_option = GNUNET_strdup ("");
   /* start default services... */
   if (GNUNET_OK ==
       GNUNET_CONFIGURATION_get_value_string (cfg,
-                                             "ARM",
-                                             "DEFAULTSERVICES",
-                                             &defaultservices))
+                                            "ARM",
+                                            "DEFAULTSERVICES",
+                                            &defaultservices))
     {
 #if DEBUG_ARM
       GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
-                  "Starting default services `%s'\n", defaultservices);
+                 "Starting default services `%s'\n", defaultservices);
 #endif
-      pos = strtok (defaultservices, " ");
-      while (pos != NULL)
-        {
-          start_service (NULL, pos);
-          pos = strtok (NULL, " ");
-        }
+      /* DEFAULTSERVICES is split on spaces; strtok cuts the string in
+         place and each token is handed to start_service */
+      if (0 < strlen (defaultservices))
+       {
+         pos = strtok (defaultservices, " ");
+         while (pos != NULL)
+           {
+             start_service (NULL, pos, NULL);
+             pos = strtok (NULL, " ");
+           }
+       }
       GNUNET_free (defaultservices);
     }
   else
     {
 #if DEBUG_ARM
       GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
-                  "No default services configured.\n");
+                 "No default services configured.\n");
 #endif
     }
 
+  /* create listening sockets for future services*/
+  prepareServices (cfg, sched);
+  
   /* process client requests */
   GNUNET_SERVER_add_handlers (server, handlers);
 
   /* manage services */
-  GNUNET_SCHEDULER_add_delayed (sched,
-                                GNUNET_YES,
-                                GNUNET_SCHEDULER_PRIORITY_IDLE,
-                                GNUNET_SCHEDULER_NO_PREREQUISITE_TASK,
-                                MAINT_FREQUENCY, &maint, NULL);
+  GNUNET_SCHEDULER_add_with_priority (sched,
+                                     GNUNET_SCHEDULER_PRIORITY_IDLE,
+                                     &config_change_task, NULL);
 }
 
 
@@ -707,8 +1120,8 @@ int
 main (int argc, char *const *argv)
 {
+  /* exit status: 0 when GNUNET_SERVICE_run reports GNUNET_OK, 1 otherwise */
   return (GNUNET_OK ==
-          GNUNET_SERVICE_run (argc,
-                              argv, "arm", &run, NULL, NULL, NULL)) ? 0 : 1;
+         GNUNET_SERVICE_run (argc,
+                             argv, "arm", GNUNET_YES, &run, NULL)) ? 0 : 1;
 }
 
 /* end of gnunet-service-arm.c */