Smaller steps to keep plugin running
[oweals/gnunet.git] / src / arm / gnunet-service-arm.c
index f525750cfb3686396a73b7844f568a2a811bf864..84aad3bfd2cfa3fcc8efbe7bc9f68ab189e28bbe 100644 (file)
@@ -1,10 +1,10 @@
 /*
      This file is part of GNUnet.
-     (C) 2009 Christian Grothoff (and other contributing authors)
+     (C) 2009, 2010 Christian Grothoff (and other contributing authors)
 
      GNUnet is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
-     by the Free Software Foundation; either version 2, or (at your
+     by the Free Software Foundation; either version 3, or (at your
      option) any later version.
 
      GNUnet is distributed in the hope that it will be useful, but
  * @author Christian Grothoff
  *
  * TODO:
- * - multiple start-stop requests with RC>1 can result
- *   in UP/DOWN signals based on "pending" that are inaccurate...
- *   => have list of clients waiting for a resolution instead of
- *      giving instant (but incorrect) replies
- * - code could go into restart-loop for a service
- *   if service crashes instantly -- need exponential back-off
  * - need to test auto-restart code on configuration changes;
  * - should refine restart code to check if *relevant* parts of the
  *   configuration were changed (anything in the section for the service)
 #include "gnunet_protocols.h"
 #include "gnunet_service_lib.h"
 #include "gnunet_signal_lib.h"
+#include "gnunet-service-arm.h"
 #include "arm.h"
 
 
 /**
- * Run normal maintenance every 2s.
+ * Check for configuration file changes every 5s.
  */
-#define MAINT_FREQUENCY GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS, 2)
+#define MAINT_FREQUENCY GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS, 5)
 
 /**
- * Run fast maintenance after 100ms.  This is used for an extra-job
- * that is run to check for a process that we just killed.
+ * Threshold after which exponential backoff shouldn't increase (in ms); 30m
  */
-#define MAINT_FAST_FREQUENCY GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 100)
-
-/**
- * How long do we wait until we decide that a service
- * did not start?
- */
-#define CHECK_TIMEOUT GNUNET_TIME_UNIT_MINUTES
-
-
-/**
- * Threshold after which exponential backoff shouldn't increase
- */
-#define EXPONENTIAL_BACKOFF_THRESHOLD 8
+#define EXPONENTIAL_BACKOFF_THRESHOLD (1000 * 60 * 30)
 
+#define DELAY_SHUTDOWN GNUNET_NO
 
 /**
  * List of our services.
  */
 struct ServiceList;
 
-/**
- * Function to call if waitpid informs us that
- * a process has died.
- *
- * @param cls closure
- * @param pos entry in the service list of the process that died
- */
-typedef void (*CleanCallback) (void *cls, struct ServiceList * pos);
 
 /**
  * List of our services.
@@ -110,15 +85,10 @@ struct ServiceList
   char *config;
 
   /**
-   * Function to call upon kill completion (waitpid), NULL
+   * Client to notify upon kill completion (waitpid), NULL
    * if we should simply restart the process.
    */
-  CleanCallback kill_continuation;
-
-  /**
-   * Closure for kill_continuation.
-   */
-  void *kill_continuation_cls;
+  struct GNUNET_SERVER_Client *killing_client;
 
   /**
    * Process ID of the child.
@@ -131,18 +101,15 @@ struct ServiceList
    */
   time_t mtime;
 
-  /* Process exponential backoff time */
+  /**
+   * Process exponential backoff time 
+   */
   struct GNUNET_TIME_Relative backoff;
 
-  /* Absolute time at which the process is scheduled to restart in case of death */
-  struct GNUNET_TIME_Absolute restartAt;
-
   /**
-   * Reference counter (counts how many times we've been
-   * asked to start the service).  We only actually stop
-   * it once rc hits zero.
+   * Absolute time at which the process is scheduled to restart in case of death 
    */
-  unsigned int rc;
+  struct GNUNET_TIME_Absolute restartAt;
 
 };
 
@@ -166,6 +133,37 @@ static struct GNUNET_SCHEDULER_Handle *sched;
  */
 static char *prefix_command;
 
+/**
+ * Option to append to each actual command.
+ */
+static char *final_option;
+
+/**
+ * ID of task called whenever we get a SIGCHLD.
+ */
+static GNUNET_SCHEDULER_TaskIdentifier child_death_task;
+
+/**
+ * ID of task called whenever the timeout for restarting a child
+ * expires.
+ */
+static GNUNET_SCHEDULER_TaskIdentifier child_restart_task;
+
+/**
+ * Context for our SIGCHLD handler.
+ */
+static struct GNUNET_SIGNAL_Context *shc_chld;
+
+/**
+ * Pipe used to communicate child death (SIGCHLD) via signal.
+ */
+static struct GNUNET_DISK_PipeHandle *sigpipe;
+
+/**
+ * Reading end of the signal pipe.
+ */
+static const struct GNUNET_DISK_FileHandle *pr;
+
 /**
  * Are we in shutdown mode?
  */
@@ -184,13 +182,39 @@ static int in_shutdown;
  */
 static struct GNUNET_SERVER_Handle *server;
 
+
 /**
- * Background task doing maintenance.
+ * If the configuration file changes, restart tasks that depended on that
+ * option.
  *
  * @param cls closure, NULL if we need to self-restart
  * @param tc context
  */
-static void maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc);
+static void
+config_change_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
+{
+  struct ServiceList *pos;
+  struct stat sbuf;
+
+  /* Walk the list of running services.  Every entry that has a live
+     process (pid != 0) and whose configuration file is newer than the
+     mtime recorded in the entry is sent SIGTERM.  Neither 'cls' nor
+     'tc' is used by this function. */
+  pos = running;
+  while (pos != NULL)
+    {
+      /* FIXME: this test for config change is a bit too coarse grained */
+      /* NOTE(review): pos->mtime is not refreshed here; confirm it is
+         updated elsewhere, otherwise the same service would be signalled
+         again on every run of this task. */
+      if ( (0 == STAT (pos->config, &sbuf)) &&
+          (pos->mtime < sbuf.st_mtime) &&
+          (pos->pid != 0) )
+       {
+         /* the format contains a `%s' conversion, so the service name
+            must be supplied as an argument */
+         GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+                     _("Restarting service `%s' due to configuration file change.\n"),
+                     pos->name);
+         if (0 != PLIBC_KILL (pos->pid, SIGTERM))
+           GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
+         else
+           /* signal was delivered: reset the backoff to its minimum;
+              maint_child_death derives the restart time from it */
+           pos->backoff = GNUNET_TIME_UNIT_MILLISECONDS;
+       }
+      pos = pos->next;
+    }
+}
+
 
 
 /**
@@ -213,6 +237,11 @@ write_result (void *cls, size_t size, void *buf)
                  _("Could not send status result to client\n"));
       return 0;                        /* error, not much we can do */
     }
+#if DEBUG_ARM
+  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+             "Sending status response %u to client\n",
+             (unsigned int) *res);
+#endif
   GNUNET_assert (size >= sizeof (struct GNUNET_MessageHeader));
   msg = buf;
   msg->size = htons (sizeof (struct GNUNET_MessageHeader));
@@ -307,23 +336,26 @@ free_entry (struct ServiceList *pos)
   GNUNET_free (pos);
 }
 
+#include "do_start_process.c"
 
 /**
  * Actually start the process for the given service.
  *
  * @param sl identifies service to start
+ * @param lsocks -1 terminated list of listen sockets to pass (systemd style), or NULL
  */
 static void
-start_process (struct ServiceList *sl)
+start_process (struct ServiceList *sl,
+              const int *lsocks)
 {
   char *loprefix;
   char *options;
-  char **argv;
-  unsigned int argv_size;
-  char *lopos;
   char *optpos;
-  const char *firstarg;
+  char *optend;
+  const char *next;
   int use_debug;
+  char b;
+  char *val;
 
   /* start service */
   if (GNUNET_OK !=
@@ -333,83 +365,79 @@ start_process (struct ServiceList *sl)
   if (GNUNET_OK !=
       GNUNET_CONFIGURATION_get_value_string (cfg,
                                             sl->name, "OPTIONS", &options))
-    options = GNUNET_strdup ("");
+    {      
+      options = GNUNET_strdup (final_option);
+      if (NULL == strstr (options, "%"))
+       {
+         /* replace '{}' with service name */
+         while (NULL != (optpos = strstr (options, "{}")))
+           {
+             optpos[0] = '%';
+             optpos[1] = 's';
+             GNUNET_asprintf (&optpos,
+                              options,
+                              sl->name);
+             GNUNET_free (options);
+             options = optpos;
+           }
+         /* replace '$PATH' with value associated with "PATH" */
+         while (NULL != (optpos = strstr (options, "$")))
+           {
+             optend = optpos + 1;
+             while (isupper ( (unsigned char) *optend)) optend++;            
+             b = *optend;
+             if ('\0' == b)
+               next = "";
+             else
+               next = optend+1;
+             *optend = '\0';
+             if (GNUNET_OK !=
+                 GNUNET_CONFIGURATION_get_value_string (cfg, "PATHS",
+                                                        optpos+1,
+                                                        &val))
+               val = GNUNET_strdup ("");
+             *optpos = '\0';
+             GNUNET_asprintf (&optpos,
+                              "%s%s%c%s",
+                              options,
+                              val,
+                              b,
+                              next);
+             GNUNET_free (options);
+             GNUNET_free (val);
+             options = optpos;
+           }
+       }
+    }
   use_debug = GNUNET_CONFIGURATION_get_value_yesno (cfg, sl->name, "DEBUG");
 
-  GNUNET_log (GNUNET_ERROR_TYPE_INFO, _("Starting service `%s'\n"), sl->name);
 #if DEBUG_ARM
   GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
              "Starting service `%s' using binary `%s' and configuration `%s'\n",
              sl->name, sl->binary, sl->config);
 #endif
-  argv_size = 6;
-  if (use_debug)
-    argv_size += 2;
-  lopos = loprefix;
-  while ('\0' != *lopos)
-    {
-      if (*lopos == ' ')
-       argv_size++;
-      lopos++;
-    }
-  optpos = options;
-  while ('\0' != *optpos)
-    {
-      if (*optpos == ' ')
-       argv_size++;
-      optpos++;
-    }
-  firstarg = NULL;
-  argv = GNUNET_malloc (argv_size * sizeof (char *));
-  argv_size = 0;
-  lopos = loprefix;
-
-  while ('\0' != *lopos)
-    {
-      while (*lopos == ' ')
-       lopos++;
-      if (*lopos == '\0')
-       continue;
-      if (argv_size == 0)
-       firstarg = lopos;
-      argv[argv_size++] = lopos;
-      while (('\0' != *lopos) && (' ' != *lopos))
-       lopos++;
-      if ('\0' == *lopos)
-       continue;
-      *lopos = '\0';
-      lopos++;
-    }
-  if (argv_size == 0)
-    firstarg = sl->binary;
-  argv[argv_size++] = sl->binary;
-  argv[argv_size++] = "-c";
-  argv[argv_size++] = sl->config;
   if (GNUNET_YES == use_debug)
-    {
-      argv[argv_size++] = "-L";
-      argv[argv_size++] = "DEBUG";
-    }
-  optpos = options;
-  while ('\0' != *optpos)
-    {
-      while (*optpos == ' ')
-       optpos++;
-      if (*optpos == '\0')
-       continue;
-      argv[argv_size++] = optpos;
-      while (('\0' != *optpos) && (' ' != *optpos))
-       optpos++;
-      if ('\0' == *optpos)
-       continue;
-      *optpos = '\0';
-      optpos++;
-    }
-  argv[argv_size++] = NULL;
-  sl->pid = GNUNET_OS_start_process_v (firstarg, argv);
-  GNUNET_free (argv);
+    sl->pid = do_start_process (lsocks,
+                               loprefix,                               
+                               sl->binary,
+                               "-c", sl->config,
+                               "-L", "DEBUG",
+                               options,
+                               NULL);
+  else
+    sl->pid = do_start_process (lsocks,
+                               loprefix,
+                               sl->binary,
+                               "-c", sl->config,
+                               options,
+                               NULL);
+  GNUNET_log (GNUNET_ERROR_TYPE_INFO, 
+             _("Starting service `%s' (PID: %d)\n"), 
+             sl->name,
+             (int) sl->pid);
   GNUNET_free (loprefix);
   GNUNET_free (options);
+  /* FIXME: should check sl->pid */
 }
 
 
@@ -418,9 +446,13 @@ start_process (struct ServiceList *sl)
  *
  * @param client who is asking for this
  * @param servicename name of the service to start
+ * @param lsocks -1 terminated list of listen sockets to pass (systemd style), or NULL
+ * @return GNUNET_OK on success, GNUNET_SYSERR on error
  */
-static void
-start_service (struct GNUNET_SERVER_Client *client, const char *servicename)
+int
+start_service (struct GNUNET_SERVER_Client *client, 
+              const char *servicename,
+              const int *lsocks)
 {
   struct ServiceList *sl;
   char *binary;
@@ -433,19 +465,17 @@ start_service (struct GNUNET_SERVER_Client *client, const char *servicename)
                  _("ARM is shutting down, service `%s' not started.\n"),
                  servicename);
       signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
-      return;
+      return GNUNET_SYSERR;
     }
   sl = find_name (servicename);
   if (sl != NULL)
     {
-      /* already running, just increment RC */
-      GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+      GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
                  _("Service `%s' already running.\n"), servicename);
-      sl->rc++;
       sl->next = running;
       running = sl;
       signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP);
-      return;
+      return GNUNET_SYSERR;
     }
   if (GNUNET_OK !=
       GNUNET_CONFIGURATION_get_value_string (cfg,
@@ -455,7 +485,7 @@ start_service (struct GNUNET_SERVER_Client *client, const char *servicename)
                  _("Binary implementing service `%s' not known!\n"),
                  servicename);
       signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
-      return;
+      return GNUNET_SYSERR;
     }
   if ((GNUNET_OK !=
        GNUNET_CONFIGURATION_get_value_filename (cfg,
@@ -470,12 +500,12 @@ start_service (struct GNUNET_SERVER_Client *client, const char *servicename)
       signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
       GNUNET_free (binary);
       GNUNET_free_non_null (config);
-      return;
+      return GNUNET_SYSERR;
     }
+  (void) stop_listening (servicename);
   sl = GNUNET_malloc (sizeof (struct ServiceList));
   sl->name = GNUNET_strdup (servicename);
   sl->next = running;
-  sl->rc = 1;
   sl->binary = binary;
   sl->config = config;
   sl->mtime = sbuf.st_mtime;
@@ -483,29 +513,10 @@ start_service (struct GNUNET_SERVER_Client *client, const char *servicename)
   sl->restartAt = GNUNET_TIME_UNIT_FOREVER_ABS;
 
   running = sl;
-  start_process (sl);
+  start_process (sl, lsocks);
   if (NULL != client)
     signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP);
-}
-
-
-/**
- * Free the given entry in the service list and signal
- * the given client that the service is now down.
- *
- * @param cls pointer to the client ("struct GNUNET_SERVER_Client*")
- * @param pos entry for the service
- */
-static void
-free_and_signal (void *cls, struct ServiceList *pos)
-{
-  struct GNUNET_SERVER_Client *client = cls;
-  /* find_name will remove "pos" from the list! */
-  GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Service `%s' stopped\n", pos->name);
-  signal_result (client, pos->name, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
-  GNUNET_SERVER_receive_done (client, GNUNET_OK);
-  GNUNET_SERVER_client_drop (client);
-  free_entry (pos);
+  return GNUNET_OK;
 }
 
 
@@ -516,7 +527,8 @@ free_and_signal (void *cls, struct ServiceList *pos)
  * @param servicename name of the service to stop
  */
 static void
-stop_service (struct GNUNET_SERVER_Client *client, const char *servicename)
+stop_service (struct GNUNET_SERVER_Client *client,
+             const char *servicename)
 {
   struct ServiceList *pos;
 
@@ -525,28 +537,14 @@ stop_service (struct GNUNET_SERVER_Client *client, const char *servicename)
   pos = find_name (servicename);
   if (pos == NULL)
     {
-      signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UNKNOWN);
-      GNUNET_SERVER_receive_done (client, GNUNET_OK);
-      return;
-    }
-  if (pos->rc > 1)
-    {
-      /* RC>1, just decrement RC */
-      pos->rc--;
-      pos->next = running;
-      running = pos;
-#if DEBUG_ARM
-      GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
-                 "Service `%s' still used by %u clients, will keep it running!\n",
-                 servicename, pos->rc);
-#endif
-      signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP);
+      if (GNUNET_OK == stop_listening (servicename))
+       signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
+      else
+       signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UNKNOWN);
       GNUNET_SERVER_receive_done (client, GNUNET_OK);
       return;
     }
-  if (pos->rc == 1)
-    pos->rc--;                 /* decrement RC to zero */
-  if (pos->kill_continuation != NULL)
+  if (pos->killing_client != NULL)
     {
       /* killing already in progress */
 #if DEBUG_ARM
@@ -555,6 +553,8 @@ stop_service (struct GNUNET_SERVER_Client *client, const char *servicename)
 #endif
       signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
       GNUNET_SERVER_receive_done (client, GNUNET_OK);
+      pos->next = running;
+      running = pos;
       return;
     }
 
@@ -565,12 +565,20 @@ stop_service (struct GNUNET_SERVER_Client *client, const char *servicename)
                  "Termination request already sent to `%s' (since ARM is in shutdown).\n",
                  servicename);
 #endif
+      signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
+      GNUNET_SERVER_receive_done (client, GNUNET_OK);
+      pos->next = running;
+      running = pos;
+      return;
+    }
+  if (pos->pid == 0)
+    {
+      /* process is in delayed restart, simply remove it! */
+      free_entry (pos);
       signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
       GNUNET_SERVER_receive_done (client, GNUNET_OK);
       return;
     }
-
-
 #if DEBUG_ARM
   GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
              "Sending kill signal to service `%s', waiting for process to die.\n",
@@ -580,11 +588,8 @@ stop_service (struct GNUNET_SERVER_Client *client, const char *servicename)
     GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
   pos->next = running;
   running = pos;
-  pos->kill_continuation = &free_and_signal;
-  pos->kill_continuation_cls = client;
+  pos->killing_client = client;
   GNUNET_SERVER_client_keep (client);
-  GNUNET_SCHEDULER_add_delayed (sched,
-                               MAINT_FAST_FREQUENCY, &maint, "non-null");
 }
 
 
@@ -614,7 +619,7 @@ handle_start (void *cls,
       GNUNET_SERVER_receive_done (client, GNUNET_SYSERR);
       return;
     }
-  start_service (client, servicename);
+  start_service (client, servicename, NULL);
   GNUNET_SERVER_receive_done (client, GNUNET_OK);
 }
 
@@ -650,175 +655,167 @@ handle_stop (void *cls,
 
 
 /**
- * Background task doing maintenance.
- *
- * @param cls closure, NULL if we need to self-restart
- * @param tc context
+ * Remove all entries for tasks that are not running
+ * (pid = 0) from the running list (they will no longer
+ * be restarted since we are shutting down).
  */
 static void
-maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
+clean_up_running ()
 {
   struct ServiceList *pos;
-  struct ServiceList *prev;
   struct ServiceList *next;
-  const char *statstr;
-  int statcode;
-  struct stat sbuf;
-  struct GNUNET_TIME_Relative lowestRestartDelay;
-  int ret;
-
-  lowestRestartDelay = GNUNET_TIME_UNIT_FOREVER_REL;
-
-  if (0 != (tc->reason & GNUNET_SCHEDULER_REASON_SHUTDOWN))
-    {
-      GNUNET_log (GNUNET_ERROR_TYPE_INFO, _("Stopping all services\n"));
-
-      in_shutdown = GNUNET_YES;
-      pos = running;
-      while (NULL != pos)
-       {
-         if (pos->pid != 0)
-           {
-#if DEBUG_ARM
-             GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
-                         "Sending SIGTERM to `%s'\n", pos->name);
-#endif
-             if (0 != PLIBC_KILL (pos->pid, SIGTERM))
-               GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
-           }
-         pos = pos->next;
-       }
-    }
-  if (cls == NULL)
-    {
-      if ((in_shutdown == GNUNET_YES) && (running == NULL))
-       {
-#if DEBUG_ARM
-         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ARM service terminates.\n");
-#endif
-         GNUNET_assert (server != NULL);
-         GNUNET_SERVER_destroy (server);
-         server = NULL;
-         return;               /* we are done! */
-       }
-      GNUNET_SCHEDULER_add_delayed (tc->sched,
-                                   (in_shutdown == GNUNET_YES)
-                                   ? MAINT_FAST_FREQUENCY
-                                   : MAINT_FREQUENCY, &maint, NULL);
-    }
-
-  /* check for services that died (WAITPID) */
+  struct ServiceList *prev;
+  pos = running;
   prev = NULL;
-  next = running;
-  while (NULL != (pos = next))
+  while (NULL != pos)
     {
-      enum GNUNET_OS_ProcessStatusType statusType;
-      unsigned long statusCode;
-
       next = pos->next;
-      if ((NULL != pos->kill_continuation) ||
-         ((GNUNET_YES == in_shutdown) && (pos->pid == 0)))
+      if (pos->pid == 0)
        {
          if (prev == NULL)
            running = next;
          else
            prev->next = next;
-         if (NULL != pos->kill_continuation)
-           pos->kill_continuation (pos->kill_continuation_cls, pos);
-         else
-           free_entry (pos);
-         continue;
-       }
-      if ((GNUNET_SYSERR == (ret = GNUNET_OS_process_status (pos->pid,
-                                                            &statusType,
-                                                            &statusCode))) ||
-         ((ret == GNUNET_NO) ||
-          (statusType == GNUNET_OS_PROCESS_STOPPED) ||
-          (statusType == GNUNET_OS_PROCESS_RUNNING)))
-       {
-         prev = pos;
-         continue;
-       }
-      if (statusType == GNUNET_OS_PROCESS_EXITED)
-       {
-         statstr = _( /* process termination method */ "exit");
-         statcode = statusCode;
-       }
-      else if (statusType == GNUNET_OS_PROCESS_SIGNALED)
-       {
-         statstr = _( /* process termination method */ "signal");
-         statcode = statusCode;
+         free_entry (pos);
        }
       else
-       {
-         statstr = _( /* process termination method */ "unknown");
-         statcode = 0;
-       }
-      if (GNUNET_YES != in_shutdown)
-       GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
-                   _
-                   ("Service `%s' terminated with status %s/%d, will try to restart it!\n"),
-                   pos->name, statstr, statcode);
-#if DEBUG_ARM
-      else
-       GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
-                   "Service `%s' terminated with status %s/%d\n",
-                   pos->name, statstr, statcode);
-#endif
-      /* schedule restart */
-      pos->pid = 0;
-      prev = pos;
+       prev = pos;
+      pos = next;
     }
+}
 
-  /* check for services that need to be restarted due to
-     configuration changes or because the last restart failed */
+
+/**
+ * We are done with everything.  Stop remaining
+ * tasks, signal handler and the server.
+ *
+ * This function is reachable from more than one task (the shutdown
+ * task and the shutdown branch of the delayed restart task both call
+ * it once the running list is empty), so every teardown step is
+ * guarded and the function may safely be invoked more than once.
+ */
+static void
+do_shutdown (void)
+{
+  if (NULL != server)
+    {
+      GNUNET_SERVER_destroy (server);
+      server = NULL;
+    }
+  if (NULL != shc_chld)
+    {
+      GNUNET_SIGNAL_handler_uninstall (shc_chld);
+      shc_chld = NULL;
+    }
+  /* only cancel the SIGCHLD reader task if it is actually pending */
+  if (GNUNET_SCHEDULER_NO_TASK != child_death_task)
+    {
+      GNUNET_SCHEDULER_cancel (sched, child_death_task);
+      child_death_task = GNUNET_SCHEDULER_NO_TASK;
+    }
+}
+
+#if DELAY_SHUTDOWN
+/**
+ * Placeholder task that shutdown_task schedules with a delay when
+ * DELAY_SHUTDOWN is enabled.  It does nothing except emit one
+ * warning-level log line.
+ *
+ * @param cls closure (unused)
+ * @param tc scheduler context (unused)
+ */
+void
+dummy_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
+{
+  GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Dummy task executing\n");
+}
+#endif
+
+/**
+ * Task run for shutdown.
+ *
+ * @param cls closure, NULL if we need to self-restart
+ * @param tc context
+ */
+static void
+shutdown_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
+{
+  struct ServiceList *pos;
+
+#if DEBUG_ARM
+  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, _("Stopping all services\n"));
+#endif
+  stop_listening (NULL);
+  in_shutdown = GNUNET_YES;
   pos = running;
-  while (pos != NULL)
+  while (NULL != pos)
     {
-      if ((0 == STAT (pos->config, &sbuf)) && (pos->mtime < sbuf.st_mtime))
+      if (pos->pid != 0)
        {
          GNUNET_log (GNUNET_ERROR_TYPE_INFO,
-                     _
-                     ("Restarting service `%s' due to configuration file change.\n"));
+                     "Stopping service `%s' (PID: %d)\n",
+                     pos->name,
+                     pos->pid);
          if (0 != PLIBC_KILL (pos->pid, SIGTERM))
            GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
        }
-      if ((pos->pid == 0) && (GNUNET_YES != in_shutdown))
-       {
-         GNUNET_log (GNUNET_ERROR_TYPE_INFO,
-                     _("Restarting service `%s'.\n"), pos->name);
-         /* FIXME: should have some exponentially
-            increasing timer to avoid tight restart loops */
-         if (pos->restartAt.value != GNUNET_TIME_UNIT_FOREVER_ABS.value)
-           {
-             /* Otherwise, the process died for the first time, backoff should't increase */
-             if (pos->backoff.value < EXPONENTIAL_BACKOFF_THRESHOLD)
-               pos->backoff =
-                 GNUNET_TIME_relative_multiply (pos->backoff, 2);
-           }
+      pos = pos->next;
+    }
+#if DELAY_SHUTDOWN
+  GNUNET_SCHEDULER_add_delayed(sched, GNUNET_TIME_relative_multiply(GNUNET_TIME_UNIT_SECONDS, 2), &dummy_task, NULL);
+#endif
+  if (running == NULL)
+    do_shutdown ();
+}
 
-         pos->restartAt = GNUNET_TIME_relative_to_absolute (pos->backoff);
 
-         lowestRestartDelay = GNUNET_TIME_relative_min (lowestRestartDelay,
-                                                        GNUNET_TIME_absolute_get_remaining
-                                                        (pos->restartAt));
+/**
+ * Task run whenever it is time to restart a child that died.
+ *
+ * @param cls closure, always NULL
+ * @param tc context
+ */
+static void
+delayed_restart_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
+{
+  struct ServiceList *pos;
+  struct GNUNET_TIME_Relative lowestRestartDelay;
 
+  child_restart_task = GNUNET_SCHEDULER_NO_TASK;
+  if (0 != (tc->reason & GNUNET_SCHEDULER_REASON_SHUTDOWN))
+    {
+      clean_up_running ();
+      if (NULL == running)
+       do_shutdown ();
+      return;
+    }
+  lowestRestartDelay = GNUNET_TIME_UNIT_FOREVER_REL;
+
+  /* check for services that need to be restarted due to
+     configuration changes or because the last restart failed */
+  pos = running;
+  while (pos != NULL)
+    {
+      if ( (pos->pid == 0) && 
+          (GNUNET_YES != in_shutdown) )
+       {
          if (GNUNET_TIME_absolute_get_remaining (pos->restartAt).value == 0)
-           start_process (pos);
+           {
+             GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+                         _("Restarting service `%s'.\n"), pos->name);
+             start_process (pos, NULL);
+           }
+         else
+           {
+             lowestRestartDelay 
+               = GNUNET_TIME_relative_min (lowestRestartDelay,
+                                           GNUNET_TIME_absolute_get_remaining
+                                           (pos->restartAt));
+           }
        }
       pos = pos->next;
+    }  
+  if (lowestRestartDelay.value != GNUNET_TIME_UNIT_FOREVER_REL.value)
+    {
+#if DEBUG_ARM
+      GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+                 "Will restart process in %llums\n",
+                 (unsigned long long) lowestRestartDelay.value);
+#endif
+      child_restart_task
+       = GNUNET_SCHEDULER_add_delayed (sched,
+                                       lowestRestartDelay,
+                                       &delayed_restart_task,
+                                       NULL);
     }
 }
 
-#if 0
-static GNUNET_SCHEDULER_TaskIdentifier child_death_task;
-
-static GNUNET_SCHEDULER_TaskIdentifier child_restart_task;
-
-
 
 /**
- * 
+ * Task triggered whenever we receive a SIGCHLD (child
+ * process died).  
  *
  * @param cls closure, NULL if we need to self-restart
  * @param tc context
@@ -831,48 +828,44 @@ maint_child_death (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
   struct ServiceList *next;
   const char *statstr;
   int statcode;
-  struct stat sbuf;
-  struct GNUNET_TIME_Relative lowestRestartDelay;
   int ret;
+  char c[16];
+  enum GNUNET_OS_ProcessStatusType statusType;
+  unsigned long statusCode;
 
   child_death_task = GNUNET_SCHEDULER_NO_TASK;
-  if (0 != (tc->reason & GNUNET_SCHEDULER_REASON_SHUTDOWN))
-    return;
-  child_death_task =
-    GNUNET_SCHEDULER_add_read_file (sched, GNUNET_TIME_UNIT_FOREVER_REL, pr,
-                                   &maint_child_death, NULL);
+  if (0 == (tc->reason & GNUNET_SCHEDULER_REASON_READ_READY))
+    {
+      child_death_task =
+       GNUNET_SCHEDULER_add_read_file (sched, GNUNET_TIME_UNIT_FOREVER_REL, pr,
+                                       &maint_child_death, NULL);
+      return;    
+    }
+  /* consume the signal */
+  GNUNET_break (0 < GNUNET_DISK_file_read (pr, &c, sizeof (c)));
+
   /* check for services that died (WAITPID) */
   prev = NULL;
   next = running;
   while (NULL != (pos = next))
     {
-      enum GNUNET_OS_ProcessStatusType statusType;
-      unsigned long statusCode;
-
       next = pos->next;
-      if ((NULL != pos->kill_continuation) ||
-         ((GNUNET_YES == in_shutdown) && (pos->pid == 0)))
+      if (pos->pid == 0) 
        {
-         if (prev == NULL)
-           running = next;
-         else
-           prev->next = next;
-         if (NULL != pos->kill_continuation)
-           pos->kill_continuation (pos->kill_continuation_cls, pos);
-         else
-           free_entry (pos);
+         prev = pos;
          continue;
        }
       if ((GNUNET_SYSERR == (ret = GNUNET_OS_process_status (pos->pid,
                                                             &statusType,
                                                             &statusCode))) ||
-         ((ret == GNUNET_NO) ||
-          (statusType == GNUNET_OS_PROCESS_STOPPED) ||
-          (statusType == GNUNET_OS_PROCESS_RUNNING)))
+         ( (ret == GNUNET_NO) ||
+           (statusType == GNUNET_OS_PROCESS_STOPPED) ||
+           (statusType == GNUNET_OS_PROCESS_RUNNING)) )
        {
          prev = pos;
          continue;
        }
+
       if (statusType == GNUNET_OS_PROCESS_EXITED)
        {
          statstr = _( /* process termination method */ "exit");
@@ -888,97 +881,133 @@ maint_child_death (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
          statstr = _( /* process termination method */ "unknown");
          statcode = 0;
        }
+      pos->pid = 0;
+      if (NULL != pos->killing_client) 
+       {
+         if (prev == NULL)
+           running = next;
+         else
+           prev->next = next;
+         GNUNET_log (GNUNET_ERROR_TYPE_INFO, 
+                     _("Service `%s' stopped\n"),
+                     pos->name);
+         signal_result (pos->killing_client, 
+                        pos->name, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
+         GNUNET_SERVER_receive_done (pos->killing_client, GNUNET_OK);
+         GNUNET_SERVER_client_drop (pos->killing_client);
+         free_entry (pos);
+         continue;
+       }
       if (GNUNET_YES != in_shutdown)
-       GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
-                   _
-                   ("Service `%s' terminated with status %s/%d, will try to restart it!\n"),
-                   pos->name, statstr, statcode);
+       {
+         GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+                     _
+                     ("Service `%s' terminated with status %s/%d, will try to restart it!\n"),
+                     pos->name, statstr, statcode);
+         /* schedule restart */
+         pos->restartAt
+           = GNUNET_TIME_relative_to_absolute (pos->backoff);
+         if (pos->backoff.value < EXPONENTIAL_BACKOFF_THRESHOLD)
+           pos->backoff 
+             = GNUNET_TIME_relative_multiply (pos->backoff, 2);
+         if (GNUNET_SCHEDULER_NO_TASK != child_restart_task)
+           GNUNET_SCHEDULER_cancel (sched, child_restart_task);
+         child_restart_task 
+           = GNUNET_SCHEDULER_add_with_priority (sched,
+                                                 GNUNET_SCHEDULER_PRIORITY_IDLE,
+                                                 &delayed_restart_task,
+                                                 NULL);
+       }
 #if DEBUG_ARM
       else
        GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
                    "Service `%s' terminated with status %s/%d\n",
                    pos->name, statstr, statcode);
 #endif
-      /* schedule restart */
-      pos->pid = 0;
       prev = pos;
     }
-
-  /* check for services that need to be restarted due to
-     configuration changes or because the last restart failed */
-  pos = running;
-  while (pos != NULL)
+  if (in_shutdown)
+    clean_up_running ();
+  if ( (running == NULL) &&
+       (in_shutdown) )
     {
-      if ((0 == STAT (pos->config, &sbuf)) && (pos->mtime < sbuf.st_mtime))
-       {
-         GNUNET_log (GNUNET_ERROR_TYPE_INFO,
-                     _
-                     ("Restarting service `%s' due to configuration file change.\n"));
-         if (0 != PLIBC_KILL (pos->pid, SIGTERM))
-           GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
-       }
-      if ((pos->pid == 0) && (GNUNET_YES != in_shutdown))
-       {
-         GNUNET_log (GNUNET_ERROR_TYPE_INFO,
-                     _("Restarting service `%s'.\n"), pos->name);
-         /* FIXME: should have some exponentially
-            increasing timer to avoid tight restart loops */
-         if (pos->restartAt.value != GNUNET_TIME_UNIT_FOREVER_ABS.value)
-           {
-             /* Otherwise, the process died for the first time, backoff should't increase */
-             if (pos->backoff.value < EXPONENTIAL_BACKOFF_THRESHOLD)
-               pos->backoff =
-                 GNUNET_TIME_relative_multiply (pos->backoff, 2);
-           }
-
-         pos->restartAt = GNUNET_TIME_relative_to_absolute (pos->backoff);
-
-         lowestRestartDelay = GNUNET_TIME_relative_min (lowestRestartDelay,
-                                                        GNUNET_TIME_absolute_get_remaining
-                                                        (pos->restartAt));
-
-         if (GNUNET_TIME_absolute_get_remaining (pos->restartAt).value == 0)
-           start_process (pos);
-       }
-      pos = pos->next;
+      GNUNET_SERVER_destroy (server);
+      GNUNET_SIGNAL_handler_uninstall (shc_chld);
+      shc_chld = NULL;
+    }
+  else
+    {
+      child_death_task =
+       GNUNET_SCHEDULER_add_read_file (sched, GNUNET_TIME_UNIT_FOREVER_REL, pr,
+                                       &maint_child_death, NULL);
     }
 }
 
-#endif
 
+/**
+ * Transmit callback that writes the shutdown ACK to the client that
+ * asked ARM to shut down.
+ *
+ * On both the success and the failure path this signals completion of
+ * the client's request via GNUNET_SERVER_receive_done and releases the
+ * reference that handle_shutdown took with GNUNET_SERVER_client_keep.
+ *
+ * @param cls the struct GNUNET_SERVER_Client to acknowledge
+ * @param size number of bytes available in buf
+ * @param buf where to write the message; not touched if size is
+ *        smaller than a message header
+ * @return number of bytes written to buf, 0 if nothing was sent
+ */
+static size_t
+transmit_shutdown_ack (void *cls, size_t size, void *buf)
+{
+  struct GNUNET_SERVER_Client *client = cls;
+  struct GNUNET_MessageHeader *msg;
 
+  if (size < sizeof (struct GNUNET_MessageHeader))
+    {
+      GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+                  _("Failed to transmit shutdown ACK.\n"));
+      GNUNET_SERVER_receive_done (client, GNUNET_SYSERR);
+      /* balance the GNUNET_SERVER_client_keep from handle_shutdown on
+         this path as well, same order as on the success path below */
+      GNUNET_SERVER_client_drop (client);
+      return 0;                 /* client disconnected */
+    }
 
+  GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+              _("Transmitting shutdown ACK.\n"));
 
+  /* the ACK is a bare header: type plus its own size, network byte order */
+  msg = (struct GNUNET_MessageHeader *) buf;
+  msg->type = htons (GNUNET_MESSAGE_TYPE_ARM_SHUTDOWN_ACK);
+  msg->size = htons (sizeof (struct GNUNET_MessageHeader));
+  GNUNET_SERVER_receive_done (client, GNUNET_OK);
+  GNUNET_SERVER_client_drop (client);
+  return sizeof (struct GNUNET_MessageHeader);
+}
 
 /**
- * List of handlers for the messages understood by this service.
+ * Handler for SHUTDOWN message.
+ *
+ * Takes a reference on the client with GNUNET_SERVER_client_keep,
+ * queues transmit_shutdown_ack to send a header-sized ACK (no timeout),
+ * calls GNUNET_SERVER_client_persist_ on the client and finally
+ * calls GNUNET_SCHEDULER_shutdown on the global scheduler.  This
+ * function itself does not call GNUNET_SERVER_receive_done; that is
+ * done from transmit_shutdown_ack.  Neither cls nor message is read.
+ *
+ * NOTE(review): the result of GNUNET_SERVER_notify_transmit_ready is
+ * not checked.  If that call can fail without ever invoking the
+ * callback, the kept reference would presumably never be dropped and
+ * the request never completed -- confirm against the server API.
+ *
+ * @param cls closure (refers to service)
+ * @param client identification of the client
+ * @param message the actual message
  */
-static struct GNUNET_SERVER_MessageHandler handlers[] = {
-  {&handle_start, NULL, GNUNET_MESSAGE_TYPE_ARM_START, 0},
-  {&handle_stop, NULL, GNUNET_MESSAGE_TYPE_ARM_STOP, 0},
-  {NULL, NULL, 0, 0}
-};
-
-static struct GNUNET_SIGNAL_Context *shc_chld;
+static void
+handle_shutdown (void *cls,
+                 struct GNUNET_SERVER_Client *client,
+                 const struct GNUNET_MessageHeader *message)
+{
+  GNUNET_SERVER_client_keep(client); /* dropped in transmit_shutdown_ack */
+  GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+              _("Initiating shutdown as requested by client.\n"));
 
-/**
- * Pipe used to communicate shutdown via signal.
- */
-static struct GNUNET_DISK_PipeHandle *sigpipe;
+  GNUNET_SERVER_notify_transmit_ready (client,
+                                       sizeof(struct GNUNET_MessageHeader),
+                                       GNUNET_TIME_UNIT_FOREVER_REL,
+                                       &transmit_shutdown_ack, client);
+  GNUNET_SERVER_client_persist_ (client);
+  GNUNET_SCHEDULER_shutdown (sched);
+}
 
-static const struct GNUNET_DISK_FileHandle *pr;
 
 /**
- * Signal handler called for signals that should cause us to shutdown.
+ * Signal handler called for SIGCHLD.  Triggers the
+ * respective handler by writing to the trigger pipe.
+ *
+ * Exactly one byte (the static 'c') is written to the write end of
+ * 'sigpipe'; GNUNET_break is applied to the check that the write
+ * returned 1.  errno is saved before the write and restored after it,
+ * so the handler leaves errno as it found it.
+ *
+ * NOTE(review): whether GNUNET_break is safe to run from signal
+ * context when the check fails cannot be told from here -- confirm.
  */
 static void
 sighandler_child_death ()
 {
   static char c; /* the byte that gets written; never assigned in this function */
-
-  GNUNET_DISK_file_write (GNUNET_DISK_pipe_handle
-                         (sigpipe, GNUNET_DISK_PIPE_END_WRITE), &c,
-                         sizeof (c));
+  int old_errno = errno; /* back-up errno */
+  GNUNET_break (1 == 
+               GNUNET_DISK_file_write (GNUNET_DISK_pipe_handle
+                                       (sigpipe, GNUNET_DISK_PIPE_END_WRITE), &c,
+                                       sizeof (c)));
+  errno = old_errno; /* restore errno */
 }
 
 
@@ -996,32 +1025,47 @@ run (void *cls,
      struct GNUNET_SERVER_Handle *serv,
      const struct GNUNET_CONFIGURATION_Handle *c)
 {
+  static const struct GNUNET_SERVER_MessageHandler handlers[] = {
+    {&handle_start, NULL, GNUNET_MESSAGE_TYPE_ARM_START, 0},
+    {&handle_stop, NULL, GNUNET_MESSAGE_TYPE_ARM_STOP, 0},
+    {&handle_shutdown, NULL, GNUNET_MESSAGE_TYPE_ARM_SHUTDOWN,
+     sizeof (struct GNUNET_MessageHeader)},
+    {NULL, NULL, 0, 0}
+  };
   char *defaultservices;
   char *pos;
 
-  shc_chld = GNUNET_SIGNAL_handler_install (SIGCHLD, &sighandler_child_death);
+  cfg = c;
+  sched = s;
+  server = serv;
+  GNUNET_assert (serv != NULL);
+  shc_chld = GNUNET_SIGNAL_handler_install (GNUNET_SIGCHLD, &sighandler_child_death);
   GNUNET_assert (sigpipe == NULL);
   sigpipe = GNUNET_DISK_pipe (GNUNET_NO);
   GNUNET_assert (sigpipe != NULL);
   pr = GNUNET_DISK_pipe_handle (sigpipe, GNUNET_DISK_PIPE_END_READ);
   GNUNET_assert (pr != NULL);
-
   GNUNET_SERVER_ignore_shutdown (serv, GNUNET_YES);
-  GNUNET_assert (serv != NULL);
-  cfg = c;
-  sched = s;
-  server = serv;
-  /*
-   * child_death_task =
+  GNUNET_SCHEDULER_add_delayed (sched,
+                               GNUNET_TIME_UNIT_FOREVER_REL,
+                               &shutdown_task,
+                               NULL);
+  child_death_task =
     GNUNET_SCHEDULER_add_read_file (sched, GNUNET_TIME_UNIT_FOREVER_REL, pr,
                                    &maint_child_death, NULL);
-*/
+
   if (GNUNET_OK !=
       GNUNET_CONFIGURATION_get_value_string (cfg,
                                             "ARM",
                                             "GLOBAL_PREFIX",
                                             &prefix_command))
     prefix_command = GNUNET_strdup ("");
+  if (GNUNET_OK !=
+      GNUNET_CONFIGURATION_get_value_string (cfg,
+                                            "ARM",
+                                            "GLOBAL_POSTFIX",
+                                            &final_option))
+    final_option = GNUNET_strdup ("");
   /* start default services... */
   if (GNUNET_OK ==
       GNUNET_CONFIGURATION_get_value_string (cfg,
@@ -1033,11 +1077,14 @@ run (void *cls,
       GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
                  "Starting default services `%s'\n", defaultservices);
 #endif
-      pos = strtok (defaultservices, " ");
-      while (pos != NULL)
+      if (0 < strlen (defaultservices))
        {
-         start_service (NULL, pos);
-         pos = strtok (NULL, " ");
+         pos = strtok (defaultservices, " ");
+         while (pos != NULL)
+           {
+             start_service (NULL, pos, NULL);
+             pos = strtok (NULL, " ");
+           }
        }
       GNUNET_free (defaultservices);
     }
@@ -1049,13 +1096,16 @@ run (void *cls,
 #endif
     }
 
+  /* create listening sockets for future services*/
+  prepareServices (cfg, sched);
+  
   /* process client requests */
   GNUNET_SERVER_add_handlers (server, handlers);
 
   /* manage services */
   GNUNET_SCHEDULER_add_with_priority (sched,
                                      GNUNET_SCHEDULER_PRIORITY_IDLE,
-                                     &maint, NULL);
+                                     &config_change_task, NULL);
 }