implement extended status information for arm
author Florian Dold <florian.dold@gmail.com>
Tue, 24 Sep 2019 15:59:18 +0000 (17:59 +0200)
committer Florian Dold <florian.dold@gmail.com>
Tue, 24 Sep 2019 15:59:18 +0000 (17:59 +0200)
src/arm/arm.h
src/arm/arm_api.c
src/arm/arm_monitor_api.c
src/arm/gnunet-arm.c
src/arm/gnunet-service-arm.c
src/arm/test_exponential_backoff.c
src/arm/test_gnunet_service_arm.c
src/include/gnunet_arm_service.h

index 98358606001d4ba0ed59ff018c4dac94b3027008..bddcd7cf3de9add1210e4f6914230460261ec73c 100644 (file)
@@ -93,6 +93,44 @@ struct GNUNET_ARM_ResultMessage {
   uint32_t result;
 };
 
+struct GNUNET_ARM_ServiceInfoMessage {
+  /**
+   * String pool index for the service's name.
+   */
+  uint16_t name_index;
+
+  /**
+   * String pool index for the service's binary.
+   */
+  uint16_t binary_index;
+
+  /**
+   * Last process exit status.
+   */
+  int16_t last_exit_status;
+
+  /**
+   * Padding.
+   */
+  uint16_t padding;
+
+  /**
+   * Status from the 'enum GNUNET_ARM_ServiceStatus'
+   */
+  uint32_t status;
+
+  /**
+   * Time when the service will be restarted, if applicable
+   * to the current status.
+   */
+  struct GNUNET_TIME_AbsoluteNBO restart_at;
+
+  /**
+   * Time when the service was last started, if applicable.
+   */
+  struct GNUNET_TIME_AbsoluteNBO last_started_at;
+};
+
 /**
  * Reply from ARM to client for the
  * #GNUNET_MESSAGE_TYPE_ARM_LIST request followed by count
@@ -107,10 +145,13 @@ struct GNUNET_ARM_ListResultMessage {
   struct GNUNET_ARM_Message arm_msg;
 
   /**
-   * Number of '\0' terminated strings that follow
-   * this message.
+   * Number of 'struct GNUNET_ARM_ServiceInfoMessage' that
+   * follow this message (the string pool comes after them).
    */
   uint16_t count;
+
+  /* struct GNUNET_ARM_ServiceInfoMessage[count]; */
+  /* pool of 0-terminated strings */
 };
 
 GNUNET_NETWORK_STRUCT_END
index b42c95dc04d2617924a2c6bc10b3481adc92a504..4c3bb04886df8f331cb356794d1b2f840d13ecba 100644 (file)
@@ -294,7 +294,32 @@ handle_arm_result(void *cls, const struct GNUNET_ARM_ResultMessage *res)
 
 
 /**
- * Checked that list result message is well-formed.
+ * Read from a string pool.
+ *
+ * @param pool_start start of the string pool
+ * @param pool_size size of the string pool
+ * @param str_index index into the string pool
+ * @returns pointer to the 0-terminated string at @a str_index, or
+ *          NULL if the index is out of bounds or the string is unterminated
+ */
+static const char *
+pool_get (const char *pool_start, size_t pool_size, size_t str_index)
+{
+  const char *str_start;
+  const char *end;
+
+  if (str_index >= pool_size)
+    return NULL;
+  str_start = pool_start + str_index;
+  end = memchr(str_start, 0, pool_size - str_index);
+  if (NULL == end)
+    return NULL;
+  return str_start;
+}
+
+
+/**
+ * Check that list result message is well-formed.
  *
  * @param cls our `struct GNUNET_ARM_Handle`
  * @param lres the message received from the arm service
@@ -304,23 +329,38 @@ static int
 check_arm_list_result(void *cls,
                       const struct GNUNET_ARM_ListResultMessage *lres)
 {
-  const char *pos = (const char *)&lres[1];
   uint16_t rcount = ntohs(lres->count);
   uint16_t msize = ntohs(lres->arm_msg.header.size) - sizeof(*lres);
-  uint16_t size_check;
+  struct GNUNET_ARM_ServiceInfoMessage *ssm;
+  size_t pool_size;
+  char *pool_start;
+
+  if ((rcount * sizeof (struct GNUNET_ARM_ServiceInfoMessage) > msize))
+  {
+    GNUNET_break_op (0);
+    return GNUNET_NO;
+  }
+
+  ssm = (struct GNUNET_ARM_ServiceInfoMessage *) &lres[1];
+  pool_start = (char *) (ssm + rcount);
+  pool_size = msize - (rcount * sizeof (struct GNUNET_ARM_ServiceInfoMessage));
 
   (void)cls;
-  size_check = 0;
   for (unsigned int i = 0; i < rcount; i++)
     {
-      const char *end = memchr(pos, 0, msize - size_check);
-      if (NULL == end)
-        {
-          GNUNET_break(0);
-          return GNUNET_SYSERR;
-        }
-      size_check += (end - pos) + 1;
-      pos = end + 1;
+      uint16_t name_index = ntohs (ssm->name_index);
+      uint16_t binary_index = ntohs (ssm->binary_index);
+      if (NULL == pool_get (pool_start, pool_size, name_index))
+      {
+        GNUNET_break_op (0);
+        return GNUNET_NO;
+      }
+      if (NULL == pool_get (pool_start, pool_size, binary_index))
+      {
+        GNUNET_break_op (0);
+        return GNUNET_NO;
+      }
+      ssm++;
     }
   return GNUNET_OK;
 }
@@ -338,12 +378,13 @@ handle_arm_list_result(void *cls,
 {
   struct GNUNET_ARM_Handle *h = cls;
   uint16_t rcount = ntohs(lres->count);
-  const char *list[rcount];
-  const char *pos = (const char *)&lres[1];
   uint16_t msize = ntohs(lres->arm_msg.header.size) - sizeof(*lres);
+  struct GNUNET_ARM_ServiceInfo list[rcount];
+  struct GNUNET_ARM_ServiceInfoMessage *ssm;
   struct GNUNET_ARM_Operation *op;
-  uint16_t size_check;
   uint64_t id;
+  size_t pool_size;
+  char *pool_start;
 
   id = GNUNET_ntohll(lres->arm_msg.request_id);
   op = find_op_by_id(h, id);
@@ -354,16 +395,31 @@ handle_arm_list_result(void *cls,
           (unsigned long long)id);
       return;
     }
-  size_check = 0;
+
+  GNUNET_assert ((rcount * sizeof (struct GNUNET_ARM_ServiceInfoMessage) <= msize));
+
+  ssm = (struct GNUNET_ARM_ServiceInfoMessage *) &lres[1];
+  pool_start = (char *) (ssm + rcount);
+  pool_size = msize - (rcount * sizeof (struct GNUNET_ARM_ServiceInfoMessage));
+
   for (unsigned int i = 0; i < rcount; i++)
     {
-      const char *end = memchr(pos, 0, msize - size_check);
-
-      /* Assert, as this was already checked in #check_arm_list_result() */
-      GNUNET_assert(NULL != end);
-      list[i] = pos;
-      size_check += (end - pos) + 1;
-      pos = end + 1;
+      uint16_t name_index = ntohs (ssm->name_index);
+      uint16_t binary_index = ntohs (ssm->binary_index);
+      const char *name;
+      const char *binary;
+
+      GNUNET_assert (NULL != (name = pool_get (pool_start, pool_size, name_index)));
+      GNUNET_assert (NULL != (binary = pool_get (pool_start, pool_size, binary_index)));
+      list[i] = (struct GNUNET_ARM_ServiceInfo) {
+        .name = name,
+        .binary = binary,
+        .status = ntohl (ssm->status),
+        .last_started_at = GNUNET_TIME_absolute_ntoh (ssm->last_started_at),
+        .restart_at = GNUNET_TIME_absolute_ntoh (ssm->restart_at),
+        .last_exit_status = ntohs (ssm->last_exit_status),
+      };
+      ssm++;
     }
   if (NULL != op->list_cont)
     op->list_cont(op->cont_cls, GNUNET_ARM_REQUEST_SENT_OK, rcount, list);
index c6e1e26839a893639969acc3aeeeb3ce1c69375e..00faaaef16afc581dbed71e0c126194117e931c3 100644 (file)
@@ -61,7 +61,7 @@ struct GNUNET_ARM_MonitorHandle {
   /**
    * Callback to invoke on status updates.
    */
-  GNUNET_ARM_ServiceStatusCallback service_status;
+  GNUNET_ARM_ServiceMonitorCallback service_status;
 
   /**
    * Closure for @e service_status.
@@ -153,9 +153,9 @@ static void
 handle_monitor_notify(void *cls, const struct GNUNET_ARM_StatusMessage *res)
 {
   struct GNUNET_ARM_MonitorHandle *h = cls;
-  enum GNUNET_ARM_ServiceStatus status;
+  enum GNUNET_ARM_ServiceMonitorStatus status;
 
-  status = (enum GNUNET_ARM_ServiceStatus)ntohl(res->status);
+  status = (enum GNUNET_ARM_ServiceMonitorStatus)ntohl(res->status);
   LOG(GNUNET_ERROR_TYPE_DEBUG,
       "Received notification from ARM for service `%s' with status %d\n",
       (const char *)&res[1],
@@ -230,7 +230,7 @@ reconnect_arm_monitor(struct GNUNET_ARM_MonitorHandle *h)
  */
 struct GNUNET_ARM_MonitorHandle *
 GNUNET_ARM_monitor_start(const struct GNUNET_CONFIGURATION_Handle *cfg,
-                         GNUNET_ARM_ServiceStatusCallback cont,
+                         GNUNET_ARM_ServiceMonitorCallback cont,
                          void *cont_cls)
 {
   struct GNUNET_ARM_MonitorHandle *h;
index 3396a4dbe28bd994848fe90c3e835649f23b22bf..fcbff2331af99ec073d8849576074276a8cd9280 100644 (file)
@@ -53,6 +53,11 @@ static int delete;
  */
 static int quiet;
 
+/**
+ * Set if we should print all services, including stopped ones.
+ */
+static int show_all;
+
 /**
  * Monitor ARM activity.
  */
@@ -508,13 +513,13 @@ term_callback(void *cls,
  * @param cls closure (unused)
  * @param rs request status (success, failure, etc.)
  * @param count number of services in the list
- * @param list list of services that are running
+ * @param list list of services managed by arm
  */
 static void
 list_callback(void *cls,
               enum GNUNET_ARM_RequestStatus rs,
               unsigned int count,
-              const char *const *list)
+              const struct GNUNET_ARM_ServiceInfo *list)
 {
   (void)cls;
   op = NULL;
@@ -540,9 +545,41 @@ list_callback(void *cls,
       return;
     }
   if (!quiet)
-    fprintf(stdout, "%s", _("Running services:\n"));
+  {
+    if (show_all)
+      fprintf(stdout, "%s", _("All services:\n"));
+    else
+      fprintf(stdout, "%s", _("Services (excluding stopped services):\n"));
+  }
   for (unsigned int i = 0; i < count; i++)
-    fprintf(stdout, "%s\n", list[i]);
+  {
+    struct GNUNET_TIME_Relative restart_in;
+    switch (list[i].status)
+    {
+      case GNUNET_ARM_SERVICE_STATUS_STOPPED:
+        if (show_all)
+          fprintf(stdout, "%s (binary='%s', status=stopped)\n", list[i].name, list[i].binary);
+        break;
+      case GNUNET_ARM_SERVICE_STATUS_FAILED:
+          restart_in = GNUNET_TIME_absolute_get_remaining (list[i].restart_at);
+          fprintf(stdout, "%s (binary='%s', status=failed, exit_status=%d, restart_delay='%s')\n",
+                  list[i].name,
+                  list[i].binary,
+                  list[i].last_exit_status,
+                  GNUNET_STRINGS_relative_time_to_string (restart_in, GNUNET_YES));
+          break;
+      case GNUNET_ARM_SERVICE_STATUS_FINISHED:
+          fprintf(stdout, "%s (binary='%s', status=finished)\n", list[i].name, list[i].binary);
+          break;
+      case GNUNET_ARM_SERVICE_STATUS_STARTED:
+          fprintf(stdout, "%s (binary='%s', status=started)\n", list[i].name, list[i].binary);
+          break;
+      default:
+        fprintf(stdout, "%s (binary='%s', status=unknown)\n", list[i].name, list[i].binary);
+        break;
+
+    }
+  }
   al_task = GNUNET_SCHEDULER_add_now(&action_loop, NULL);
 }
 
@@ -652,7 +689,7 @@ action_loop(void *cls)
 static void
 srv_status(void *cls,
            const char *service,
-           enum GNUNET_ARM_ServiceStatus status)
+           enum GNUNET_ARM_ServiceMonitorStatus status)
 {
   const char *msg;
 
@@ -773,6 +810,10 @@ main(int argc, char *const *argv)
                                 "SERVICE",
                                 gettext_noop("stop a particular service"),
                                 &term),
+    GNUNET_GETOPT_option_flag('a',
+                              "all",
+                              gettext_noop("also show stopped services (used with -I)"),
+                              &show_all),
     GNUNET_GETOPT_option_flag('s',
                               "start",
                               gettext_noop(
index e02314b91da2e35045efcc56740bbb023ebb4090..b30ae518e9d788d05b65acfb12583728ff5eaae0 100644 (file)
@@ -163,6 +163,11 @@ struct ServiceList {
    */
   struct GNUNET_TIME_Relative backoff;
 
+  /**
+   * Absolute time at which the process was (re-)started last.
+   */
+  struct GNUNET_TIME_Absolute last_started_at;
+
   /**
    * Absolute time at which the process is scheduled to restart in case of death
    */
@@ -186,6 +191,11 @@ struct ServiceList {
    * are on Windoze).
    */
   int pipe_control;
+
+  /**
+   * Last exit status of the process.
+   */
+  int last_exit_status;
 };
 
 /**
@@ -696,7 +706,7 @@ signal_result(struct GNUNET_SERVICE_Client *client,
  */
 static void
 broadcast_status(const char *name,
-                 enum GNUNET_ARM_ServiceStatus status,
+                 enum GNUNET_ARM_ServiceMonitorStatus status,
                  struct GNUNET_SERVICE_Client *unicast)
 {
   struct GNUNET_MQ_Envelope *env;
@@ -914,6 +924,7 @@ start_process(struct ServiceList *sl,
     }
   GNUNET_free(binary);
   GNUNET_free(quotedbinary);
+  sl->last_started_at = GNUNET_TIME_absolute_get ();
   if (NULL == sl->proc)
     {
       GNUNET_log(GNUNET_ERROR_TYPE_ERROR,
@@ -1299,6 +1310,29 @@ handle_stop(void *cls, const struct GNUNET_ARM_Message *amsg)
 }
 
 
+/**
+ * Write a string to a string pool.
+ *
+ * @param pool_start pointer to the start of the string pool
+ * @param pool_size size of the string pool
+ * @param[in,out] pool_pos current position index in the string pool,
+ *                will be updated
+ * @param str string to write to the string pool
+ * @returns GNUNET_OK if the string fits into the pool,
+ *          GNUNET_SYSERR otherwise
+ */
+static int
+pool_write(char *pool_start, size_t pool_size, size_t *pool_pos, char *str)
+{
+  size_t next_pos = (*pool_pos) + strlen (str) + 1;
+
+  if (next_pos > pool_size)
+    return GNUNET_SYSERR;
+  memcpy (pool_start + *pool_pos, str, strlen (str) + 1);
+  *pool_pos = next_pos;
+  return GNUNET_OK;
+}
+
 /**
  * Handle LIST-message.
  *
@@ -1311,42 +1345,68 @@ handle_list(void *cls, const struct GNUNET_ARM_Message *request)
   struct GNUNET_SERVICE_Client *client = cls;
   struct GNUNET_MQ_Envelope *env;
   struct GNUNET_ARM_ListResultMessage *msg;
-  size_t string_list_size;
+  size_t extra_size;
   struct ServiceList *sl;
   uint16_t count;
-  char *pos;
+  size_t pool_size;
+  size_t pool_pos;
+  char *pool_start;
+  struct GNUNET_ARM_ServiceInfoMessage *ssm;
 
-  GNUNET_break(0 == ntohl(request->reserved));
+  GNUNET_break_op(0 == ntohl(request->reserved));
   count = 0;
-  string_list_size = 0;
+  pool_size = 0;
 
-  /* first count the running processes get their name's size */
+  /* Do one pass over the list to compute the number of services
+   * and the string pool size */
   for (sl = running_head; NULL != sl; sl = sl->next)
     {
-      if (NULL != sl->proc)
-        {
-          string_list_size += strlen(sl->name);
-          string_list_size += strlen(sl->binary);
-          string_list_size += 4;
-          count++;
-        }
+        pool_size += strlen(sl->name) + 1;
+        pool_size += strlen(sl->binary) + 1;
+        count++;
     }
 
+  extra_size = pool_size + (count * sizeof (struct GNUNET_ARM_ServiceInfoMessage));
   env = GNUNET_MQ_msg_extra(msg,
-                            string_list_size,
+                            extra_size,
                             GNUNET_MESSAGE_TYPE_ARM_LIST_RESULT);
   msg->arm_msg.request_id = request->request_id;
   msg->count = htons(count);
 
-  pos = (char *)&msg[1];
+  ssm = (struct GNUNET_ARM_ServiceInfoMessage *) &msg[1];
+  pool_start = (char *) (ssm + count);
+  pool_pos = 0;
+
   for (sl = running_head; NULL != sl; sl = sl->next)
     {
-      if (NULL != sl->proc)
+      ssm->name_index = htons ((uint16_t) pool_pos);
+      GNUNET_assert (GNUNET_OK == pool_write (pool_start, pool_size, &pool_pos, sl->name));
+      ssm->binary_index = htons ((uint16_t) pool_pos);
+      GNUNET_assert (GNUNET_OK == pool_write (pool_start, pool_size, &pool_pos, sl->binary));
+      if (NULL == sl->proc)
+      {
+        if (0 == sl->last_started_at.abs_value_us)
+        {
+          /* Process never started */
+          ssm->status = htonl (GNUNET_ARM_SERVICE_STATUS_STOPPED);
+        }
+        else if (0 == sl->last_exit_status)
         {
-          size_t s = strlen(sl->name) + strlen(sl->binary) + 4;
-          GNUNET_snprintf(pos, s, "%s (%s)", sl->name, sl->binary);
-          pos += s;
+          ssm->status = htonl (GNUNET_ARM_SERVICE_STATUS_FINISHED);
         }
+        else
+        {
+          ssm->status = htonl (GNUNET_ARM_SERVICE_STATUS_FAILED);
+          ssm->last_exit_status = htons (sl->last_exit_status);
+        }
+      }
+      else
+      {
+        ssm->status = htonl (GNUNET_ARM_SERVICE_STATUS_STARTED);
+      }
+      ssm->last_started_at = GNUNET_TIME_absolute_hton (sl->last_started_at);
+      ssm->restart_at = GNUNET_TIME_absolute_hton (sl->restart_at);
+      ssm++;
     }
   GNUNET_MQ_send(GNUNET_SERVICE_client_get_mq(client), env);
   GNUNET_SERVICE_client_continue(client);
@@ -1700,6 +1760,7 @@ maint_child_death(void *cls)
         }
       if (GNUNET_YES != in_shutdown)
         {
+          pos->last_exit_status = statcode;
           if ((statusType == GNUNET_OS_PROCESS_EXITED) && (statcode == 0))
             {
               /* process terminated normally, allow restart at any time */
@@ -1722,7 +1783,7 @@ maint_child_death(void *cls)
           else
             {
               GNUNET_log(
-                GNUNET_ERROR_TYPE_INFO,
+                GNUNET_ERROR_TYPE_WARNING,
                 _("Service `%s' terminated with status %s/%d, will restart in %s\n"),
                 pos->name,
                 statstr,
index 8190d29c29557e21e9b6e394d5dad5fa201bba84..b13ab1cb899ffa9d42e813b208cfcb246ed997e5 100644 (file)
@@ -196,7 +196,7 @@ arm_stop_cb(void *cls,
 static void
 srv_status(void *cls,
            const char *service,
-           enum GNUNET_ARM_ServiceStatus status)
+           enum GNUNET_ARM_ServiceMonitorStatus status)
 {
   if (status == GNUNET_ARM_SERVICE_MONITORING_STARTED)
     {
index 8c8c664f94b665104ced5c4cb34a239b04dbfdfd..cb2c14438612b7b6d6319a62b9ff8013bae20422 100644 (file)
@@ -78,7 +78,7 @@ static void
 service_list(void *cls,
              enum GNUNET_ARM_RequestStatus rs,
              unsigned int count,
-             const char *const*list)
+             const struct GNUNET_ARM_ServiceInfo *list)
 {
   unsigned int i;
 
@@ -89,13 +89,13 @@ service_list(void *cls,
     goto stop_arm;
   for (i = 0; i < count; i++)
     {
-      if (0 == strcasecmp(list[i],
-                          "resolver (gnunet-service-resolver)"))
-        {
+      if ((0 == strcasecmp(list[i].name, "resolver")) && 
+          (0 == strcasecmp(list[i].binary, "gnunet-service-resolver")))
+      {
           GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
                      "Got service list, now stopping arm\n");
           ret = 0;
-        }
+      }
     }
 
 stop_arm:
index faf89128f9c3b873eeeb8458728f2ebcd84de3d3..9e79c26e7ac0c0e423378bda03396634e69e519b 100644 (file)
@@ -70,7 +70,7 @@ enum GNUNET_ARM_RequestStatus {
 /**
  * Statuses of services.
  */
-enum GNUNET_ARM_ServiceStatus {
+enum GNUNET_ARM_ServiceMonitorStatus {
   /**
    * Dummy message.
    */
@@ -149,6 +149,72 @@ enum GNUNET_ARM_Result {
 };
 
 
+/**
+ * Status of a service managed by ARM.
+ */
+enum GNUNET_ARM_ServiceStatus
+{
+  /**
+   * Service is stopped.
+   */
+  GNUNET_ARM_SERVICE_STATUS_STOPPED = 0,
+
+  /**
+   * Service has been started and is currently running.
+   */
+  GNUNET_ARM_SERVICE_STATUS_STARTED = 1,
+
+  /**
+   * The service has previously failed, and
+   * will be restarted.
+   */
+  GNUNET_ARM_SERVICE_STATUS_FAILED = 2,
+
+  /**
+   * The service was started, but then exited normally.
+   */
+  GNUNET_ARM_SERVICE_STATUS_FINISHED = 3,
+};
+
+
+/**
+ * Information about a service managed by ARM.
+ */
+struct GNUNET_ARM_ServiceInfo
+{
+  /**
+   * The current status of the service.
+   */
+  enum GNUNET_ARM_ServiceStatus status;
+
+  /**
+   * The name of the service.
+   */
+  const char *name;
+
+  /**
+   * The binary used to execute the service.
+   */
+  const char *binary;
+
+  /**
+   * Time when the service will be restarted, if applicable
+   * to the current status.
+   */
+  struct GNUNET_TIME_Absolute restart_at;
+
+  /**
+   * Time when the service was last started, if applicable.
+   */
+  struct GNUNET_TIME_Absolute last_started_at;
+
+  /**
+   * Last process exit status.
+   */
+  int last_exit_status;
+};
+
+
 /**
  * Handle for interacting with ARM.
  */
@@ -197,13 +263,13 @@ typedef void
  * @param cls closure
  * @param rs status of the request
  * @param count number of strings in the list
- * @param list list of running services
+ * @param list list of services managed by arm
  */
 typedef void
 (*GNUNET_ARM_ServiceListCallback) (void *cls,
                                    enum GNUNET_ARM_RequestStatus rs,
                                    unsigned int count,
-                                   const char *const*list);
+                                   const struct GNUNET_ARM_ServiceInfo *list);
 
 
 /**
@@ -309,9 +375,9 @@ struct GNUNET_ARM_MonitorHandle;
  * @param status status of the service
  */
 typedef void
-(*GNUNET_ARM_ServiceStatusCallback) (void *cls,
+(*GNUNET_ARM_ServiceMonitorCallback) (void *cls,
                                      const char *service,
-                                     enum GNUNET_ARM_ServiceStatus status);
+                                     enum GNUNET_ARM_ServiceMonitorStatus status);
 
 
 /**
@@ -327,7 +393,7 @@ typedef void
  */
 struct GNUNET_ARM_MonitorHandle *
 GNUNET_ARM_monitor_start(const struct GNUNET_CONFIGURATION_Handle *cfg,
-                         GNUNET_ARM_ServiceStatusCallback cont,
+                         GNUNET_ARM_ServiceMonitorCallback cont,
                          void *cont_cls);