luci-mod-status: fix average calculations
[oweals/luci.git] / contrib / package / freifunk-watchdog / src / watchdog.c
index d4ccff82215ab0cb9a80b3eedf0d122fe9b89a76..4bc3ab0b67959be199077c4f85eedc61c1308428 100644 (file)
  *   along with this program; if not, write to the Free Software
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
  *
- *   Copyright (C) 2009 Jo-Philipp Wich <xm@subsignal.org>
+ *   Copyright (C) 2009 Jo-Philipp Wich <jow@openwrt.org>
  */
 
 #include "watchdog.h"
 
+/* Global watchdog fd, required by signal handler; -1 while no watchdog device is open */
+int wdfd = -1;
+
+/*
+ * Handle finished children.
+ *
+ * SIGCHLD handler: collects, without blocking, every child that has already
+ * terminated and logs the pid of each one.
+ *
+ * errno is saved on entry and restored on exit because waitpid() overwrites
+ * it (with ECHILD once no child is left) and the handler may interrupt code
+ * that is about to inspect errno.
+ */
+static void sigchld_handler(int sig)
+{
+       int saved_errno = errno;
+       pid_t pid;
+
+       (void)sig;
+
+       while( (pid = waitpid(-1, NULL, WNOHANG)) > 0 )
+               syslog(LOG_INFO, "Child returned (pid %d)", (int)pid);
+
+       errno = saved_errno;
+}
+
+/*
+ * Watchdog shutdown helper.
+ *
+ * Installed as signal handler by do_daemon() and also called directly from
+ * there. If the watchdog device is open, writes the WATCH_SHUTDOWN byte to
+ * it, closes it and marks it closed. Always terminates the process with
+ * exit status 0.
+ */
+static void shutdown_watchdog(int sig)
+{
+       static const char wshutdown = WATCH_SHUTDOWN;
+
+       (void)sig;
+
+       if( wdfd > -1 )
+       {
+               syslog(LOG_INFO, "Stopping watchdog timer");
+
+               /* Do not fail silently: presumably the timer stays armed
+                * when the device did not take the shutdown byte */
+               if( write(wdfd, &wshutdown, 1) != 1 )
+                       syslog(LOG_ERR, "Can not stop watchdog timer: %s",
+                               strerror(errno));
+
+               close(wdfd);
+               wdfd = -1;
+       }
+
+       exit(0);
+}
+
 /* Get BSSID of given interface */
 static int iw_get_bssid(int iwfd, const char *ifname, char *bssid)
 {
@@ -131,13 +159,31 @@ static int find_process(const char *name)
        return -1;
 }
 
+/*
+ * Get the 5 minute load average.
+ *
+ * Reads /proc/loadavg and parses its second whitespace separated field.
+ * Returns 0.00 if the file can not be opened or read, or if it contains no
+ * second field.
+ */
+static double find_loadavg(void)
+{
+       int fd;
+       ssize_t len;
+       char buffer[64];
+       char *field;
+       double load = 0.00;
+
+       if( (fd = open("/proc/loadavg", O_RDONLY)) > -1 )
+       {
+               /* Leave room for the terminator, read() does not add one */
+               if( (len = read(fd, buffer, sizeof(buffer) - 1)) > 0 )
+               {
+                       buffer[len] = 0;
+
+                       /* Skip the first field instead of assuming it is four
+                        * characters wide: it gets wider once the load
+                        * reaches 10.00 */
+                       if( (field = strchr(buffer, ' ')) != NULL )
+                               load = strtod(field, NULL);
+               }
+
+               close(fd);
+       }
+
+       return load;
+}
+
 /* Check if given uci file was updated */
 static int check_uci_update(const char *config, time_t *mtime)
 {
        struct stat s;
        char path[128];
 
-       snprintf(path, sizeof(path), "/etc/config/%s", config);
+       snprintf(path, sizeof(path), "/var/state/%s", config);
        if( stat(path, &s) > -1 )
        {
                if( (*mtime == 0) || (s.st_mtime > *mtime) )
@@ -145,70 +191,60 @@ static int check_uci_update(const char *config, time_t *mtime)
                        *mtime = s.st_mtime;
                        return 1;
                }
-               else
-               {
-                       snprintf(path, sizeof(path), "/var/state/%s", config);
-                       if( stat(path, &s) > -1 )
-                       {
-                               if( (*mtime == 0) || (s.st_mtime > *mtime) )
-                               {
-                                       *mtime = s.st_mtime;
-                                       return 1;
-                               }
-                       }
-
-                       return 0;
-               }
        }
 
-       return -1;
+       return 0;
 }
 
 /* Add tuple */
-static void load_wifi_uci_add_iface(const char *section, struct uci_itr_ctx *itr)
+static void load_wifi_uci_add_iface(const char *section, struct uci_wifi_iface_itr_ctx *itr)
 {
        wifi_tuple_t *t;
        const char *ucitmp;
        int val = 0;
 
-       if( (t = (wifi_tuple_t *)malloc(sizeof(wifi_tuple_t))) != NULL )
+       ucitmp = ucix_get_option(itr->ctx, "wireless", section, "mode");
+       if( ucitmp && !strncmp(ucitmp, "adhoc", 5) ) /* only sections whose mode starts with "adhoc" are monitored */
        {
-               ucitmp = ucix_get_option(itr->ctx, "wireless", section, "ifname");
-               if(ucitmp)
+               if( (t = (wifi_tuple_t *)malloc(sizeof(wifi_tuple_t))) != NULL )
                {
-                       strncpy(t->ifname, ucitmp, sizeof(t->ifname));
-                       val++;
-               }
-
-               ucitmp = ucix_get_option(itr->ctx, "wireless", section, "bssid");
-               if(ucitmp)
-               {
-                       strncpy(t->bssid, ucitmp, sizeof(t->bssid));
-                       val++;
-               }
+                       ucitmp = ucix_get_option(itr->ctx, "wireless", section, "ifname");
+                       if(ucitmp)
+                       {
+                               strncpy(t->ifname, ucitmp, sizeof(t->ifname)); /* NOTE(review): strncpy() writes no terminator if the value fills the field */
+                               val++;
+                       }
 
-               ucitmp = ucix_get_option(itr->ctx, "wireless", section, "device");
-               if(ucitmp)
-               {
-                       ucitmp = ucix_get_option(itr->ctx, "wireless", ucitmp, "channel");
+                       ucitmp = ucix_get_option(itr->ctx, "wireless", section, "bssid");
                        if(ucitmp)
                        {
-                               t->channel = atoi(ucitmp);
+                               strncpy(t->bssid, ucitmp, sizeof(t->bssid));
                                val++;
                        }
-               }
-       
-               if( val == 3 )
-               {
-                       syslog(LOG_INFO, "Monitoring %s: bssid=%s channel=%d",
-                               t->ifname, t->bssid, t->channel);
 
-                       t->next = itr->list;
-                       itr->list = t;
-               }
-               else
-               {
-                       free(t);
+                       ucitmp = ucix_get_option(itr->ctx, "wireless", section, "device");
+                       if(ucitmp)
+                       {
+                               ucitmp = ucix_get_option(itr->ctx, "wireless", ucitmp, "channel"); /* channel is looked up in the section named by the "device" option */
+                               if(ucitmp)
+                               {
+                                       t->channel = atoi(ucitmp);
+                                       val++;
+                               }
+                       }
+
+                       if( val == 3 ) /* ifname, bssid and channel were all found */
+                       {
+                               syslog(LOG_INFO, "Monitoring %s: bssid=%s channel=%d",
+                                       t->ifname, t->bssid, t->channel);
+
+                               t->next = itr->list; /* prepend to the list collected so far */
+                               itr->list = t;
+                       }
+                       else
+                       {
+                               free(t); /* incomplete section, discard the tuple */
+                       }
                }
        }
 }
@@ -217,12 +253,12 @@ static void load_wifi_uci_add_iface(const char *section, struct uci_itr_ctx *itr
 static wifi_tuple_t * load_wifi_uci(wifi_tuple_t *ifs, time_t *modtime)
 {
        struct uci_context *ctx;
-       struct uci_itr_ctx itr;
+       struct uci_wifi_iface_itr_ctx itr;
        wifi_tuple_t *cur, *next;
 
        if( check_uci_update("wireless", modtime) )
        {
-               syslog(LOG_INFO, "Config changed, reloading");
+               syslog(LOG_INFO, "Wireless config changed, reloading");
 
                if( (ctx = ucix_init("wireless")) != NULL )
                {
@@ -248,95 +284,232 @@ static wifi_tuple_t * load_wifi_uci(wifi_tuple_t *ifs, time_t *modtime)
        return ifs;
 }
 
+/*
+ * Add tuple.
+ *
+ * Handles one "process" section of the freifunk-watchdog config: reads the
+ * "process" and "initscript" options and, if both are present, prepends a
+ * new process_tuple_t (restart counter 0) to itr->list. A section missing
+ * either option is ignored. Values longer than the destination field are
+ * truncated; the stored strings are always NUL terminated.
+ */
+static void load_watchdog_uci_add_process(const char *section, struct uci_process_itr_ctx *itr)
+{
+       process_tuple_t *t;
+       const char *ucitmp;
+       int val = 0;
+
+       if( (t = malloc(sizeof(*t))) == NULL )
+               return;
+
+       t->restart = 0;
+
+       ucitmp = ucix_get_option(itr->ctx, "freifunk-watchdog", section, "process");
+       if(ucitmp)
+       {
+               /* snprintf() always terminates; strncpy() does not when the
+                * value fills the field, and the struct is not zeroed */
+               snprintf(t->process, sizeof(t->process), "%s", ucitmp);
+               val++;
+       }
+
+       ucitmp = ucix_get_option(itr->ctx, "freifunk-watchdog", section, "initscript");
+       if(ucitmp)
+       {
+               snprintf(t->initscript, sizeof(t->initscript), "%s", ucitmp);
+               val++;
+       }
+
+       if( val == 2 )
+       {
+               syslog(LOG_INFO, "Monitoring %s: initscript=%s",
+                       t->process, t->initscript);
+
+               t->next = itr->list;
+               itr->list = t;
+       }
+       else
+       {
+               free(t);
+       }
+}
+
+/*
+ * Load config.
+ *
+ * Builds a fresh process list from the "process" sections of the
+ * freifunk-watchdog config. When the config can be opened, the list passed
+ * in is freed and the newly built list (possibly empty) is returned;
+ * otherwise the list passed in is returned untouched.
+ */
+static process_tuple_t * load_watchdog_uci(process_tuple_t *procs)
+{
+       struct uci_process_itr_ctx itr;
+       struct uci_context *ctx;
+       process_tuple_t *following;
+
+       syslog(LOG_INFO, "Loading watchdog config");
+
+       ctx = ucix_init("freifunk-watchdog");
+       if( ctx == NULL )
+               return procs;
+
+       /* Release the previous list entry by entry */
+       while( procs != NULL )
+       {
+               following = procs->next;
+               free(procs);
+               procs = following;
+       }
+
+       itr.ctx = ctx;
+       itr.list = NULL;
+
+       ucix_for_each_section_type(ctx, "freifunk-watchdog", "process",
+               (void *)load_watchdog_uci_add_process, &itr);
+
+       return itr.list;
+}
+
 /* Daemon implementation */
 static int do_daemon(void)
 {
+       static int wdtrigger = 1; /* timeout requested from the watchdog to force a reset */
+       static int wdtimeout = BASE_INTERVAL * 2; /* regular watchdog timeout */
+       static const char wdkeepalive = WATCH_KEEPALIVE;
+
        int iwfd;
        int channel;
        char bssid[18];
+       struct sigaction sa;
 
-       wifi_tuple_t *ifs = NULL, *curif;
-       time_t modtime = 0;
+       wifi_tuple_t *ifs = NULL, *curr_if;
+       process_tuple_t *procs = NULL, *curr_proc;
+       time_t wireless_modtime = 0;
 
+       int action_intv = 0; /* loop iterations since the last check round */
        int restart_wifi = 0;
-       int restart_cron = 0;
+       int loadavg_panic = 0; /* consecutive check rounds with load >= LOAD_TRESHOLD */
 
        openlog(SYSLOG_IDENT, 0, LOG_DAEMON);
-       //daemon(1, 1);
+       memset(&sa, 0, sizeof(sa));
 
        if( (iwfd = socket(AF_INET, SOCK_DGRAM, 0)) == -1 )
        {
-               perror("Can not open wireless control socket");
+               syslog(LOG_ERR, "Can not open wireless control socket: %s",
+                       strerror(errno));
+
                return 1;
        }
 
+       if( (wdfd = open(WATCH_DEVICE, O_WRONLY)) > -1 )
+       {
+               syslog(LOG_INFO, "Opened %s - polling every %i seconds",
+                       WATCH_DEVICE, BASE_INTERVAL);
+
+               /* Install signal handler to halt watchdog on shutdown */
+               sa.sa_handler = shutdown_watchdog;
+               sa.sa_flags = SA_NOCLDWAIT | SA_RESTART; /* NOTE(review): SA_NOCLDWAIT only has an effect for SIGCHLD - confirm it is wanted here */
+               sigaction(SIGHUP,  &sa, NULL);
+               sigaction(SIGINT,  &sa, NULL);
+               sigaction(SIGPIPE, &sa, NULL);
+               sigaction(SIGTERM, &sa, NULL);
+               sigaction(SIGUSR1, &sa, NULL);
+               sigaction(SIGUSR2, &sa, NULL);
+
+               /* Set watchdog timeout to twice the interval */
+               ioctl(wdfd, WDIOC_SETTIMEOUT, &wdtimeout);
+       }
+
+       /* Install signal handler to reap children */
+       sa.sa_handler = sigchld_handler;
+       sa.sa_flags = 0;
+       sigaction(SIGCHLD, &sa, NULL);
+
+       /* Load watchdog configuration only once */
+       procs = load_watchdog_uci(procs);
+
        while( 1 )
        {
-               if( (ifs = load_wifi_uci(ifs, &modtime)) == NULL )
+               /* Check/increment action interval */
+               if( ++action_intv >= ACTION_INTERVAL ) /* checks run every ACTION_INTERVAL-th iteration, the keepalive below runs on every iteration */
                {
-                       printf("Can not load wireless uci. File corrupt?\n");
-                       return 1;
-               }
+                       /* Reset action interval */
+                       action_intv = 0;
 
-               /* Check crond */
-               if( find_process("crond") < 0 )
-               {
-                       syslog(LOG_WARNING, "The crond process died, restarting");
-                       restart_cron++;         
-               }
+                       /* Check average load */
+                       if( find_loadavg() >= LOAD_TRESHOLD )
+                               loadavg_panic++;
+                       else
+                               loadavg_panic = 0;
 
-               /* Check wireless interfaces */
-               for( curif = ifs; curif; curif = curif->next )
-               {
-                       /* Get current channel and bssid */
-                       if( (iw_get_bssid(iwfd, curif->ifname, bssid) == 0) &&
-                           (iw_get_channel(iwfd, curif->ifname, &channel) == 0) )
+                       /* Check wireless interfaces */
+                       ifs = load_wifi_uci(ifs, &wireless_modtime);
+                       for( curr_if = ifs; curr_if; curr_if = curr_if->next )
                        {
-                               /* Check BSSID */
-                               if( strcasecmp(bssid, curif->bssid) != 0 )
+                               /* Get current channel and bssid */
+                               if( (iw_get_bssid(iwfd, curr_if->ifname, bssid) == 0) &&
+                           (iw_get_channel(iwfd, curr_if->ifname, &channel) == 0) )
                                {
-                                       syslog(LOG_WARNING, "BSSID mismatch on %s: current=%s wanted=%s",
-                                               curif->ifname, bssid, curif->bssid);
-
-                                       restart_wifi++;
+                                       /* Check BSSID */
+                                       if( strcasecmp(bssid, curr_if->bssid) != 0 )
+                                       {
+                                               syslog(LOG_WARNING, "BSSID mismatch on %s: current=%s wanted=%s",
+                                                       curr_if->ifname, bssid, curr_if->bssid);
+
+                                               restart_wifi++;
+                                       }
+
+                                       /* Check channel */
+                                       else if( channel != curr_if->channel )
+                                       {
+                                               syslog(LOG_WARNING, "Channel mismatch on %s: current=%d wanted=%d",
+                                                       curr_if->ifname, channel, curr_if->channel);
+
+                                               restart_wifi++;
+                                       }
                                }
-
-                               /* Check channel */
-                               else if( channel != curif->channel )
+                               else
                                {
-                                       syslog(LOG_WARNING, "Channel mismatch on %s: current=%d wanted=%d",
-                                               curif->ifname, channel, curif->channel);
+                                       syslog(LOG_WARNING, "Requested interface %s not present", curr_if->ifname);
+                               }
+                       }
+
+                       /* Check processes */
+                       for( curr_proc = procs; curr_proc; curr_proc = curr_proc->next )
+                       {
+                               if( find_process(curr_proc->process) < 0 )
+                                       curr_proc->restart++;
+                               else
+                                       curr_proc->restart = 0;
 
-                                       restart_wifi++;
+                               /* Process restart required? */
+                               if( curr_proc->restart >= HYSTERESIS )
+                               {
+                                       curr_proc->restart = 0;
+                                       syslog(LOG_WARNING, "The %s process died, restarting", curr_proc->process);
+                                       EXEC(PROC_ACTION);
                                }
                        }
-                       else
+
+
+                       /* Wifi restart required? */
+                       if( restart_wifi >= HYSTERESIS ) /* NOTE(review): restart_wifi is only cleared here, so the counted mismatches need not be consecutive */
                        {
-                               syslog(LOG_WARNING, "Requested interface %s not present", curif->ifname);                               
+                               restart_wifi = 0;
+                               syslog(LOG_WARNING, "Channel or BSSID mismatch on wireless interface, restarting");
+                               EXEC(WIFI_ACTION);
                        }
-               }
 
+                       /* Is there a load problem? */
+                       if( loadavg_panic >= HYSTERESIS )
+                       {
+                               syslog(LOG_EMERG, "Critical system load level, triggering reset!");
 
-               /* Wifi restart required? */
-               if( restart_wifi >= HYSTERESIS )
-               {
-                       restart_wifi = 0;
-                       syslog(LOG_WARNING, "Restarting wireless");
-                       EXEC(WIFI_ACTION);
+                               /* Try watchdog, fall back to reboot */
+                               if( wdfd > -1 )
+                                       ioctl(wdfd, WDIOC_SETTIMEOUT, &wdtrigger); /* request a timeout of wdtrigger (1) seconds */
+                               else
+                                       EXEC(LOAD_ACTION);
+                       }
                }
 
-               /* Cron restart required? */
-               if( restart_cron >= HYSTERESIS )
-               {
-                       restart_cron = 0;
-                       syslog(LOG_WARNING, "Restarting crond process");
-                       EXEC(CRON_ACTION);      
-               }
 
-               sleep(INTERVAL);
+               /* Reset watchdog timer */
+               if( wdfd > -1 )
+                       write(wdfd, &wdkeepalive, 1);
+
+               sleep(BASE_INTERVAL);
        }
 
+       shutdown_watchdog(0); /* NOTE(review): apparently not reached, the loop above contains no break or return */
        closelog();
+
        return 0;
 }