zcip: fix link-local IP conflict detection
[oweals/busybox.git] / networking / zcip.c
index b64e37c5848f103d07964155d48cab24d8dd5cc1..45d1f7c1c25cf148825052075a0d20ea15eb9751 100644 (file)
@@ -1,3 +1,4 @@
+/* vi: set sw=4 ts=4: */
 /*
  * RFC3927 ZeroConf IPv4 Link-Local addressing
  * (see <http://www.zeroconf.org/>)
@@ -5,20 +6,16 @@
  * Copyright (C) 2003 by Arthur van Hoff (avh@strangeberry.com)
  * Copyright (C) 2004 by David Brownell
  *
- * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
+ * Licensed under GPLv2 or later, see file LICENSE in this source tree.
  */
 
 /*
- * This can build as part of BusyBox or by itself:
- *
- *     $(CROSS_COMPILE)cc -Os -Wall -DNO_BUSYBOX -DDEBUG -o zcip zcip.c
- *
  * ZCIP just manages the 169.254.*.* addresses.  That network is not
  * routed at the IP level, though various proxies or bridges can
  * certainly be used.  Its naming is built over multicast DNS.
  */
 
-// #define      DEBUG
+//#define DEBUG
 
 // TODO:
 // - more real-world usage/testing, especially daemon mode
 // - avoid silent script failures, especially under load...
 // - link status monitoring (restart on link-up; stop on link-down)
 
-#include <errno.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <syslog.h>
-#include <poll.h>
-#include <time.h>
-#include <unistd.h>
-
-#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <sys/time.h>
-#include <sys/socket.h>
-
-#include <arpa/inet.h>
-#include <netinet/in.h>
+//usage:#define zcip_trivial_usage
+//usage:       "[OPTIONS] IFACE SCRIPT"
+//usage:#define zcip_full_usage "\n\n"
+//usage:       "Manage a ZeroConf IPv4 link-local address\n"
+//usage:     "\n       -f              Run in foreground"
+//usage:     "\n       -q              Quit after obtaining address"
+//usage:     "\n       -r 169.254.x.x  Request this address first"
+//usage:     "\n       -v              Verbose"
+//usage:     "\n"
+//usage:     "\nWith no -q, runs continuously monitoring for ARP conflicts,"
+//usage:     "\nexits only on I/O errors (link down etc)"
+
+#include "libbb.h"
 #include <netinet/ether.h>
-#include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_arp.h>
-
-#include <linux/if_packet.h>
 #include <linux/sockios.h>
 
+#include <syslog.h>
+
+/* We don't need more than 32 bits of the counter */
+#define MONOTONIC_US() ((unsigned)monotonic_us())
 
 struct arp_packet {
-       struct ether_header hdr;
-       // FIXME this part is netinet/if_ether.h "struct ether_arp"
-       struct arphdr arp;
-       struct ether_addr source_addr;
-       struct in_addr source_ip;
-       struct ether_addr target_addr;
-       struct in_addr target_ip;
-} ATTRIBUTE_PACKED;
+       struct ether_header eth;
+       struct ether_arp arp;
+} PACKED;
 
+enum {
 /* 169.254.0.0 */
-static const uint32_t LINKLOCAL_ADDR = 0xa9fe0000;
+       LINKLOCAL_ADDR = 0xa9fe0000,
 
 /* protocol timeout parameters, specified in seconds */
-static const unsigned PROBE_WAIT = 1;
-static const unsigned PROBE_MIN = 1;
-static const unsigned PROBE_MAX = 2;
-static const unsigned PROBE_NUM = 3;
-static const unsigned MAX_CONFLICTS = 10;
-static const unsigned RATE_LIMIT_INTERVAL = 60;
-static const unsigned ANNOUNCE_WAIT = 2;
-static const unsigned ANNOUNCE_NUM = 2;
-static const unsigned ANNOUNCE_INTERVAL = 2;
-static const time_t DEFEND_INTERVAL = 10;
-
-static const unsigned char ZCIP_VERSION[] = "0.75 (18 April 2005)";
-static char *prog;
-
-static const struct in_addr null_ip = { 0 };
-static const struct ether_addr null_addr = { {0, 0, 0, 0, 0, 0} };
-
-static int verbose = 0;
-
-#ifdef DEBUG
-
-#define DBG(fmt,args...) \
-       fprintf(stderr, "%s: " fmt , prog , ## args)
-#define VDBG(fmt,args...) do { \
-       if (verbose) fprintf(stderr, "%s: " fmt , prog ,## args); \
-       } while (0)
-#else
+       PROBE_WAIT = 1,
+       PROBE_MIN = 1,
+       PROBE_MAX = 2,
+       PROBE_NUM = 3,
+       MAX_CONFLICTS = 10,
+       RATE_LIMIT_INTERVAL = 60,
+       ANNOUNCE_WAIT = 2,
+       ANNOUNCE_NUM = 2,
+       ANNOUNCE_INTERVAL = 2,
+       DEFEND_INTERVAL = 10
+};
+
+/* States during the configuration process. */
+enum {
+       PROBE = 0,
+       RATE_LIMIT_PROBE,
+       ANNOUNCE,
+       MONITOR,
+       DEFEND
+};
+
+#define VDBG(...) do { } while (0)
+
+
+enum {
+       sock_fd = 3
+};
+
+struct globals {
+       struct sockaddr saddr;
+       struct ether_addr eth_addr;
+} FIX_ALIASING;
+#define G (*(struct globals*)&bb_common_bufsiz1)
+#define saddr    (G.saddr   )
+#define eth_addr (G.eth_addr)
+#define INIT_G() do { } while (0)
 
-#define DBG(fmt,args...) \
-       do { } while (0)
-#define VDBG   DBG
-#endif                         /* DEBUG */
 
 /**
  * Pick a random link local IP address on 169.254/16, except that
  * the first and last 256 addresses are reserved.
  */
-static void
-pick(struct in_addr *ip)
+static uint32_t pick(void)
 {
-       unsigned        tmp;
+       unsigned tmp;
 
-       /* use cheaper math than lrand48() mod N */
        do {
-               tmp = (lrand48() >> 16) & IN_CLASSB_HOST;
+               tmp = rand() & IN_CLASSB_HOST;
        } while (tmp > (IN_CLASSB_HOST - 0x0200));
-       ip->s_addr = htonl((LINKLOCAL_ADDR + 0x0100) + tmp);
+       return htonl((LINKLOCAL_ADDR + 0x0100) + tmp);
 }
 
 /**
  * Broadcast an ARP packet.
  */
-static int
-arp(int fd, struct sockaddr *saddr, int op,
-       const struct ether_addr *source_addr, struct in_addr source_ip,
-       const struct ether_addr *target_addr, struct in_addr target_ip)
+static void arp(
+       /* int op, - always ARPOP_REQUEST */
+       /* const struct ether_addr *source_eth, - always &eth_addr */
+                                       struct in_addr source_ip,
+       const struct ether_addr *target_eth, struct in_addr target_ip)
 {
+       enum { op = ARPOP_REQUEST };
+#define source_eth (&eth_addr)
+
        struct arp_packet p;
+       memset(&p, 0, sizeof(p));
 
        // ether header
-       p.hdr.ether_type = htons(ETHERTYPE_ARP);
-       memcpy(p.hdr.ether_shost, source_addr, ETH_ALEN);
-       memset(p.hdr.ether_dhost, 0xff, ETH_ALEN);
+       p.eth.ether_type = htons(ETHERTYPE_ARP);
+       memcpy(p.eth.ether_shost, source_eth, ETH_ALEN);
+       memset(p.eth.ether_dhost, 0xff, ETH_ALEN);
 
        // arp request
-       p.arp.ar_hrd = htons(ARPHRD_ETHER);
-       p.arp.ar_pro = htons(ETHERTYPE_IP);
-       p.arp.ar_hln = ETH_ALEN;
-       p.arp.ar_pln = 4;
-       p.arp.ar_op = htons(op);
-       memcpy(&p.source_addr, source_addr, ETH_ALEN);
-       memcpy(&p.source_ip, &source_ip, sizeof (p.source_ip));
-       memcpy(&p.target_addr, target_addr, ETH_ALEN);
-       memcpy(&p.target_ip, &target_ip, sizeof (p.target_ip));
+       p.arp.arp_hrd = htons(ARPHRD_ETHER);
+       p.arp.arp_pro = htons(ETHERTYPE_IP);
+       p.arp.arp_hln = ETH_ALEN;
+       p.arp.arp_pln = 4;
+       p.arp.arp_op = htons(op);
+       memcpy(&p.arp.arp_sha, source_eth, ETH_ALEN);
+       memcpy(&p.arp.arp_spa, &source_ip, sizeof(p.arp.arp_spa));
+       memcpy(&p.arp.arp_tha, target_eth, ETH_ALEN);
+       memcpy(&p.arp.arp_tpa, &target_ip, sizeof(p.arp.arp_tpa));
 
        // send it
-       if (sendto(fd, &p, sizeof (p), 0, saddr, sizeof (*saddr)) < 0) {
-               perror("sendto");
-               return -errno;
-       }
-       return 0;
+       // Even though sock_fd is already bound to saddr, just send()
+       // won't work, because "socket is not connected"
+       // (and connect() won't fix that, "operation not supported").
+       // Thus we sendto() to saddr. I wonder which sockaddr
+       // (from bind() or from sendto()?) kernel actually uses
+       // to determine iface to emit the packet from...
+       xsendto(sock_fd, &p, sizeof(p), &saddr, sizeof(saddr));
+#undef source_eth
 }
 
 /**
  * Run a script.
+ * argv[0]:intf argv[1]:script_name argv[2]:scratch slot (overwritten with param) argv[3]:NULL
  */
-static int
-run(char *script, char *arg, char *intf, struct in_addr *ip)
+static int run(char *argv[3], const char *param, struct in_addr *ip)
 {
-       int pid, status;
-       char *why;
-
-       if (script != NULL) {
-               VDBG("%s run %s %s\n", intf, script, arg);
-               if (ip != NULL) {
-                       char *addr = inet_ntoa(*ip);
-                       setenv("ip", addr, 1);
-                       syslog(LOG_INFO, "%s %s %s", arg, intf, addr);
-               }
+       int status;
+       char *addr = addr; /* for gcc */
+       const char *fmt = "%s %s %s" + 3;
 
-               pid = vfork();
-               if (pid < 0) {                  // error
-                       why = "vfork";
-                       goto bad;
-               } else if (pid == 0) {          // child
-                       execl(script, script, arg, NULL);
-                       perror("execl");
-                       _exit(EXIT_FAILURE);
-               }
+       argv[2] = (char*)param;
 
-               if (waitpid(pid, &status, 0) <= 0) {
-                       why = "waitpid";
-                       goto bad;
-               }
-               if (WEXITSTATUS(status) != 0) {
-                       fprintf(stderr, "%s: script %s failed, exit=%d\n",
-                                       prog, script, WEXITSTATUS(status));
-                       return -errno;
-               }
-       }
-       return 0;
-bad:
-       status = -errno;
-       syslog(LOG_ERR, "%s %s, %s error: %s",
-               arg, intf, why, strerror(errno));
-       return status;
-}
+       VDBG("%s run %s %s\n", argv[0], argv[1], argv[2]);
 
-#ifndef        NO_BUSYBOX
-#include "busybox.h"
-#endif
+       if (ip) {
+               addr = inet_ntoa(*ip);
+               xsetenv("ip", addr);
+               fmt -= 3;
+       }
+       bb_info_msg(fmt, argv[2], argv[0], addr);
 
-/**
- * Print usage information.
- */
-static void ATTRIBUTE_NORETURN
-zcip_usage(const char *msg)
-{
-       fprintf(stderr, "%s: %s\n", prog, msg);
-#ifdef NO_BUSYBOX
-       fprintf(stderr, "Usage: %s [OPTIONS] ifname script\n"
-                       "\t-f              foreground mode (implied by -v)\n"
-                       "\t-q              quit after address (no daemon)\n"
-                       "\t-r 169.254.x.x  request this address first\n"
-                       "\t-v              verbose; show version\n",
-                       prog);
-       exit(0);
-#else
-       bb_show_usage();
-#endif
+       status = spawn_and_wait(argv + 1);
+       if (status < 0) {
+               bb_perror_msg("%s %s %s" + 3, argv[2], argv[0]);
+               return -errno;
+       }
+       if (status != 0)
+               bb_error_msg("script %s %s failed, exitcode=%d", argv[1], argv[2], status & 0xff);
+       return status;
 }
 
 /**
  * Return milliseconds of random delay, up to "secs" seconds.
  */
-static inline unsigned
-ms_rdelay(unsigned secs)
+static ALWAYS_INLINE unsigned random_delay_ms(unsigned secs)
 {
-       return lrand48() % (secs * 1000);
+       return rand() % (secs * 1000);
 }
 
 /**
  * main program
  */
-
-#ifdef NO_BUSYBOX
-int
-main(int argc, char *argv[])
-       __attribute__ ((weak, alias ("zcip_main")));
-#endif
-
-int zcip_main(int argc, char *argv[])
+int zcip_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
+int zcip_main(int argc UNUSED_PARAM, char **argv)
 {
-       char *intf = NULL;
-       char *script = NULL;
-       int quit = 0;
-       int foreground = 0;
-
-       char *why;
-       struct sockaddr saddr;
-       struct ether_addr addr;
-       struct in_addr ip = { 0 };
-       int fd;
-       int ready = 0;
-       suseconds_t timeout = 0;        // milliseconds
-       time_t defend = 0;
-       unsigned conflicts = 0;
-       unsigned nprobes = 0;
-       unsigned nclaims = 0;
-       int t;
-
+       int state;
+       char *r_opt;
+       unsigned opts;
+
+       // ugly trick, but I want these zeroed in one go
+       struct {
+               const struct in_addr null_ip;
+               const struct ether_addr null_addr;
+               struct in_addr ip;
+               struct ifreq ifr;
+               int timeout_ms; /* must be signed */
+               unsigned conflicts;
+               unsigned nprobes;
+               unsigned nclaims;
+               int ready;
+               int verbose;
+       } L;
+#define null_ip    (L.null_ip   )
+#define null_addr  (L.null_addr )
+#define ip         (L.ip        )
+#define ifr        (L.ifr       )
+#define timeout_ms (L.timeout_ms)
+#define conflicts  (L.conflicts )
+#define nprobes    (L.nprobes   )
+#define nclaims    (L.nclaims   )
+#define ready      (L.ready     )
+#define verbose    (L.verbose   )
+
+       memset(&L, 0, sizeof(L));
+       INIT_G();
+
+#define FOREGROUND (opts & 1)
+#define QUIT       (opts & 2)
        // parse commandline: prog [options] ifname script
-       prog = argv[0];
-       while ((t = getopt(argc, argv, "fqr:v")) != EOF) {
-               switch (t) {
-               case 'f':
-                       foreground = 1;
-                       continue;
-               case 'q':
-                       quit = 1;
-                       continue;
-               case 'r':
-                       if (inet_aton(optarg, &ip) == 0
-                                       || (ntohl(ip.s_addr) & IN_CLASSB_NET)
-                                               != LINKLOCAL_ADDR) {
-                               zcip_usage("invalid link address");
-                       }
-                       continue;
-               case 'v':
-                       if (!verbose)
-                               printf("%s: version %s\n", prog, ZCIP_VERSION);
-                       verbose++;
-                       foreground = 1;
-                       continue;
-               default:
-                       zcip_usage("bad option");
-               }
+       // exactly 2 args; -v accumulates and implies -f
+       opt_complementary = "=2:vv:vf";
+       opts = getopt32(argv, "fqr:v", &r_opt, &verbose);
+#if !BB_MMU
+       // on NOMMU reexec early (or else we will rerun things twice)
+       if (!FOREGROUND)
+               bb_daemonize_or_rexec(0 /*was: DAEMON_CHDIR_ROOT*/, argv);
+#endif
+       // open an ARP socket
+       // (need to do it before openlog to prevent openlog from taking
+       // fd 3 (sock_fd==3))
+       xmove_fd(xsocket(AF_PACKET, SOCK_PACKET, htons(ETH_P_ARP)), sock_fd);
+       if (!FOREGROUND) {
+               // do it before all bb_xx_msg calls
+               openlog(applet_name, 0, LOG_DAEMON);
+               logmode |= LOGMODE_SYSLOG;
        }
-       if (optind < argc - 1) {
-               intf = argv[optind++];
-               setenv("interface", intf, 1);
-               script = argv[optind++];
+       if (opts & 4) { // -r n.n.n.n
+               if (inet_aton(r_opt, &ip) == 0
+                || (ntohl(ip.s_addr) & IN_CLASSB_NET) != LINKLOCAL_ADDR
+               ) {
+                       bb_error_msg_and_die("invalid link address");
+               }
        }
-       if (optind != argc || !intf)
-               zcip_usage("wrong number of arguments");
-       openlog(prog, 0, LOG_DAEMON);
+       argv += optind - 1;
+
+       /* Now: argv[0]:junk argv[1]:intf argv[2]:script argv[3]:NULL */
+       /* We need to make space for script argument: */
+       argv[0] = argv[1];
+       argv[1] = argv[2];
+       /* Now: argv[0]:intf argv[1]:script argv[2]:junk argv[3]:NULL */
+#define argv_intf (argv[0])
+
+       xsetenv("interface", argv_intf);
 
        // initialize the interface (modprobe, ifup, etc)
-       if (run(script, "init", intf, NULL) < 0)
+       if (run(argv, "init", NULL))
                return EXIT_FAILURE;
 
        // initialize saddr
-       memset(&saddr, 0, sizeof (saddr));
-       strncpy(saddr.sa_data, intf, sizeof (saddr.sa_data));
+       // saddr is: { u16 sa_family; u8 sa_data[14]; }
+       //memset(&saddr, 0, sizeof(saddr));
+       //TODO: are we leaving sa_family == 0 (AF_UNSPEC)?!
+       safe_strncpy(saddr.sa_data, argv_intf, sizeof(saddr.sa_data));
 
-       // open an ARP socket
-       if ((fd = socket(PF_PACKET, SOCK_PACKET, htons(ETH_P_ARP))) < 0) {
-               why = "open";
-fail:
-               foreground = 1;
-               goto bad;
-       }
        // bind to the interface's ARP socket
-       if (bind(fd, &saddr, sizeof (saddr)) < 0) {
-               why = "bind";
-               goto fail;
-       } else {
-               struct ifreq ifr;
-               short seed[3];
-
-               // get the interface's ethernet address
-               memset(&ifr, 0, sizeof (ifr));
-               strncpy(ifr.ifr_name, intf, sizeof (ifr.ifr_name));
-               if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) {
-                       why = "get ethernet address";
-                       goto fail;
-               }
-               memcpy(&addr, &ifr.ifr_hwaddr.sa_data, ETH_ALEN);
-
-               // start with some stable ip address, either a function of
-               // the hardware address or else the last address we used.
-               // NOTE: the sequence of addresses we try changes only
-               // depending on when we detect conflicts.
-               memcpy(seed, &ifr.ifr_hwaddr.sa_data, ETH_ALEN);
-               seed48(seed);
-               if (ip.s_addr == 0)
-                       pick(&ip);
+       xbind(sock_fd, &saddr, sizeof(saddr));
+
+       // get the interface's ethernet address
+       //memset(&ifr, 0, sizeof(ifr));
+       strncpy_IFNAMSIZ(ifr.ifr_name, argv_intf);
+       xioctl(sock_fd, SIOCGIFHWADDR, &ifr);
+       memcpy(&eth_addr, &ifr.ifr_hwaddr.sa_data, ETH_ALEN);
+
+       // start with some stable ip address, either a function of
+       // the hardware address or else the last address we used.
+       // we take the last four bytes of the hardware address, as the
+       // leading ones aren't random enough.
+       // NOTE: the sequence of addresses we try changes only
+       // depending on when we detect conflicts.
+       {
+               uint32_t t;
+               move_from_unaligned32(t, ((char *)&eth_addr + 2));
+               srand(t);
        }
+       if (ip.s_addr == 0)
+               ip.s_addr = pick();
 
        // FIXME cases to handle:
        //  - zcip already running!
        //  - link already has local address... just defend/update
 
        // daemonize now; don't delay system startup
-       if (!foreground) {
-               if (daemon(0, verbose) < 0) {
-                       why = "daemon";
-                       goto bad;
-               }
-               syslog(LOG_INFO, "start, interface %s", intf);
+       if (!FOREGROUND) {
+#if BB_MMU
+               bb_daemonize(0 /*was: DAEMON_CHDIR_ROOT*/);
+#endif
+               bb_info_msg("start, interface %s", argv_intf);
        }
 
        // run the dynamic address negotiation protocol,
        // restarting after address conflicts:
        //  - start with some address we want to try
        //  - short random delay
-       //  - arp probes to see if another host else uses it
+       //  - arp probes to see if another host uses it
        //  - arp announcements that we're claiming it
        //  - use it
        //  - defend it, within limits
+       // exit if:
+       // - address is successfully obtained and -q was given:
+       //   run "<script> config", then exit with exitcode 0
+       // - poll error (when does this happen?)
+       // - read error (when does this happen?)
+       // - sendto error (in arp()) (when does this happen?)
+       // - revents & POLLERR (link down). run "<script> deconfig" first
+       state = PROBE;
        while (1) {
                struct pollfd fds[1];
-               struct timeval tv1;
+               unsigned deadline_us;
                struct arp_packet p;
+               int source_ip_conflict;
+               int target_ip_conflict;
 
-               fds[0].fd = fd;
+               fds[0].fd = sock_fd;
                fds[0].events = POLLIN;
                fds[0].revents = 0;
 
                // poll, being ready to adjust current timeout
-               if (timeout > 0) {
-                       gettimeofday(&tv1, NULL);
-                       tv1.tv_usec += (timeout % 1000) * 1000;
-                       while (tv1.tv_usec > 1000000) {
-                               tv1.tv_usec -= 1000000;
-                               tv1.tv_sec++;
-                       }
-                       tv1.tv_sec += timeout / 1000;
-               } else if (timeout == 0) {
-                       timeout = ms_rdelay(PROBE_WAIT);
-                       // FIXME setsockopt(fd, SO_ATTACH_FILTER, ...) to
+               if (!timeout_ms) {
+                       timeout_ms = random_delay_ms(PROBE_WAIT);
+                       // FIXME setsockopt(sock_fd, SO_ATTACH_FILTER, ...) to
                        // make the kernel filter out all packets except
                        // ones we'd care about.
                }
-               VDBG("...wait %ld %s nprobes=%d, nclaims=%d\n",
-                               timeout, intf, nprobes, nclaims);
-               switch (poll(fds, 1, timeout)) {
+               // set deadline_us to the point in time when we timeout
+               deadline_us = MONOTONIC_US() + timeout_ms * 1000;
+
+               VDBG("...wait %d %s nprobes=%u, nclaims=%u\n",
+                               timeout_ms, argv_intf, nprobes, nclaims);
+
+               switch (safe_poll(fds, 1, timeout_ms)) {
 
-               // timeouts trigger protocol transitions
+               default:
+                       //bb_perror_msg("poll"); - done in safe_poll
+                       return EXIT_FAILURE;
+
+               // timeout
                case 0:
-                       // probes
-                       if (nprobes < PROBE_NUM) {
-                               nprobes++;
-                               VDBG("probe/%d %s@%s\n",
-                                               nprobes, intf, inet_ntoa(ip));
-                               (void)arp(fd, &saddr, ARPOP_REQUEST,
-                                               &addr, null_ip,
-                                               &null_addr, ip);
+                       VDBG("state = %d\n", state);
+                       switch (state) {
+                       case PROBE:
+                               // timeouts in the PROBE state mean no conflicting ARP packets
+                               // have been received, so we can progress through the states
                                if (nprobes < PROBE_NUM) {
-                                       timeout = PROBE_MIN * 1000;
-                                       timeout += ms_rdelay(PROBE_MAX
-                                                       - PROBE_MIN);
-                               } else
-                                       timeout = ANNOUNCE_WAIT * 1000;
-                       }
-                       // then announcements
-                       else if (nclaims < ANNOUNCE_NUM) {
-                               nclaims++;
-                               VDBG("announce/%d %s@%s\n",
-                                               nclaims, intf, inet_ntoa(ip));
-                               (void)arp(fd, &saddr, ARPOP_REQUEST,
-                                               &addr, ip,
-                                               &addr, ip);
+                                       nprobes++;
+                                       VDBG("probe/%u %s@%s\n",
+                                                       nprobes, argv_intf, inet_ntoa(ip));
+                                       timeout_ms = PROBE_MIN * 1000;
+                                       timeout_ms += random_delay_ms(PROBE_MAX - PROBE_MIN);
+                                       arp(/* ARPOP_REQUEST, */
+                                                       /* &eth_addr, */ null_ip,
+                                                       &null_addr, ip);
+                               }
+                               else {
+                                       // Switch to announce state.
+                                       state = ANNOUNCE;
+                                       nclaims = 0;
+                                       VDBG("announce/%u %s@%s\n",
+                                                       nclaims, argv_intf, inet_ntoa(ip));
+                                       timeout_ms = ANNOUNCE_INTERVAL * 1000;
+                                       arp(/* ARPOP_REQUEST, */
+                                                       /* &eth_addr, */ ip,
+                                                       &eth_addr, ip);
+                               }
+                               break;
+                       case RATE_LIMIT_PROBE:
+                               // timeouts in the RATE_LIMIT_PROBE state mean no conflicting ARP packets
+                               // have been received, so we can move immediately to the announce state
+                               state = ANNOUNCE;
+                               nclaims = 0;
+                               VDBG("announce/%u %s@%s\n",
+                                               nclaims, argv_intf, inet_ntoa(ip));
+                               timeout_ms = ANNOUNCE_INTERVAL * 1000;
+                               arp(/* ARPOP_REQUEST, */
+                                               /* &eth_addr, */ ip,
+                                               &eth_addr, ip);
+                               break;
+                       case ANNOUNCE:
+                               // timeouts in the ANNOUNCE state mean no conflicting ARP packets
+                               // have been received, so we can progress through the states
                                if (nclaims < ANNOUNCE_NUM) {
-                                       timeout = ANNOUNCE_INTERVAL * 1000;
-                               } else {
+                                       nclaims++;
+                                       VDBG("announce/%u %s@%s\n",
+                                                       nclaims, argv_intf, inet_ntoa(ip));
+                                       timeout_ms = ANNOUNCE_INTERVAL * 1000;
+                                       arp(/* ARPOP_REQUEST, */
+                                                       /* &eth_addr, */ ip,
+                                                       &eth_addr, ip);
+                               }
+                               else {
+                                       // Switch to monitor state.
+                                       state = MONITOR;
                                        // link is ok to use earlier
-                                       run(script, "config", intf, &ip);
+                                       // FIXME update filters
+                                       run(argv, "config", &ip);
                                        ready = 1;
                                        conflicts = 0;
-                                       timeout = -1;
+                                       timeout_ms = -1; // Never timeout in the monitor state.
 
-                                       // NOTE:  all other exit paths
+                                       // NOTE: all other exit paths
                                        // should deconfig ...
-                                       if (quit)
+                                       if (QUIT)
                                                return EXIT_SUCCESS;
-                                       // FIXME update filters
                                }
-                       }
-                       break;
+                               break;
+                       case DEFEND:
+                               // We won!  No ARP replies, so just go back to monitor.
+                               state = MONITOR;
+                               timeout_ms = -1;
+                               conflicts = 0;
+                               break;
+                       default:
+                               // Invalid, should never happen.  Restart the whole protocol.
+                               state = PROBE;
+                               ip.s_addr = pick();
+                               timeout_ms = 0;
+                               nprobes = 0;
+                               nclaims = 0;
+                               break;
+                       } // switch (state)
+                       break; // case 0 (timeout)
 
-               // packets arriving
+               // packets arriving, or link went down
                case 1:
-                       // maybe adjust timeout
-                       if (timeout > 0) {
-                               struct timeval tv2;
-
-                               gettimeofday(&tv2, NULL);
-                               if (timercmp(&tv1, &tv2, <)) {
-                                       timeout = -1;
+                       // We need to adjust the timeout in case we didn't receive
+                       // a conflicting packet.
+                       if (timeout_ms > 0) {
+                               unsigned diff = deadline_us - MONOTONIC_US();
+                               if ((int)(diff) < 0) {
+                                       // Current time is greater than the expected timeout time.
+                                       // Should never happen.
+                                       VDBG("missed an expected timeout\n");
+                                       timeout_ms = 0;
                                } else {
-                                       timersub(&tv1, &tv2, &tv1);
-                                       timeout = 1000 * tv1.tv_sec
-                                                       + tv1.tv_usec / 1000;
+                                       VDBG("adjusting timeout\n");
+                                       timeout_ms = (diff / 1000) | 1; /* never 0 */
                                }
                        }
+
                        if ((fds[0].revents & POLLIN) == 0) {
                                if (fds[0].revents & POLLERR) {
                                        // FIXME: links routinely go down;
                                        // this shouldn't necessarily exit.
-                                       fprintf(stderr, "%s %s: poll error\n",
-                                                       prog, intf);
+                                       bb_error_msg("iface %s is down", argv_intf);
                                        if (ready) {
-                                               run(script, "deconfig",
-                                                               intf, &ip);
+                                               run(argv, "deconfig", &ip);
                                        }
                                        return EXIT_FAILURE;
                                }
                                continue;
                        }
+
                        // read ARP packet
-                       if (recv(fd, &p, sizeof (p), 0) < 0) {
-                               why = "recv";
-                               goto bad;
+                       if (safe_read(sock_fd, &p, sizeof(p)) < 0) {
+                               bb_perror_msg_and_die(bb_msg_read_error);
                        }
-                       if (p.hdr.ether_type != htons(ETHERTYPE_ARP))
+                       if (p.eth.ether_type != htons(ETHERTYPE_ARP))
                                continue;
-
-                       VDBG("%s recv arp type=%d, op=%d,\n",
-                                       intf, ntohs(p.hdr.ether_type),
-                                       ntohs(p.arp.ar_op));
-                       VDBG("\tsource=%s %s\n",
-                                       ether_ntoa(&p.source_addr),
-                                       inet_ntoa(p.source_ip));
-                       VDBG("\ttarget=%s %s\n",
-                                       ether_ntoa(&p.target_addr),
-                                       inet_ntoa(p.target_ip));
-                       if (p.arp.ar_op != htons(ARPOP_REQUEST)
-                                       && p.arp.ar_op != htons(ARPOP_REPLY))
+#ifdef DEBUG
+                       {
+                               struct ether_addr *sha = (struct ether_addr *) p.arp.arp_sha;
+                               struct ether_addr *tha = (struct ether_addr *) p.arp.arp_tha;
+                               struct in_addr *spa = (struct in_addr *) p.arp.arp_spa;
+                               struct in_addr *tpa = (struct in_addr *) p.arp.arp_tpa;
+                               VDBG("%s recv arp type=%d, op=%d,\n",
+                                       argv_intf, ntohs(p.eth.ether_type),
+                                       ntohs(p.arp.arp_op));
+                               VDBG("\tsource=%s %s\n",
+                                       ether_ntoa(sha),
+                                       inet_ntoa(*spa));
+                               VDBG("\ttarget=%s %s\n",
+                                       ether_ntoa(tha),
+                                       inet_ntoa(*tpa));
+                       }
+#endif
+                       if (p.arp.arp_op != htons(ARPOP_REQUEST)
+                        && p.arp.arp_op != htons(ARPOP_REPLY)
+                       ) {
                                continue;
+                       }
+
+                       source_ip_conflict = 0;
+                       target_ip_conflict = 0;
+
+                       /*
+                        * Conflict detection.  Only packets whose sender hardware
+                        * address differs from eth_addr are examined.
+                        */
+                       if (memcmp(&p.arp.arp_sha, &eth_addr, ETH_ALEN) != 0) {
+                               /* memcmp() returns 0 on a match, so equality needs "== 0" */
+                               if (memcmp(p.arp.arp_spa, &ip.s_addr, sizeof(struct in_addr)) == 0) {
+                                       /* A probe or reply with source_ip == chosen ip */
+                                       source_ip_conflict = 1;
+                               }
+                               if (p.arp.arp_op == htons(ARPOP_REQUEST)
+                                && memcmp(p.arp.arp_spa, &null_ip, sizeof(struct in_addr)) == 0
+                                && memcmp(p.arp.arp_tpa, &ip.s_addr, sizeof(struct in_addr)) == 0
+                               ) {
+                                       /* A probe with source_ip == 0.0.0.0, target_ip == chosen ip:
+                                        * another host trying to claim this ip!
+                                        */
+                                       target_ip_conflict = 1;
+                               }
+                       }
 
-                       // some cases are always conflicts
-                       if ((p.source_ip.s_addr == ip.s_addr)
-                                       && (memcmp(&addr, &p.source_addr,
-                                                       ETH_ALEN) != 0)) {
-collision:
-                               VDBG("%s ARP conflict from %s\n", intf,
-                                               ether_ntoa(&p.source_addr));
-                               if (ready) {
-                                       time_t now = time(0);
-
-                                       if ((defend + DEFEND_INTERVAL)
-                                                       < now) {
-                                               defend = now;
-                                               (void)arp(fd, &saddr,
-                                                               ARPOP_REQUEST,
-                                                               &addr, ip,
-                                                               &addr, ip);
-                                               VDBG("%s defend\n", intf);
-                                               timeout = -1;
-                                               continue;
+                       VDBG("state = %d, source ip conflict = %d, target ip conflict = %d\n",
+                               state, source_ip_conflict, target_ip_conflict);
+                       switch (state) {
+                       case PROBE:
+                       case ANNOUNCE:
+                               // When probing or announcing, check for source IP conflicts
+                               // and other hosts doing ARP probes (target IP conflicts).
+                               if (source_ip_conflict || target_ip_conflict) {
+                                       conflicts++;
+                                       // Default: retry immediately.  This is assigned before
+                                       // the rate-limit check so that the longer delay set
+                                       // below is not overwritten.
+                                       timeout_ms = 0;
+                                       if (conflicts >= MAX_CONFLICTS) {
+                                               VDBG("%s ratelimit\n", argv_intf);
+                                               timeout_ms = RATE_LIMIT_INTERVAL * 1000;
+                                               state = RATE_LIMIT_PROBE;
                                        }
-                                       defend = now;
-                                       ready = 0;
-                                       run(script, "deconfig", intf, &ip);
-                                       // FIXME rm filters: setsockopt(fd,
-                                       // SO_DETACH_FILTER, ...)
+
+                                       // restart the whole protocol
+                                       ip.s_addr = pick();
+                                       nprobes = 0;
+                                       nclaims = 0;
+                               }
+                               break;
+                       case MONITOR:
+                               // If a conflict, we try to defend with a single ARP probe.
+                               if (source_ip_conflict) {
+                                       VDBG("monitor conflict -- defending\n");
+                                       state = DEFEND;
+                                       timeout_ms = DEFEND_INTERVAL * 1000;
+                                       arp(/* ARPOP_REQUEST, */
+                                               /* &eth_addr, */ ip,
+                                               &eth_addr, ip);
                                }
-                               conflicts++;
-                               if (conflicts >= MAX_CONFLICTS) {
-                                       VDBG("%s ratelimit\n", intf);
-                                       sleep(RATE_LIMIT_INTERVAL);
+                               break;
+                       case DEFEND:
+                               // Well, we tried.  Start over (on conflict).
+                               if (source_ip_conflict) {
+                                       state = PROBE;
+                                       VDBG("defend conflict -- starting over\n");
+                                       ready = 0;
+                                       run(argv, "deconfig", &ip);
+
+                                       // restart the whole protocol
+                                       ip.s_addr = pick();
+                                       timeout_ms = 0;
+                                       nprobes = 0;
+                                       nclaims = 0;
                                }
-                               // restart the whole protocol
-                               pick(&ip);
-                               timeout = 0;
+                               break;
+                       default:
+                               // Invalid, should never happen.  Restart the whole protocol.
+                               VDBG("invalid state -- starting over\n");
+                               state = PROBE;
+                               ip.s_addr = pick();
+                               timeout_ms = 0;
                                nprobes = 0;
                                nclaims = 0;
-                       }
-                       // two hosts probing one address is a collision too
-                       else if (p.target_ip.s_addr == ip.s_addr
-                                       && nclaims == 0
-                                       && p.arp.ar_op == htons(ARPOP_REQUEST)
-                                       && memcmp(&addr, &p.target_addr,
-                                                       ETH_ALEN) != 0) {
-                               goto collision;
-                       }
-                       break;
-
-               default:
-                       why = "poll";
-                       goto bad;
-               }
-       }
-bad:
-       if (foreground)
-               perror(why);
-       else
-               syslog(LOG_ERR, "%s %s, %s error: %s",
-                       prog, intf, why, strerror(errno));
-       return EXIT_FAILURE;
+                               break;
+                       } // switch state
+                       break; // case 1 (packets arriving)
+               } // switch poll
+       } // while (1)
+#undef argv_intf
 }