xatonum.h: add comment
[oweals/busybox.git] / networking / zcip.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * RFC3927 ZeroConf IPv4 Link-Local addressing
4  * (see <http://www.zeroconf.org/>)
5  *
6  * Copyright (C) 2003 by Arthur van Hoff (avh@strangeberry.com)
7  * Copyright (C) 2004 by David Brownell
8  *
9  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
10  */
11
12 /*
13  * ZCIP just manages the 169.254.*.* addresses.  That network is not
14  * routed at the IP level, though various proxies or bridges can
15  * certainly be used.  Its naming is built over multicast DNS.
16  */
17
18 //#define DEBUG
19
20 // TODO:
21 // - more real-world usage/testing, especially daemon mode
22 // - kernel packet filters to reduce scheduling noise
23 // - avoid silent script failures, especially under load...
24 // - link status monitoring (restart on link-up; stop on link-down)
25
26 #include <syslog.h>
27 #include <poll.h>
28 #include <sys/wait.h>
29 #include <netinet/ether.h>
30 #include <net/ethernet.h>
31 #include <net/if.h>
32 #include <net/if_arp.h>
33 #include <linux/if_packet.h>
34 #include <linux/sockios.h>
35
36 #include "libbb.h"
37
38 /* We don't need more than 32 bits of the counter: deadlines are compared by unsigned subtraction, which tolerates wrap-around */
39 #define MONOTONIC_US() ((unsigned)monotonic_us())
40
41 struct arp_packet { /* one ARP frame exactly as sent to / read from the packet socket */
42         struct ether_header hdr; /* Ethernet header: destination, source, ethertype */
43         struct ether_arp arp; /* ARP payload (filled as Ethernet/IPv4 by arp()) */
44 } ATTRIBUTE_PACKED;
45
46 enum {
47 /* 169.254.0.0, in host byte order */
48         LINKLOCAL_ADDR = 0xa9fe0000,
49
50 /* protocol parameters: the *_WAIT, *_MIN, *_MAX and *_INTERVAL values are in seconds; *_NUM and MAX_CONFLICTS are counts */
51         PROBE_WAIT = 1, /* upper bound of the random delay used when no timeout is pending */
52         PROBE_MIN = 1, /* minimum delay after sending a probe */
53         PROBE_MAX = 2, /* maximum delay after sending a probe */
54         PROBE_NUM = 3, /* bound on the nprobes counter in the PROBE state */
55         MAX_CONFLICTS = 10, /* conflict count at which RATE_LIMIT_PROBE is selected */
56         RATE_LIMIT_INTERVAL = 60, /* timeout associated with the RATE_LIMIT_PROBE state */
57         ANNOUNCE_WAIT = 2, /* NOTE(review): not referenced by the code visible in this file */
58         ANNOUNCE_NUM = 2, /* bound on the nclaims counter in the ANNOUNCE state */
59         ANNOUNCE_INTERVAL = 2, /* delay after each announcement */
60         DEFEND_INTERVAL = 10 /* time spent in DEFEND before falling back to MONITOR */
61 };
62
63 /* States during the configuration process (the value of "state" in zcip_main). */
64 enum {
65         PROBE = 0, /* sending ARP probes for the candidate address */
66         RATE_LIMIT_PROBE, /* selected once the conflict count reaches MAX_CONFLICTS */
67         ANNOUNCE, /* broadcasting ARP announcements for the candidate address */
68         MONITOR, /* "config" script has run; polling without timeout for conflicting ARP */
69         DEFEND /* a conflict was seen in MONITOR and one ARP was sent in response */
70 };
71
72 #define VDBG(fmt,args...) \
73         do { } while (0) /* debug tracing is compiled out: the macro expands to an empty statement and its arguments are discarded */
74
75 /**
76  * Pick a random link local IP address on 169.254/16, except that
77  * the first and last 256 addresses are reserved.
78  */
79 static void pick(struct in_addr *ip)
80 {
81         unsigned tmp;
82
83         do {
84                 tmp = rand() & IN_CLASSB_HOST;
85         } while (tmp > (IN_CLASSB_HOST - 0x0200));
86         ip->s_addr = htonl((LINKLOCAL_ADDR + 0x0100) + tmp);
87 }
88
89 /**
90  * Broadcast an ARP packet.
91  */
92 static void arp(int fd, struct sockaddr *saddr, int op,
93         const struct ether_addr *source_addr, struct in_addr source_ip,
94         const struct ether_addr *target_addr, struct in_addr target_ip)
95 {
96         struct arp_packet p;
97         memset(&p, 0, sizeof(p));
98
99         // ether header
100         p.hdr.ether_type = htons(ETHERTYPE_ARP);
101         memcpy(p.hdr.ether_shost, source_addr, ETH_ALEN);
102         memset(p.hdr.ether_dhost, 0xff, ETH_ALEN);
103
104         // arp request
105         p.arp.arp_hrd = htons(ARPHRD_ETHER);
106         p.arp.arp_pro = htons(ETHERTYPE_IP);
107         p.arp.arp_hln = ETH_ALEN;
108         p.arp.arp_pln = 4;
109         p.arp.arp_op = htons(op);
110         memcpy(&p.arp.arp_sha, source_addr, ETH_ALEN);
111         memcpy(&p.arp.arp_spa, &source_ip, sizeof(p.arp.arp_spa));
112         memcpy(&p.arp.arp_tha, target_addr, ETH_ALEN);
113         memcpy(&p.arp.arp_tpa, &target_ip, sizeof(p.arp.arp_tpa));
114
115         // send it
116         xsendto(fd, &p, sizeof(p), saddr, sizeof(*saddr));
117
118         // Currently all callers ignore errors, that's why returns are
119         // commented out...
120         //return 0;
121 }
122
/**
 * Run the helper script and wait for it to finish.
 *
 * argv  argv[0] is the script, argv[1] the action; argv[2] is already NULL
 * intf  interface name, used only in log messages
 * ip    when non-NULL, exported to the script as environment variable
 *       "ip" (dotted quad) and logged
 *
 * Returns the value reported by wait4pid() when it is >= 0 (0 means the
 * script succeeded), or a negative value when the script could not be
 * spawned or waited for.
 */
static int run(char *argv[3], const char *intf, struct in_addr *ip)
{
	int status;

	VDBG("%s run %s %s\n", intf, argv[0], argv[1]);

	if (ip) {
		char *addr = inet_ntoa(*ip);
		setenv("ip", addr, 1);
		bb_info_msg("%s %s %s", argv[1], intf, addr);
	}

	status = wait4pid(spawn(argv));
	if (status < 0) {
		// Capture errno first: the logging call below may overwrite it.
		int err = errno;

		bb_perror_msg("%s %s", argv[1], intf);
		// Callers treat 0 as success, so never return 0 on this path.
		return err ? -err : -1;
	}
	if (status != 0)
		bb_error_msg("script %s %s failed, exitcode=%d", argv[0], argv[1], status);
	return status;
}
147
148 /**
149  * Return milliseconds of random delay, up to "secs" seconds.
150  */
151 static unsigned ALWAYS_INLINE ms_rdelay(unsigned secs)
152 {
153         return rand() % (secs * 1000);
154 }
155
156 /**
157  * main program
158  */
159 int zcip_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
160 int zcip_main(int argc, char **argv)
161 {
162         int state = PROBE;
163         /* Prevent unaligned traps for ARM (see srand() below) */
164         struct ether_addr eth_addr __attribute__(( aligned(sizeof(unsigned)) ));
165         const char *why;
166         int fd;
167         char *r_opt;
168         unsigned opts;
169
170         /* Ugly trick, but I want these zeroed in one go */
171         struct {
172                 const struct in_addr null_ip;
173                 const struct ether_addr null_addr;
174                 struct sockaddr saddr;
175                 struct in_addr ip;
176                 struct ifreq ifr;
177                 char *intf;
178                 char *script_av[3];
179                 int timeout_ms; /* must be signed */
180                 unsigned conflicts;
181                 unsigned nprobes;
182                 unsigned nclaims;
183                 int ready;
184                 int verbose;
185         } L;
186 #define null_ip    (L.null_ip   )
187 #define null_addr  (L.null_addr )
188 #define saddr      (L.saddr     )
189 #define ip         (L.ip        )
190 #define ifr        (L.ifr       )
191 #define intf       (L.intf      )
192 #define script_av  (L.script_av )
193 #define timeout_ms (L.timeout_ms)
194 #define conflicts  (L.conflicts )
195 #define nprobes    (L.nprobes   )
196 #define nclaims    (L.nclaims   )
197 #define ready      (L.ready     )
198 #define verbose    (L.verbose   )
199
200         memset(&L, 0, sizeof(L));
201
202 #define FOREGROUND (opts & 1)
203 #define QUIT       (opts & 2)
204         // parse commandline: prog [options] ifname script
205         // exactly 2 args; -v accumulates and implies -f
206         opt_complementary = "=2:vv:vf";
207         opts = getopt32(argv, "fqr:v", &r_opt, &verbose);
208         if (!FOREGROUND) {
209                 /* Do it early, before all bb_xx_msg calls */
210                 openlog(applet_name, 0, LOG_DAEMON);
211                 logmode |= LOGMODE_SYSLOG;
212         }
213         if (opts & 4) { // -r n.n.n.n
214                 if (inet_aton(r_opt, &ip) == 0
215                  || (ntohl(ip.s_addr) & IN_CLASSB_NET) != LINKLOCAL_ADDR
216                 ) {
217                         bb_error_msg_and_die("invalid link address");
218                 }
219         }
220         // On NOMMU reexec early (or else we will rerun things twice)
221 #if !BB_MMU
222         if (!FOREGROUND)
223                 bb_daemonize_or_rexec(DAEMON_CHDIR_ROOT, argv);
224 #endif
225         argc -= optind;
226         argv += optind;
227
228         intf = argv[0];
229         script_av[0] = argv[1];
230         setenv("interface", intf, 1);
231
232         // initialize the interface (modprobe, ifup, etc)
233         script_av[1] = (char*)"init";
234         if (run(script_av, intf, NULL))
235                 return EXIT_FAILURE;
236
237         // initialize saddr
238         //memset(&saddr, 0, sizeof(saddr));
239         safe_strncpy(saddr.sa_data, intf, sizeof(saddr.sa_data));
240
241         // open an ARP socket
242         fd = xsocket(PF_PACKET, SOCK_PACKET, htons(ETH_P_ARP));
243         // bind to the interface's ARP socket
244         xbind(fd, &saddr, sizeof(saddr));
245
246         // get the interface's ethernet address
247         //memset(&ifr, 0, sizeof(ifr));
248         strncpy(ifr.ifr_name, intf, sizeof(ifr.ifr_name));
249         xioctl(fd, SIOCGIFHWADDR, &ifr);
250         memcpy(&eth_addr, &ifr.ifr_hwaddr.sa_data, ETH_ALEN);
251
252         // start with some stable ip address, either a function of
253         // the hardware address or else the last address we used.
254         // NOTE: the sequence of addresses we try changes only
255         // depending on when we detect conflicts.
256         srand(*(unsigned*)&eth_addr);
257         if (ip.s_addr == 0)
258                 pick(&ip);
259
260         // FIXME cases to handle:
261         //  - zcip already running!
262         //  - link already has local address... just defend/update
263
264         // daemonize now; don't delay system startup
265         if (!FOREGROUND) {
266 #if BB_MMU
267                 bb_daemonize(DAEMON_CHDIR_ROOT);
268 #endif
269                 bb_info_msg("start, interface %s", intf);
270         }
271
272         // run the dynamic address negotiation protocol,
273         // restarting after address conflicts:
274         //  - start with some address we want to try
275         //  - short random delay
276         //  - arp probes to see if another host else uses it
277         //  - arp announcements that we're claiming it
278         //  - use it
279         //  - defend it, within limits
280         while (1) {
281                 struct pollfd fds[1];
282                 unsigned deadline_us;
283                 struct arp_packet p;
284
285                 int source_ip_conflict = 0;
286                 int target_ip_conflict = 0;
287
288                 fds[0].fd = fd;
289                 fds[0].events = POLLIN;
290                 fds[0].revents = 0;
291
292                 // poll, being ready to adjust current timeout
293                 if (!timeout_ms) {
294                         timeout_ms = ms_rdelay(PROBE_WAIT);
295                         // FIXME setsockopt(fd, SO_ATTACH_FILTER, ...) to
296                         // make the kernel filter out all packets except
297                         // ones we'd care about.
298                 }
299                 // set deadline_us to the point in time when we timeout
300                 deadline_us = MONOTONIC_US() + timeout_ms * 1000;
301
302                 VDBG("...wait %d %s nprobes=%u, nclaims=%u\n",
303                                 timeout_ms, intf, nprobes, nclaims);
304
305                 switch (safe_poll(fds, 1, timeout_ms)) {
306
307                 default:
308                         /*bb_perror_msg("poll"); - done in safe_poll */
309                         return EXIT_FAILURE;
310
311                 // timeout
312                 case 0:
313                         VDBG("state = %d\n", state);
314                         switch (state) {
315                         case PROBE:
316                                 // timeouts in the PROBE state mean no conflicting ARP packets
317                                 // have been received, so we can progress through the states
318                                 if (nprobes < PROBE_NUM) {
319                                         nprobes++;
320                                         VDBG("probe/%u %s@%s\n",
321                                                         nprobes, intf, inet_ntoa(ip));
322                                         arp(fd, &saddr, ARPOP_REQUEST,
323                                                         &eth_addr, null_ip,
324                                                         &null_addr, ip);
325                                         timeout_ms = PROBE_MIN * 1000;
326                                         timeout_ms += ms_rdelay(PROBE_MAX - PROBE_MIN);
327                                 }
328                                 else {
329                                         // Switch to announce state.
330                                         state = ANNOUNCE;
331                                         nclaims = 0;
332                                         VDBG("announce/%u %s@%s\n",
333                                                         nclaims, intf, inet_ntoa(ip));
334                                         arp(fd, &saddr, ARPOP_REQUEST,
335                                                         &eth_addr, ip,
336                                                         &eth_addr, ip);
337                                         timeout_ms = ANNOUNCE_INTERVAL * 1000;
338                                 }
339                                 break;
340                         case RATE_LIMIT_PROBE:
341                                 // timeouts in the RATE_LIMIT_PROBE state mean no conflicting ARP packets
342                                 // have been received, so we can move immediately to the announce state
343                                 state = ANNOUNCE;
344                                 nclaims = 0;
345                                 VDBG("announce/%u %s@%s\n",
346                                                 nclaims, intf, inet_ntoa(ip));
347                                 arp(fd, &saddr, ARPOP_REQUEST,
348                                                 &eth_addr, ip,
349                                                 &eth_addr, ip);
350                                 timeout_ms = ANNOUNCE_INTERVAL * 1000;
351                                 break;
352                         case ANNOUNCE:
353                                 // timeouts in the ANNOUNCE state mean no conflicting ARP packets
354                                 // have been received, so we can progress through the states
355                                 if (nclaims < ANNOUNCE_NUM) {
356                                         nclaims++;
357                                         VDBG("announce/%u %s@%s\n",
358                                                         nclaims, intf, inet_ntoa(ip));
359                                         arp(fd, &saddr, ARPOP_REQUEST,
360                                                         &eth_addr, ip,
361                                                         &eth_addr, ip);
362                                         timeout_ms = ANNOUNCE_INTERVAL * 1000;
363                                 }
364                                 else {
365                                         // Switch to monitor state.
366                                         state = MONITOR;
367                                         // link is ok to use earlier
368                                         // FIXME update filters
369                                         script_av[1] = (char*)"config";
370                                         run(script_av, intf, &ip);
371                                         ready = 1;
372                                         conflicts = 0;
373                                         timeout_ms = -1; // Never timeout in the monitor state.
374
375                                         // NOTE: all other exit paths
376                                         // should deconfig ...
377                                         if (QUIT)
378                                                 return EXIT_SUCCESS;
379                                 }
380                                 break;
381                         case DEFEND:
382                                 // We won!  No ARP replies, so just go back to monitor.
383                                 state = MONITOR;
384                                 timeout_ms = -1;
385                                 conflicts = 0;
386                                 break;
387                         default:
388                                 // Invalid, should never happen.  Restart the whole protocol.
389                                 state = PROBE;
390                                 pick(&ip);
391                                 timeout_ms = 0;
392                                 nprobes = 0;
393                                 nclaims = 0;
394                                 break;
395                         } // switch (state)
396                         break; // case 0 (timeout)
397
398                 // packets arriving
399                 case 1:
400                         // We need to adjust the timeout in case we didn't receive
401                         // a conflicting packet.
402                         if (timeout_ms > 0) {
403                                 unsigned diff = deadline_us - MONOTONIC_US();
404                                 if ((int)(diff) < 0) {
405                                         // Current time is greater than the expected timeout time.
406                                         // Should never happen.
407                                         VDBG("missed an expected timeout\n");
408                                         timeout_ms = 0;
409                                 } else {
410                                         VDBG("adjusting timeout\n");
411                                         timeout_ms = diff / 1000;
412                                         if (!timeout_ms) timeout_ms = 1;
413                                 }
414                         }
415
416                         if ((fds[0].revents & POLLIN) == 0) {
417                                 if (fds[0].revents & POLLERR) {
418                                         // FIXME: links routinely go down;
419                                         // this shouldn't necessarily exit.
420                                         bb_error_msg("%s: poll error", intf);
421                                         if (ready) {
422                                                 script_av[1] = (char*)"deconfig";
423                                                 run(script_av, intf, &ip);
424                                         }
425                                         return EXIT_FAILURE;
426                                 }
427                                 continue;
428                         }
429
430                         // read ARP packet
431                         if (recv(fd, &p, sizeof(p), 0) < 0) {
432                                 why = "recv";
433                                 goto bad;
434                         }
435                         if (p.hdr.ether_type != htons(ETHERTYPE_ARP))
436                                 continue;
437
438 #ifdef DEBUG
439                         {
440                                 struct ether_addr * sha = (struct ether_addr *) p.arp.arp_sha;
441                                 struct ether_addr * tha = (struct ether_addr *) p.arp.arp_tha;
442                                 struct in_addr * spa = (struct in_addr *) p.arp.arp_spa;
443                                 struct in_addr * tpa = (struct in_addr *) p.arp.arp_tpa;
444                                 VDBG("%s recv arp type=%d, op=%d,\n",
445                                         intf, ntohs(p.hdr.ether_type),
446                                         ntohs(p.arp.arp_op));
447                                 VDBG("\tsource=%s %s\n",
448                                         ether_ntoa(sha),
449                                         inet_ntoa(*spa));
450                                 VDBG("\ttarget=%s %s\n",
451                                         ether_ntoa(tha),
452                                         inet_ntoa(*tpa));
453                         }
454 #endif
455                         if (p.arp.arp_op != htons(ARPOP_REQUEST)
456                                         && p.arp.arp_op != htons(ARPOP_REPLY))
457                                 continue;
458
459                         if (memcmp(p.arp.arp_spa, &ip.s_addr, sizeof(struct in_addr)) == 0 &&
460                                 memcmp(&eth_addr, &p.arp.arp_sha, ETH_ALEN) != 0) {
461                                 source_ip_conflict = 1;
462                         }
463                         if (memcmp(p.arp.arp_tpa, &ip.s_addr, sizeof(struct in_addr)) == 0 &&
464                                 p.arp.arp_op == htons(ARPOP_REQUEST) &&
465                                 memcmp(&eth_addr, &p.arp.arp_tha, ETH_ALEN) != 0) {
466                                 target_ip_conflict = 1;
467                         }
468
469                         VDBG("state = %d, source ip conflict = %d, target ip conflict = %d\n",
470                                 state, source_ip_conflict, target_ip_conflict);
471                         switch (state) {
472                         case PROBE:
473                         case ANNOUNCE:
474                                 // When probing or announcing, check for source IP conflicts
475                                 // and other hosts doing ARP probes (target IP conflicts).
476                                 if (source_ip_conflict || target_ip_conflict) {
477                                         conflicts++;
478                                         if (conflicts >= MAX_CONFLICTS) {
479                                                 VDBG("%s ratelimit\n", intf);
480                                                 timeout_ms = RATE_LIMIT_INTERVAL * 1000;
481                                                 state = RATE_LIMIT_PROBE;
482                                         }
483
484                                         // restart the whole protocol
485                                         pick(&ip);
486                                         timeout_ms = 0;
487                                         nprobes = 0;
488                                         nclaims = 0;
489                                 }
490                                 break;
491                         case MONITOR:
492                                 // If a conflict, we try to defend with a single ARP probe.
493                                 if (source_ip_conflict) {
494                                         VDBG("monitor conflict -- defending\n");
495                                         state = DEFEND;
496                                         timeout_ms = DEFEND_INTERVAL * 1000;
497                                         arp(fd, &saddr,
498                                                         ARPOP_REQUEST,
499                                                         &eth_addr, ip,
500                                                         &eth_addr, ip);
501                                 }
502                                 break;
503                         case DEFEND:
504                                 // Well, we tried.  Start over (on conflict).
505                                 if (source_ip_conflict) {
506                                         state = PROBE;
507                                         VDBG("defend conflict -- starting over\n");
508                                         ready = 0;
509                                         script_av[1] = (char*)"deconfig";
510                                         run(script_av, intf, &ip);
511
512                                         // restart the whole protocol
513                                         pick(&ip);
514                                         timeout_ms = 0;
515                                         nprobes = 0;
516                                         nclaims = 0;
517                                 }
518                                 break;
519                         default:
520                                 // Invalid, should never happen.  Restart the whole protocol.
521                                 VDBG("invalid state -- starting over\n");
522                                 state = PROBE;
523                                 pick(&ip);
524                                 timeout_ms = 0;
525                                 nprobes = 0;
526                                 nclaims = 0;
527                                 break;
528                         } // switch state
529                         break; // case 1 (packets arriving)
530                 } // switch poll
531         } // while (1)
532  bad:
533         bb_perror_msg("%s, %s", intf, why);
534         return EXIT_FAILURE;
535 }