de4ee0b1a8a13f180ce688e9fbb0dd2b19c6ed6f
[oweals/busybox.git] / networking / zcip.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * RFC3927 ZeroConf IPv4 Link-Local addressing
4  * (see <http://www.zeroconf.org/>)
5  *
6  * Copyright (C) 2003 by Arthur van Hoff (avh@strangeberry.com)
7  * Copyright (C) 2004 by David Brownell
8  *
9  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
10  */
11
12 /*
13  * ZCIP just manages the 169.254.*.* addresses.  That network is not
14  * routed at the IP level, though various proxies or bridges can
15  * certainly be used.  Its naming is built over multicast DNS.
16  */
17
18 //#define DEBUG
19
20 // TODO:
21 // - more real-world usage/testing, especially daemon mode
22 // - kernel packet filters to reduce scheduling noise
23 // - avoid silent script failures, especially under load...
24 // - link status monitoring (restart on link-up; stop on link-down)
25
26 #include <syslog.h>
27 #include <poll.h>
28 #include <sys/wait.h>
29 #include <netinet/ether.h>
30 #include <net/ethernet.h>
31 #include <net/if.h>
32 #include <net/if_arp.h>
33 #include <linux/if_packet.h>
34 #include <linux/sockios.h>
35
36 #include "libbb.h"
37
38 /* We don't need more than 32 bits of the counter */
39 #define MONOTONIC_US() ((unsigned)monotonic_us())
40
41 struct arp_packet {
42         struct ether_header hdr;
43         struct ether_arp arp;
44 } ATTRIBUTE_PACKED;
45
enum {
	/* 169.254.0.0, in host byte order */
	LINKLOCAL_ADDR = 0xa9fe0000,

	/* protocol repeat counts */
	PROBE_NUM = 3,
	ANNOUNCE_NUM = 2,
	MAX_CONFLICTS = 10,

	/* protocol timeout parameters, specified in seconds */
	PROBE_WAIT = 1,
	PROBE_MIN = 1,
	PROBE_MAX = 2,
	ANNOUNCE_WAIT = 2,
	ANNOUNCE_INTERVAL = 2,
	RATE_LIMIT_INTERVAL = 60,
	DEFEND_INTERVAL = 10
};
62
/* States of the address configuration state machine in zcip_main(). */
enum {
	PROBE            = 0,	/* sending ARP probes for a candidate address */
	RATE_LIMIT_PROBE = 1,	/* entered after MAX_CONFLICTS conflicts */
	ANNOUNCE         = 2,	/* sending ARP announcements for the address */
	MONITOR          = 3,	/* address configured; watching for conflicts */
	DEFEND           = 4	/* conflict seen in MONITOR; defending the address */
};
71
/* Verbose debug output: expands to an empty statement, arguments are not evaluated. */
#define VDBG(fmt,args...) do { } while (0)
74
75 /**
76  * Pick a random link local IP address on 169.254/16, except that
77  * the first and last 256 addresses are reserved.
78  */
79 static void pick(struct in_addr *ip)
80 {
81         unsigned tmp;
82
83         do {
84                 tmp = rand() & IN_CLASSB_HOST;
85         } while (tmp > (IN_CLASSB_HOST - 0x0200));
86         ip->s_addr = htonl((LINKLOCAL_ADDR + 0x0100) + tmp);
87 }
88
89 /**
90  * Broadcast an ARP packet.
91  */
92 static void arp(int fd, struct sockaddr *saddr, int op,
93         const struct ether_addr *source_addr, struct in_addr source_ip,
94         const struct ether_addr *target_addr, struct in_addr target_ip)
95 {
96         struct arp_packet p;
97         memset(&p, 0, sizeof(p));
98
99         // ether header
100         p.hdr.ether_type = htons(ETHERTYPE_ARP);
101         memcpy(p.hdr.ether_shost, source_addr, ETH_ALEN);
102         memset(p.hdr.ether_dhost, 0xff, ETH_ALEN);
103
104         // arp request
105         p.arp.arp_hrd = htons(ARPHRD_ETHER);
106         p.arp.arp_pro = htons(ETHERTYPE_IP);
107         p.arp.arp_hln = ETH_ALEN;
108         p.arp.arp_pln = 4;
109         p.arp.arp_op = htons(op);
110         memcpy(&p.arp.arp_sha, source_addr, ETH_ALEN);
111         memcpy(&p.arp.arp_spa, &source_ip, sizeof(p.arp.arp_spa));
112         memcpy(&p.arp.arp_tha, target_addr, ETH_ALEN);
113         memcpy(&p.arp.arp_tpa, &target_ip, sizeof(p.arp.arp_tpa));
114
115         // send it
116         xsendto(fd, &p, sizeof(p), saddr, sizeof(*saddr));
117
118         // Currently all callers ignore errors, that's why returns are
119         // commented out...
120         //return 0;
121 }
122
/**
 * Run a script. argv[2] is already NULL.
 *
 * argv  argv[0] is the script, argv[1] the action passed to it
 * intf  interface name, used in log messages only
 * ip    if not NULL, the address is exported to the script in the
 *       "ip" environment variable and logged
 *
 * Returns the negated errno if wait4pid() reports a failure (negative
 * value), otherwise the status returned by wait4pid(); a nonzero
 * status is logged as a script failure.
 */
static int run(char *argv[3], const char *intf, struct in_addr *ip)
{
	int status;

	VDBG("%s run %s %s\n", intf, argv[0], argv[1]);

	if (ip) {
		char *addr = inet_ntoa(*ip);
		setenv("ip", addr, 1);
		bb_info_msg("%s %s %s", argv[1], intf, addr);
	}

	status = wait4pid(spawn(argv));
	if (status < 0) {
		// Logging may itself change errno, so capture the value
		// that describes the failure before calling the logger.
		int saved_errno = errno;

		bb_perror_msg("%s %s", argv[1], intf);
		return -saved_errno;
	}
	if (status != 0)
		bb_error_msg("script %s %s failed, exitcode=%d", argv[0], argv[1], status);
	return status;
}
147
148 /**
149  * Return milliseconds of random delay, up to "secs" seconds.
150  */
151 static unsigned ALWAYS_INLINE ms_rdelay(unsigned secs)
152 {
153         return rand() % (secs * 1000);
154 }
155
156 /**
157  * main program
158  */
159 int zcip_main(int argc, char **argv);
160 int zcip_main(int argc, char **argv)
161 {
162         int state = PROBE;
163         struct ether_addr eth_addr;
164         const char *why;
165         int fd;
166         char *r_opt;
167         unsigned opts;
168
169         /* Ugly trick, but I want these zeroed in one go */
170         struct {
171                 const struct in_addr null_ip;
172                 const struct ether_addr null_addr;
173                 struct sockaddr saddr;
174                 struct in_addr ip;
175                 struct ifreq ifr;
176                 char *intf;
177                 char *script_av[3];
178                 int timeout_ms; /* must be signed */
179                 unsigned conflicts;
180                 unsigned nprobes;
181                 unsigned nclaims;
182                 int ready;
183                 int verbose;
184         } L;
185 #define null_ip    (L.null_ip   )
186 #define null_addr  (L.null_addr )
187 #define saddr      (L.saddr     )
188 #define ip         (L.ip        )
189 #define ifr        (L.ifr       )
190 #define intf       (L.intf      )
191 #define script_av  (L.script_av )
192 #define timeout_ms (L.timeout_ms)
193 #define conflicts  (L.conflicts )
194 #define nprobes    (L.nprobes   )
195 #define nclaims    (L.nclaims   )
196 #define ready      (L.ready     )
197 #define verbose    (L.verbose   )
198
199         memset(&L, 0, sizeof(L));
200
201         srand(MONOTONIC_US());
202
203 #define FOREGROUND (opts & 1)
204 #define QUIT       (opts & 2)
205         // parse commandline: prog [options] ifname script
206         // exactly 2 args; -v accumulates and implies -f
207         opt_complementary = "=2:vv:vf";
208         opts = getopt32(argc, argv, "fqr:v", &r_opt, &verbose);
209         if (!FOREGROUND) {
210                 /* Do it early, before all bb_xx_msg calls */
211                 openlog(applet_name, 0, LOG_DAEMON);
212                 logmode |= LOGMODE_SYSLOG;
213         }
214         if (opts & 4) { // -r n.n.n.n
215                 if (inet_aton(r_opt, &ip) == 0
216                  || (ntohl(ip.s_addr) & IN_CLASSB_NET) != LINKLOCAL_ADDR
217                 ) {
218                         bb_error_msg_and_die("invalid link address");
219                 }
220         }
221         // On NOMMU reexec early (or else we will rerun things twice)
222 #if !BB_MMU
223         if (!FOREGROUND)
224                 bb_daemonize_or_rexec(DAEMON_CHDIR_ROOT, argv);
225 #endif
226         argc -= optind;
227         argv += optind;
228
229         intf = argv[0];
230         script_av[0] = argv[1];
231         setenv("interface", intf, 1);
232
233         // initialize the interface (modprobe, ifup, etc)
234         script_av[1] = (char*)"init";
235         if (run(script_av, intf, NULL))
236                 return EXIT_FAILURE;
237
238         // initialize saddr
239         //memset(&saddr, 0, sizeof(saddr));
240         safe_strncpy(saddr.sa_data, intf, sizeof(saddr.sa_data));
241
242         // open an ARP socket
243         fd = xsocket(PF_PACKET, SOCK_PACKET, htons(ETH_P_ARP));
244         // bind to the interface's ARP socket
245         xbind(fd, &saddr, sizeof(saddr));
246
247         // get the interface's ethernet address
248         //memset(&ifr, 0, sizeof(ifr));
249         strncpy(ifr.ifr_name, intf, sizeof(ifr.ifr_name));
250         if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) {
251                 bb_perror_msg_and_die("get ethernet address");
252         }
253         memcpy(&eth_addr, &ifr.ifr_hwaddr.sa_data, ETH_ALEN);
254
255         // start with some stable ip address, either a function of
256         // the hardware address or else the last address we used.
257         // NOTE: the sequence of addresses we try changes only
258         // depending on when we detect conflicts.
259         // (SVID 3 bogon: who says that "short" is always 16 bits?)
260         seed48( (unsigned short*)&ifr.ifr_hwaddr.sa_data );
261         if (ip.s_addr == 0)
262                 pick(&ip);
263
264         // FIXME cases to handle:
265         //  - zcip already running!
266         //  - link already has local address... just defend/update
267
268         // daemonize now; don't delay system startup
269         if (!FOREGROUND) {
270 #if BB_MMU
271                 bb_daemonize(DAEMON_CHDIR_ROOT);
272 #endif
273                 bb_info_msg("start, interface %s", intf);
274         }
275
276         // run the dynamic address negotiation protocol,
277         // restarting after address conflicts:
278         //  - start with some address we want to try
279         //  - short random delay
280         //  - arp probes to see if another host else uses it
281         //  - arp announcements that we're claiming it
282         //  - use it
283         //  - defend it, within limits
284         while (1) {
285                 struct pollfd fds[1];
286                 unsigned deadline_us;
287                 struct arp_packet p;
288
289                 int source_ip_conflict = 0;
290                 int target_ip_conflict = 0;
291
292                 fds[0].fd = fd;
293                 fds[0].events = POLLIN;
294                 fds[0].revents = 0;
295
296                 // poll, being ready to adjust current timeout
297                 if (!timeout_ms) {
298                         timeout_ms = ms_rdelay(PROBE_WAIT);
299                         // FIXME setsockopt(fd, SO_ATTACH_FILTER, ...) to
300                         // make the kernel filter out all packets except
301                         // ones we'd care about.
302                 }
303                 // set deadline_us to the point in time when we timeout
304                 deadline_us = MONOTONIC_US() + timeout_ms * 1000;
305
306                 VDBG("...wait %d %s nprobes=%u, nclaims=%u\n",
307                                 timeout_ms, intf, nprobes, nclaims);
308                 switch (poll(fds, 1, timeout_ms)) {
309
310                 // timeout
311                 case 0:
312                         VDBG("state = %d\n", state);
313                         switch (state) {
314                         case PROBE:
315                                 // timeouts in the PROBE state mean no conflicting ARP packets
316                                 // have been received, so we can progress through the states
317                                 if (nprobes < PROBE_NUM) {
318                                         nprobes++;
319                                         VDBG("probe/%u %s@%s\n",
320                                                         nprobes, intf, inet_ntoa(ip));
321                                         arp(fd, &saddr, ARPOP_REQUEST,
322                                                         &eth_addr, null_ip,
323                                                         &null_addr, ip);
324                                         timeout_ms = PROBE_MIN * 1000;
325                                         timeout_ms += ms_rdelay(PROBE_MAX - PROBE_MIN);
326                                 }
327                                 else {
328                                         // Switch to announce state.
329                                         state = ANNOUNCE;
330                                         nclaims = 0;
331                                         VDBG("announce/%u %s@%s\n",
332                                                         nclaims, intf, inet_ntoa(ip));
333                                         arp(fd, &saddr, ARPOP_REQUEST,
334                                                         &eth_addr, ip,
335                                                         &eth_addr, ip);
336                                         timeout_ms = ANNOUNCE_INTERVAL * 1000;
337                                 }
338                                 break;
339                         case RATE_LIMIT_PROBE:
340                                 // timeouts in the RATE_LIMIT_PROBE state mean no conflicting ARP packets
341                                 // have been received, so we can move immediately to the announce state
342                                 state = ANNOUNCE;
343                                 nclaims = 0;
344                                 VDBG("announce/%u %s@%s\n",
345                                                 nclaims, intf, inet_ntoa(ip));
346                                 arp(fd, &saddr, ARPOP_REQUEST,
347                                                 &eth_addr, ip,
348                                                 &eth_addr, ip);
349                                 timeout_ms = ANNOUNCE_INTERVAL * 1000;
350                                 break;
351                         case ANNOUNCE:
352                                 // timeouts in the ANNOUNCE state mean no conflicting ARP packets
353                                 // have been received, so we can progress through the states
354                                 if (nclaims < ANNOUNCE_NUM) {
355                                         nclaims++;
356                                         VDBG("announce/%u %s@%s\n",
357                                                         nclaims, intf, inet_ntoa(ip));
358                                         arp(fd, &saddr, ARPOP_REQUEST,
359                                                         &eth_addr, ip,
360                                                         &eth_addr, ip);
361                                         timeout_ms = ANNOUNCE_INTERVAL * 1000;
362                                 }
363                                 else {
364                                         // Switch to monitor state.
365                                         state = MONITOR;
366                                         // link is ok to use earlier
367                                         // FIXME update filters
368                                         script_av[1] = (char*)"config";
369                                         run(script_av, intf, &ip);
370                                         ready = 1;
371                                         conflicts = 0;
372                                         timeout_ms = -1; // Never timeout in the monitor state.
373
374                                         // NOTE: all other exit paths
375                                         // should deconfig ...
376                                         if (QUIT)
377                                                 return EXIT_SUCCESS;
378                                 }
379                                 break;
380                         case DEFEND:
381                                 // We won!  No ARP replies, so just go back to monitor.
382                                 state = MONITOR;
383                                 timeout_ms = -1;
384                                 conflicts = 0;
385                                 break;
386                         default:
387                                 // Invalid, should never happen.  Restart the whole protocol.
388                                 state = PROBE;
389                                 pick(&ip);
390                                 timeout_ms = 0;
391                                 nprobes = 0;
392                                 nclaims = 0;
393                                 break;
394                         } // switch (state)
395                         break; // case 0 (timeout)
396                 // packets arriving
397                 case 1:
398                         // We need to adjust the timeout in case we didn't receive
399                         // a conflicting packet.
400                         if (timeout_ms > 0) {
401                                 unsigned diff = deadline_us - MONOTONIC_US();
402                                 if ((int)(diff) < 0) {
403                                         // Current time is greater than the expected timeout time.
404                                         // Should never happen.
405                                         VDBG("missed an expected timeout\n");
406                                         timeout_ms = 0;
407                                 } else {
408                                         VDBG("adjusting timeout\n");
409                                         timeout_ms = diff / 1000;
410                                         if (!timeout_ms) timeout_ms = 1;
411                                 }
412                         }
413
414                         if ((fds[0].revents & POLLIN) == 0) {
415                                 if (fds[0].revents & POLLERR) {
416                                         // FIXME: links routinely go down;
417                                         // this shouldn't necessarily exit.
418                                         bb_error_msg("%s: poll error", intf);
419                                         if (ready) {
420                                                 script_av[1] = (char*)"deconfig";
421                                                 run(script_av, intf, &ip);
422                                         }
423                                         return EXIT_FAILURE;
424                                 }
425                                 continue;
426                         }
427
428                         // read ARP packet
429                         if (recv(fd, &p, sizeof(p), 0) < 0) {
430                                 why = "recv";
431                                 goto bad;
432                         }
433                         if (p.hdr.ether_type != htons(ETHERTYPE_ARP))
434                                 continue;
435
436 #ifdef DEBUG
437                         {
438                                 struct ether_addr * sha = (struct ether_addr *) p.arp.arp_sha;
439                                 struct ether_addr * tha = (struct ether_addr *) p.arp.arp_tha;
440                                 struct in_addr * spa = (struct in_addr *) p.arp.arp_spa;
441                                 struct in_addr * tpa = (struct in_addr *) p.arp.arp_tpa;
442                                 VDBG("%s recv arp type=%d, op=%d,\n",
443                                         intf, ntohs(p.hdr.ether_type),
444                                         ntohs(p.arp.arp_op));
445                                 VDBG("\tsource=%s %s\n",
446                                         ether_ntoa(sha),
447                                         inet_ntoa(*spa));
448                                 VDBG("\ttarget=%s %s\n",
449                                         ether_ntoa(tha),
450                                         inet_ntoa(*tpa));
451                         }
452 #endif
453                         if (p.arp.arp_op != htons(ARPOP_REQUEST)
454                                         && p.arp.arp_op != htons(ARPOP_REPLY))
455                                 continue;
456
457                         if (memcmp(p.arp.arp_spa, &ip.s_addr, sizeof(struct in_addr)) == 0 &&
458                                 memcmp(&eth_addr, &p.arp.arp_sha, ETH_ALEN) != 0) {
459                                 source_ip_conflict = 1;
460                         }
461                         if (memcmp(p.arp.arp_tpa, &ip.s_addr, sizeof(struct in_addr)) == 0 &&
462                                 p.arp.arp_op == htons(ARPOP_REQUEST) &&
463                                 memcmp(&eth_addr, &p.arp.arp_tha, ETH_ALEN) != 0) {
464                                 target_ip_conflict = 1;
465                         }
466
467                         VDBG("state = %d, source ip conflict = %d, target ip conflict = %d\n",
468                                 state, source_ip_conflict, target_ip_conflict);
469                         switch (state) {
470                         case PROBE:
471                         case ANNOUNCE:
472                                 // When probing or announcing, check for source IP conflicts
473                                 // and other hosts doing ARP probes (target IP conflicts).
474                                 if (source_ip_conflict || target_ip_conflict) {
475                                         conflicts++;
476                                         if (conflicts >= MAX_CONFLICTS) {
477                                                 VDBG("%s ratelimit\n", intf);
478                                                 timeout_ms = RATE_LIMIT_INTERVAL * 1000;
479                                                 state = RATE_LIMIT_PROBE;
480                                         }
481
482                                         // restart the whole protocol
483                                         pick(&ip);
484                                         timeout_ms = 0;
485                                         nprobes = 0;
486                                         nclaims = 0;
487                                 }
488                                 break;
489                         case MONITOR:
490                                 // If a conflict, we try to defend with a single ARP probe.
491                                 if (source_ip_conflict) {
492                                         VDBG("monitor conflict -- defending\n");
493                                         state = DEFEND;
494                                         timeout_ms = DEFEND_INTERVAL * 1000;
495                                         arp(fd, &saddr,
496                                                         ARPOP_REQUEST,
497                                                         &eth_addr, ip,
498                                                         &eth_addr, ip);
499                                 }
500                                 break;
501                         case DEFEND:
502                                 // Well, we tried.  Start over (on conflict).
503                                 if (source_ip_conflict) {
504                                         state = PROBE;
505                                         VDBG("defend conflict -- starting over\n");
506                                         ready = 0;
507                                         script_av[1] = (char*)"deconfig";
508                                         run(script_av, intf, &ip);
509
510                                         // restart the whole protocol
511                                         pick(&ip);
512                                         timeout_ms = 0;
513                                         nprobes = 0;
514                                         nclaims = 0;
515                                 }
516                                 break;
517                         default:
518                                 // Invalid, should never happen.  Restart the whole protocol.
519                                 VDBG("invalid state -- starting over\n");
520                                 state = PROBE;
521                                 pick(&ip);
522                                 timeout_ms = 0;
523                                 nprobes = 0;
524                                 nclaims = 0;
525                                 break;
526                         } // switch state
527
528                         break; // case 1 (packets arriving)
529                 default:
530                         why = "poll";
531                         goto bad;
532                 } // switch poll
533         }
534  bad:
535         bb_perror_msg("%s, %s", intf, why);
536         return EXIT_FAILURE;
537 }