microcom: add copyright/license info
[oweals/busybox.git] / networking / zcip.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * RFC3927 ZeroConf IPv4 Link-Local addressing
4  * (see <http://www.zeroconf.org/>)
5  *
6  * Copyright (C) 2003 by Arthur van Hoff (avh@strangeberry.com)
7  * Copyright (C) 2004 by David Brownell
8  *
9  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
10  */
11
12 /*
13  * ZCIP just manages the 169.254.*.* addresses.  That network is not
14  * routed at the IP level, though various proxies or bridges can
15  * certainly be used.  Its naming is built over multicast DNS.
16  */
17
18 //#define DEBUG
19
20 // TODO:
21 // - more real-world usage/testing, especially daemon mode
22 // - kernel packet filters to reduce scheduling noise
23 // - avoid silent script failures, especially under load...
24 // - link status monitoring (restart on link-up; stop on link-down)
25
#include <stdio.h>
#include <syslog.h>
#include <poll.h>
#include <sys/wait.h>
#include <netinet/ether.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <linux/if_packet.h>
#include <linux/sockios.h>

#include "libbb.h"
37
/*
 * Current monotonic time in microseconds, truncated to "unsigned":
 * the full width of the counter is not needed here.
 */
#define MONOTONIC_US() ((unsigned)monotonic_us())
40
41 struct arp_packet {
42         struct ether_header hdr;
43         struct ether_arp arp;
44 } ATTRIBUTE_PACKED;
45
enum {
	/* network part of the link-local range: 169.254.0.0 */
	LINKLOCAL_ADDR = 0xa9fe0000,

	/* probing: delays are in seconds, *_NUM are packet counts */
	PROBE_WAIT = 1,
	PROBE_MIN = 1,
	PROBE_MAX = 2,
	PROBE_NUM = 3,

	/* conflict rate limiting: count threshold, then delay in seconds */
	MAX_CONFLICTS = 10,
	RATE_LIMIT_INTERVAL = 60,

	/* announcing: delays are in seconds, *_NUM are packet counts */
	ANNOUNCE_WAIT = 2,
	ANNOUNCE_NUM = 2,
	ANNOUNCE_INTERVAL = 2,

	/* defending a configured address: delay in seconds */
	DEFEND_INTERVAL = 10
};
62
/* Protocol states, listed in the order a successful run passes through them. */
enum {
	PROBE            = 0,	/* sending ARP probes for a candidate address */
	RATE_LIMIT_PROBE = 1,	/* backing off after too many conflicts */
	ANNOUNCE         = 2,	/* sending ARP announcements for the address */
	MONITOR          = 3,	/* address in use, watching for conflicts */
	DEFEND           = 4	/* conflict seen, one defence sent */
};
71
/*
 * Verbose debug tracing.  When DEBUG is defined the printf-style message
 * is written to stderr; otherwise the macro expands to an empty statement
 * and its arguments are not evaluated.
 */
#ifdef DEBUG
#define VDBG(fmt,args...) \
	do { fprintf(stderr, fmt, ## args); } while (0)
#else
#define VDBG(fmt,args...) \
	do { } while (0)
#endif
74
75 /**
76  * Pick a random link local IP address on 169.254/16, except that
77  * the first and last 256 addresses are reserved.
78  */
79 static void pick(struct in_addr *ip)
80 {
81         unsigned tmp;
82
83         do {
84                 tmp = rand() & IN_CLASSB_HOST;
85         } while (tmp > (IN_CLASSB_HOST - 0x0200));
86         ip->s_addr = htonl((LINKLOCAL_ADDR + 0x0100) + tmp);
87 }
88
89 /**
90  * Broadcast an ARP packet.
91  */
92 static void arp(int fd, struct sockaddr *saddr, int op,
93         const struct ether_addr *source_addr, struct in_addr source_ip,
94         const struct ether_addr *target_addr, struct in_addr target_ip)
95 {
96         struct arp_packet p;
97         memset(&p, 0, sizeof(p));
98
99         // ether header
100         p.hdr.ether_type = htons(ETHERTYPE_ARP);
101         memcpy(p.hdr.ether_shost, source_addr, ETH_ALEN);
102         memset(p.hdr.ether_dhost, 0xff, ETH_ALEN);
103
104         // arp request
105         p.arp.arp_hrd = htons(ARPHRD_ETHER);
106         p.arp.arp_pro = htons(ETHERTYPE_IP);
107         p.arp.arp_hln = ETH_ALEN;
108         p.arp.arp_pln = 4;
109         p.arp.arp_op = htons(op);
110         memcpy(&p.arp.arp_sha, source_addr, ETH_ALEN);
111         memcpy(&p.arp.arp_spa, &source_ip, sizeof(p.arp.arp_spa));
112         memcpy(&p.arp.arp_tha, target_addr, ETH_ALEN);
113         memcpy(&p.arp.arp_tpa, &target_ip, sizeof(p.arp.arp_tpa));
114
115         // send it
116         xsendto(fd, &p, sizeof(p), saddr, sizeof(*saddr));
117
118         // Currently all callers ignore errors, that's why returns are
119         // commented out...
120         //return 0;
121 }
122
/**
 * Run the helper script and wait for it to finish.
 *
 * argv   script argument vector: argv[0] is the script, argv[1] the
 *        action name, argv[2] is already NULL
 * intf   interface name, used in log messages only
 * ip     if non-NULL, exported to the script as the "ip" environment
 *        variable and logged
 *
 * Returns 0 on success, the (positive) status reported by wait4pid()
 * if the script failed, or a negative value if the script could not be
 * run or waited for.
 */
static int run(char *argv[3], const char *intf, struct in_addr *ip)
{
	int status;

	VDBG("%s run %s %s\n", intf, argv[0], argv[1]);

	if (ip) {
		char *addr = inet_ntoa(*ip);
		setenv("ip", addr, 1);
		bb_info_msg("%s %s %s", argv[1], intf, addr);
	}

	status = wait4pid(spawn(argv));
	if (status < 0) {
		// Save errno before logging: the logging call may change it.
		int err = errno;

		bb_perror_msg("%s %s", argv[1], intf);
		// Never report success (0) for a failed spawn/wait, even if
		// errno happened not to be set.
		return err > 0 ? -err : -1;
	}
	if (status != 0)
		bb_error_msg("script %s %s failed, exitcode=%d", argv[0], argv[1], status);
	return status;
}
147
148 /**
149  * Return milliseconds of random delay, up to "secs" seconds.
150  */
151 static unsigned ALWAYS_INLINE ms_rdelay(unsigned secs)
152 {
153         return rand() % (secs * 1000);
154 }
155
156 /**
157  * main program
158  */
159 int zcip_main(int argc, char **argv);
160 int zcip_main(int argc, char **argv)
161 {
162         int state = PROBE;
163         struct ether_addr eth_addr;
164         const char *why;
165         int fd;
166         char *r_opt;
167         unsigned opts;
168
169         /* Ugly trick, but I want these zeroed in one go */
170         struct {
171                 const struct in_addr null_ip;
172                 const struct ether_addr null_addr;
173                 struct sockaddr saddr;
174                 struct in_addr ip;
175                 struct ifreq ifr;
176                 char *intf;
177                 char *script_av[3];
178                 int timeout_ms; /* must be signed */
179                 unsigned conflicts;
180                 unsigned nprobes;
181                 unsigned nclaims;
182                 int ready;
183                 int verbose;
184         } L;
185 #define null_ip    (L.null_ip   )
186 #define null_addr  (L.null_addr )
187 #define saddr      (L.saddr     )
188 #define ip         (L.ip        )
189 #define ifr        (L.ifr       )
190 #define intf       (L.intf      )
191 #define script_av  (L.script_av )
192 #define timeout_ms (L.timeout_ms)
193 #define conflicts  (L.conflicts )
194 #define nprobes    (L.nprobes   )
195 #define nclaims    (L.nclaims   )
196 #define ready      (L.ready     )
197 #define verbose    (L.verbose   )
198
199         memset(&L, 0, sizeof(L));
200
201 #define FOREGROUND (opts & 1)
202 #define QUIT       (opts & 2)
203         // parse commandline: prog [options] ifname script
204         // exactly 2 args; -v accumulates and implies -f
205         opt_complementary = "=2:vv:vf";
206         opts = getopt32(argv, "fqr:v", &r_opt, &verbose);
207         if (!FOREGROUND) {
208                 /* Do it early, before all bb_xx_msg calls */
209                 openlog(applet_name, 0, LOG_DAEMON);
210                 logmode |= LOGMODE_SYSLOG;
211         }
212         if (opts & 4) { // -r n.n.n.n
213                 if (inet_aton(r_opt, &ip) == 0
214                  || (ntohl(ip.s_addr) & IN_CLASSB_NET) != LINKLOCAL_ADDR
215                 ) {
216                         bb_error_msg_and_die("invalid link address");
217                 }
218         }
219         // On NOMMU reexec early (or else we will rerun things twice)
220 #if !BB_MMU
221         if (!FOREGROUND)
222                 bb_daemonize_or_rexec(DAEMON_CHDIR_ROOT, argv);
223 #endif
224         argc -= optind;
225         argv += optind;
226
227         intf = argv[0];
228         script_av[0] = argv[1];
229         setenv("interface", intf, 1);
230
231         // initialize the interface (modprobe, ifup, etc)
232         script_av[1] = (char*)"init";
233         if (run(script_av, intf, NULL))
234                 return EXIT_FAILURE;
235
236         // initialize saddr
237         //memset(&saddr, 0, sizeof(saddr));
238         safe_strncpy(saddr.sa_data, intf, sizeof(saddr.sa_data));
239
240         // open an ARP socket
241         fd = xsocket(PF_PACKET, SOCK_PACKET, htons(ETH_P_ARP));
242         // bind to the interface's ARP socket
243         xbind(fd, &saddr, sizeof(saddr));
244
245         // get the interface's ethernet address
246         //memset(&ifr, 0, sizeof(ifr));
247         strncpy(ifr.ifr_name, intf, sizeof(ifr.ifr_name));
248         xioctl(fd, SIOCGIFHWADDR, &ifr);
249         memcpy(&eth_addr, &ifr.ifr_hwaddr.sa_data, ETH_ALEN);
250
251         // start with some stable ip address, either a function of
252         // the hardware address or else the last address we used.
253         // NOTE: the sequence of addresses we try changes only
254         // depending on when we detect conflicts.
255         srand(*(unsigned*)&ifr.ifr_hwaddr.sa_data);
256         if (ip.s_addr == 0)
257                 pick(&ip);
258
259         // FIXME cases to handle:
260         //  - zcip already running!
261         //  - link already has local address... just defend/update
262
263         // daemonize now; don't delay system startup
264         if (!FOREGROUND) {
265 #if BB_MMU
266                 bb_daemonize(DAEMON_CHDIR_ROOT);
267 #endif
268                 bb_info_msg("start, interface %s", intf);
269         }
270
271         // run the dynamic address negotiation protocol,
272         // restarting after address conflicts:
273         //  - start with some address we want to try
274         //  - short random delay
275         //  - arp probes to see if another host else uses it
276         //  - arp announcements that we're claiming it
277         //  - use it
278         //  - defend it, within limits
279         while (1) {
280                 struct pollfd fds[1];
281                 unsigned deadline_us;
282                 struct arp_packet p;
283
284                 int source_ip_conflict = 0;
285                 int target_ip_conflict = 0;
286
287                 fds[0].fd = fd;
288                 fds[0].events = POLLIN;
289                 fds[0].revents = 0;
290
291                 // poll, being ready to adjust current timeout
292                 if (!timeout_ms) {
293                         timeout_ms = ms_rdelay(PROBE_WAIT);
294                         // FIXME setsockopt(fd, SO_ATTACH_FILTER, ...) to
295                         // make the kernel filter out all packets except
296                         // ones we'd care about.
297                 }
298                 // set deadline_us to the point in time when we timeout
299                 deadline_us = MONOTONIC_US() + timeout_ms * 1000;
300
301                 VDBG("...wait %d %s nprobes=%u, nclaims=%u\n",
302                                 timeout_ms, intf, nprobes, nclaims);
303                 switch (poll(fds, 1, timeout_ms)) {
304
305                 // timeout
306                 case 0:
307                         VDBG("state = %d\n", state);
308                         switch (state) {
309                         case PROBE:
310                                 // timeouts in the PROBE state mean no conflicting ARP packets
311                                 // have been received, so we can progress through the states
312                                 if (nprobes < PROBE_NUM) {
313                                         nprobes++;
314                                         VDBG("probe/%u %s@%s\n",
315                                                         nprobes, intf, inet_ntoa(ip));
316                                         arp(fd, &saddr, ARPOP_REQUEST,
317                                                         &eth_addr, null_ip,
318                                                         &null_addr, ip);
319                                         timeout_ms = PROBE_MIN * 1000;
320                                         timeout_ms += ms_rdelay(PROBE_MAX - PROBE_MIN);
321                                 }
322                                 else {
323                                         // Switch to announce state.
324                                         state = ANNOUNCE;
325                                         nclaims = 0;
326                                         VDBG("announce/%u %s@%s\n",
327                                                         nclaims, intf, inet_ntoa(ip));
328                                         arp(fd, &saddr, ARPOP_REQUEST,
329                                                         &eth_addr, ip,
330                                                         &eth_addr, ip);
331                                         timeout_ms = ANNOUNCE_INTERVAL * 1000;
332                                 }
333                                 break;
334                         case RATE_LIMIT_PROBE:
335                                 // timeouts in the RATE_LIMIT_PROBE state mean no conflicting ARP packets
336                                 // have been received, so we can move immediately to the announce state
337                                 state = ANNOUNCE;
338                                 nclaims = 0;
339                                 VDBG("announce/%u %s@%s\n",
340                                                 nclaims, intf, inet_ntoa(ip));
341                                 arp(fd, &saddr, ARPOP_REQUEST,
342                                                 &eth_addr, ip,
343                                                 &eth_addr, ip);
344                                 timeout_ms = ANNOUNCE_INTERVAL * 1000;
345                                 break;
346                         case ANNOUNCE:
347                                 // timeouts in the ANNOUNCE state mean no conflicting ARP packets
348                                 // have been received, so we can progress through the states
349                                 if (nclaims < ANNOUNCE_NUM) {
350                                         nclaims++;
351                                         VDBG("announce/%u %s@%s\n",
352                                                         nclaims, intf, inet_ntoa(ip));
353                                         arp(fd, &saddr, ARPOP_REQUEST,
354                                                         &eth_addr, ip,
355                                                         &eth_addr, ip);
356                                         timeout_ms = ANNOUNCE_INTERVAL * 1000;
357                                 }
358                                 else {
359                                         // Switch to monitor state.
360                                         state = MONITOR;
361                                         // link is ok to use earlier
362                                         // FIXME update filters
363                                         script_av[1] = (char*)"config";
364                                         run(script_av, intf, &ip);
365                                         ready = 1;
366                                         conflicts = 0;
367                                         timeout_ms = -1; // Never timeout in the monitor state.
368
369                                         // NOTE: all other exit paths
370                                         // should deconfig ...
371                                         if (QUIT)
372                                                 return EXIT_SUCCESS;
373                                 }
374                                 break;
375                         case DEFEND:
376                                 // We won!  No ARP replies, so just go back to monitor.
377                                 state = MONITOR;
378                                 timeout_ms = -1;
379                                 conflicts = 0;
380                                 break;
381                         default:
382                                 // Invalid, should never happen.  Restart the whole protocol.
383                                 state = PROBE;
384                                 pick(&ip);
385                                 timeout_ms = 0;
386                                 nprobes = 0;
387                                 nclaims = 0;
388                                 break;
389                         } // switch (state)
390                         break; // case 0 (timeout)
391                 // packets arriving
392                 case 1:
393                         // We need to adjust the timeout in case we didn't receive
394                         // a conflicting packet.
395                         if (timeout_ms > 0) {
396                                 unsigned diff = deadline_us - MONOTONIC_US();
397                                 if ((int)(diff) < 0) {
398                                         // Current time is greater than the expected timeout time.
399                                         // Should never happen.
400                                         VDBG("missed an expected timeout\n");
401                                         timeout_ms = 0;
402                                 } else {
403                                         VDBG("adjusting timeout\n");
404                                         timeout_ms = diff / 1000;
405                                         if (!timeout_ms) timeout_ms = 1;
406                                 }
407                         }
408
409                         if ((fds[0].revents & POLLIN) == 0) {
410                                 if (fds[0].revents & POLLERR) {
411                                         // FIXME: links routinely go down;
412                                         // this shouldn't necessarily exit.
413                                         bb_error_msg("%s: poll error", intf);
414                                         if (ready) {
415                                                 script_av[1] = (char*)"deconfig";
416                                                 run(script_av, intf, &ip);
417                                         }
418                                         return EXIT_FAILURE;
419                                 }
420                                 continue;
421                         }
422
423                         // read ARP packet
424                         if (recv(fd, &p, sizeof(p), 0) < 0) {
425                                 why = "recv";
426                                 goto bad;
427                         }
428                         if (p.hdr.ether_type != htons(ETHERTYPE_ARP))
429                                 continue;
430
431 #ifdef DEBUG
432                         {
433                                 struct ether_addr * sha = (struct ether_addr *) p.arp.arp_sha;
434                                 struct ether_addr * tha = (struct ether_addr *) p.arp.arp_tha;
435                                 struct in_addr * spa = (struct in_addr *) p.arp.arp_spa;
436                                 struct in_addr * tpa = (struct in_addr *) p.arp.arp_tpa;
437                                 VDBG("%s recv arp type=%d, op=%d,\n",
438                                         intf, ntohs(p.hdr.ether_type),
439                                         ntohs(p.arp.arp_op));
440                                 VDBG("\tsource=%s %s\n",
441                                         ether_ntoa(sha),
442                                         inet_ntoa(*spa));
443                                 VDBG("\ttarget=%s %s\n",
444                                         ether_ntoa(tha),
445                                         inet_ntoa(*tpa));
446                         }
447 #endif
448                         if (p.arp.arp_op != htons(ARPOP_REQUEST)
449                                         && p.arp.arp_op != htons(ARPOP_REPLY))
450                                 continue;
451
452                         if (memcmp(p.arp.arp_spa, &ip.s_addr, sizeof(struct in_addr)) == 0 &&
453                                 memcmp(&eth_addr, &p.arp.arp_sha, ETH_ALEN) != 0) {
454                                 source_ip_conflict = 1;
455                         }
456                         if (memcmp(p.arp.arp_tpa, &ip.s_addr, sizeof(struct in_addr)) == 0 &&
457                                 p.arp.arp_op == htons(ARPOP_REQUEST) &&
458                                 memcmp(&eth_addr, &p.arp.arp_tha, ETH_ALEN) != 0) {
459                                 target_ip_conflict = 1;
460                         }
461
462                         VDBG("state = %d, source ip conflict = %d, target ip conflict = %d\n",
463                                 state, source_ip_conflict, target_ip_conflict);
464                         switch (state) {
465                         case PROBE:
466                         case ANNOUNCE:
467                                 // When probing or announcing, check for source IP conflicts
468                                 // and other hosts doing ARP probes (target IP conflicts).
469                                 if (source_ip_conflict || target_ip_conflict) {
470                                         conflicts++;
471                                         if (conflicts >= MAX_CONFLICTS) {
472                                                 VDBG("%s ratelimit\n", intf);
473                                                 timeout_ms = RATE_LIMIT_INTERVAL * 1000;
474                                                 state = RATE_LIMIT_PROBE;
475                                         }
476
477                                         // restart the whole protocol
478                                         pick(&ip);
479                                         timeout_ms = 0;
480                                         nprobes = 0;
481                                         nclaims = 0;
482                                 }
483                                 break;
484                         case MONITOR:
485                                 // If a conflict, we try to defend with a single ARP probe.
486                                 if (source_ip_conflict) {
487                                         VDBG("monitor conflict -- defending\n");
488                                         state = DEFEND;
489                                         timeout_ms = DEFEND_INTERVAL * 1000;
490                                         arp(fd, &saddr,
491                                                         ARPOP_REQUEST,
492                                                         &eth_addr, ip,
493                                                         &eth_addr, ip);
494                                 }
495                                 break;
496                         case DEFEND:
497                                 // Well, we tried.  Start over (on conflict).
498                                 if (source_ip_conflict) {
499                                         state = PROBE;
500                                         VDBG("defend conflict -- starting over\n");
501                                         ready = 0;
502                                         script_av[1] = (char*)"deconfig";
503                                         run(script_av, intf, &ip);
504
505                                         // restart the whole protocol
506                                         pick(&ip);
507                                         timeout_ms = 0;
508                                         nprobes = 0;
509                                         nclaims = 0;
510                                 }
511                                 break;
512                         default:
513                                 // Invalid, should never happen.  Restart the whole protocol.
514                                 VDBG("invalid state -- starting over\n");
515                                 state = PROBE;
516                                 pick(&ip);
517                                 timeout_ms = 0;
518                                 nprobes = 0;
519                                 nclaims = 0;
520                                 break;
521                         } // switch state
522
523                         break; // case 1 (packets arriving)
524                 default:
525                         why = "poll";
526                         goto bad;
527                 } // switch poll
528         }
529  bad:
530         bb_perror_msg("%s, %s", intf, why);
531         return EXIT_FAILURE;
532 }