xioctl and friends by Tito <farmatito@tiscali.it>
[oweals/busybox.git] / networking / zcip.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * RFC3927 ZeroConf IPv4 Link-Local addressing
4  * (see <http://www.zeroconf.org/>)
5  *
6  * Copyright (C) 2003 by Arthur van Hoff (avh@strangeberry.com)
7  * Copyright (C) 2004 by David Brownell
8  *
9  * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
10  */
11
12 /*
13  * ZCIP just manages the 169.254.*.* addresses.  That network is not
14  * routed at the IP level, though various proxies or bridges can
15  * certainly be used.  Its naming is built over multicast DNS.
16  */
17
18 //#define DEBUG
19
20 // TODO:
21 // - more real-world usage/testing, especially daemon mode
22 // - kernel packet filters to reduce scheduling noise
23 // - avoid silent script failures, especially under load...
24 // - link status monitoring (restart on link-up; stop on link-down)
25
26 #include <syslog.h>
27 #include <poll.h>
28 #include <sys/wait.h>
29 #include <netinet/ether.h>
30 #include <net/ethernet.h>
31 #include <net/if.h>
32 #include <net/if_arp.h>
33 #include <linux/if_packet.h>
34 #include <linux/sockios.h>
35
36 #include "libbb.h"
37
/* Truncate monotonic_us() to unsigned: the low bits of the counter suffice */
#define MONOTONIC_US() ((unsigned) monotonic_us())
40
41 struct arp_packet {
42         struct ether_header hdr;
43         struct ether_arp arp;
44 } ATTRIBUTE_PACKED;
45
/* 169.254.0.0, the base of the link-local network, in host byte order */
enum { LINKLOCAL_ADDR = 0xa9fe0000 };

/* protocol parameters */
enum {
	/* durations, specified in seconds */
	PROBE_WAIT = 1,
	PROBE_MIN = 1,
	PROBE_MAX = 2,
	ANNOUNCE_WAIT = 2,
	ANNOUNCE_INTERVAL = 2,
	RATE_LIMIT_INTERVAL = 60,
	DEFEND_INTERVAL = 10,

	/* counts (number of packets / number of conflicts) */
	PROBE_NUM = 3,
	ANNOUNCE_NUM = 2,
	MAX_CONFLICTS = 10
};
62
/* States of the address configuration state machine. */
enum {
	PROBE            = 0,	/* sending ARP probes for the candidate address */
	RATE_LIMIT_PROBE = 1,	/* entered once MAX_CONFLICTS conflicts were seen */
	ANNOUNCE         = 2,	/* sending ARP announcements for the address */
	MONITOR          = 3,	/* address configured; watching for conflicts */
	DEFEND           = 4	/* conflict seen while monitoring; defence sent */
};
71
/* Debug trace hook: expands to nothing, so its arguments are never evaluated */
#define VDBG(fmt, args...) do { } while (0)
74
75 /**
76  * Pick a random link local IP address on 169.254/16, except that
77  * the first and last 256 addresses are reserved.
78  */
79 static void pick(struct in_addr *ip)
80 {
81         unsigned tmp;
82
83         do {
84                 tmp = rand() & IN_CLASSB_HOST;
85         } while (tmp > (IN_CLASSB_HOST - 0x0200));
86         ip->s_addr = htonl((LINKLOCAL_ADDR + 0x0100) + tmp);
87 }
88
89 /**
90  * Broadcast an ARP packet.
91  */
92 static void arp(int fd, struct sockaddr *saddr, int op,
93         const struct ether_addr *source_addr, struct in_addr source_ip,
94         const struct ether_addr *target_addr, struct in_addr target_ip)
95 {
96         struct arp_packet p;
97         memset(&p, 0, sizeof(p));
98
99         // ether header
100         p.hdr.ether_type = htons(ETHERTYPE_ARP);
101         memcpy(p.hdr.ether_shost, source_addr, ETH_ALEN);
102         memset(p.hdr.ether_dhost, 0xff, ETH_ALEN);
103
104         // arp request
105         p.arp.arp_hrd = htons(ARPHRD_ETHER);
106         p.arp.arp_pro = htons(ETHERTYPE_IP);
107         p.arp.arp_hln = ETH_ALEN;
108         p.arp.arp_pln = 4;
109         p.arp.arp_op = htons(op);
110         memcpy(&p.arp.arp_sha, source_addr, ETH_ALEN);
111         memcpy(&p.arp.arp_spa, &source_ip, sizeof(p.arp.arp_spa));
112         memcpy(&p.arp.arp_tha, target_addr, ETH_ALEN);
113         memcpy(&p.arp.arp_tpa, &target_ip, sizeof(p.arp.arp_tpa));
114
115         // send it
116         xsendto(fd, &p, sizeof(p), saddr, sizeof(*saddr));
117
118         // Currently all callers ignore errors, that's why returns are
119         // commented out...
120         //return 0;
121 }
122
/**
 * Run a script. argv[2] is already NULL.
 *
 * argv - argv[0] is the script, argv[1] the action passed to it
 * intf - interface name, used in log messages
 * ip   - if non-NULL, exported to the script as environment variable "ip"
 *
 * Returns the script's status as reported by wait4pid(): 0 on success,
 * positive if the script failed.  If the script could not be run or
 * waited for, a negative value (-errno, or -1 if errno was not set) is
 * returned, so that a failure is never reported as 0.
 */
static int run(char *argv[3], const char *intf, struct in_addr *ip)
{
	int status;

	VDBG("%s run %s %s\n", intf, argv[0], argv[1]);

	if (ip) {
		char *addr = inet_ntoa(*ip);
		setenv("ip", addr, 1);
		bb_info_msg("%s %s %s", argv[1], intf, addr);
	}

	status = wait4pid(spawn(argv));
	if (status < 0) {
		// Logging may itself change errno, so capture it first.
		int saved_errno = errno;

		bb_perror_msg("%s %s", argv[1], intf);
		return saved_errno ? -saved_errno : -1;
	}
	if (status != 0)
		bb_error_msg("script %s %s failed, exitcode=%d", argv[0], argv[1], status);
	return status;
}
147
148 /**
149  * Return milliseconds of random delay, up to "secs" seconds.
150  */
151 static unsigned ALWAYS_INLINE ms_rdelay(unsigned secs)
152 {
153         return rand() % (secs * 1000);
154 }
155
156 /**
157  * main program
158  */
159 int zcip_main(int argc, char **argv);
160 int zcip_main(int argc, char **argv)
161 {
162         int state = PROBE;
163         struct ether_addr eth_addr;
164         const char *why;
165         int fd;
166         char *r_opt;
167         unsigned opts;
168
169         /* Ugly trick, but I want these zeroed in one go */
170         struct {
171                 const struct in_addr null_ip;
172                 const struct ether_addr null_addr;
173                 struct sockaddr saddr;
174                 struct in_addr ip;
175                 struct ifreq ifr;
176                 char *intf;
177                 char *script_av[3];
178                 int timeout_ms; /* must be signed */
179                 unsigned conflicts;
180                 unsigned nprobes;
181                 unsigned nclaims;
182                 int ready;
183                 int verbose;
184         } L;
185 #define null_ip    (L.null_ip   )
186 #define null_addr  (L.null_addr )
187 #define saddr      (L.saddr     )
188 #define ip         (L.ip        )
189 #define ifr        (L.ifr       )
190 #define intf       (L.intf      )
191 #define script_av  (L.script_av )
192 #define timeout_ms (L.timeout_ms)
193 #define conflicts  (L.conflicts )
194 #define nprobes    (L.nprobes   )
195 #define nclaims    (L.nclaims   )
196 #define ready      (L.ready     )
197 #define verbose    (L.verbose   )
198
199         memset(&L, 0, sizeof(L));
200
201         srand(MONOTONIC_US());
202
203 #define FOREGROUND (opts & 1)
204 #define QUIT       (opts & 2)
205         // parse commandline: prog [options] ifname script
206         // exactly 2 args; -v accumulates and implies -f
207         opt_complementary = "=2:vv:vf";
208         opts = getopt32(argc, argv, "fqr:v", &r_opt, &verbose);
209         if (!FOREGROUND) {
210                 /* Do it early, before all bb_xx_msg calls */
211                 openlog(applet_name, 0, LOG_DAEMON);
212                 logmode |= LOGMODE_SYSLOG;
213         }
214         if (opts & 4) { // -r n.n.n.n
215                 if (inet_aton(r_opt, &ip) == 0
216                  || (ntohl(ip.s_addr) & IN_CLASSB_NET) != LINKLOCAL_ADDR
217                 ) {
218                         bb_error_msg_and_die("invalid link address");
219                 }
220         }
221         // On NOMMU reexec early (or else we will rerun things twice)
222 #if !BB_MMU
223         if (!FOREGROUND)
224                 bb_daemonize_or_rexec(DAEMON_CHDIR_ROOT, argv);
225 #endif
226         argc -= optind;
227         argv += optind;
228
229         intf = argv[0];
230         script_av[0] = argv[1];
231         setenv("interface", intf, 1);
232
233         // initialize the interface (modprobe, ifup, etc)
234         script_av[1] = (char*)"init";
235         if (run(script_av, intf, NULL))
236                 return EXIT_FAILURE;
237
238         // initialize saddr
239         //memset(&saddr, 0, sizeof(saddr));
240         safe_strncpy(saddr.sa_data, intf, sizeof(saddr.sa_data));
241
242         // open an ARP socket
243         fd = xsocket(PF_PACKET, SOCK_PACKET, htons(ETH_P_ARP));
244         // bind to the interface's ARP socket
245         xbind(fd, &saddr, sizeof(saddr));
246
247         // get the interface's ethernet address
248         //memset(&ifr, 0, sizeof(ifr));
249         strncpy(ifr.ifr_name, intf, sizeof(ifr.ifr_name));
250         xioctl(fd, SIOCGIFHWADDR, &ifr);
251         memcpy(&eth_addr, &ifr.ifr_hwaddr.sa_data, ETH_ALEN);
252
253         // start with some stable ip address, either a function of
254         // the hardware address or else the last address we used.
255         // NOTE: the sequence of addresses we try changes only
256         // depending on when we detect conflicts.
257         // (SVID 3 bogon: who says that "short" is always 16 bits?)
258         seed48( (unsigned short*)&ifr.ifr_hwaddr.sa_data );
259         if (ip.s_addr == 0)
260                 pick(&ip);
261
262         // FIXME cases to handle:
263         //  - zcip already running!
264         //  - link already has local address... just defend/update
265
266         // daemonize now; don't delay system startup
267         if (!FOREGROUND) {
268 #if BB_MMU
269                 bb_daemonize(DAEMON_CHDIR_ROOT);
270 #endif
271                 bb_info_msg("start, interface %s", intf);
272         }
273
274         // run the dynamic address negotiation protocol,
275         // restarting after address conflicts:
276         //  - start with some address we want to try
277         //  - short random delay
278         //  - arp probes to see if another host else uses it
279         //  - arp announcements that we're claiming it
280         //  - use it
281         //  - defend it, within limits
282         while (1) {
283                 struct pollfd fds[1];
284                 unsigned deadline_us;
285                 struct arp_packet p;
286
287                 int source_ip_conflict = 0;
288                 int target_ip_conflict = 0;
289
290                 fds[0].fd = fd;
291                 fds[0].events = POLLIN;
292                 fds[0].revents = 0;
293
294                 // poll, being ready to adjust current timeout
295                 if (!timeout_ms) {
296                         timeout_ms = ms_rdelay(PROBE_WAIT);
297                         // FIXME setsockopt(fd, SO_ATTACH_FILTER, ...) to
298                         // make the kernel filter out all packets except
299                         // ones we'd care about.
300                 }
301                 // set deadline_us to the point in time when we timeout
302                 deadline_us = MONOTONIC_US() + timeout_ms * 1000;
303
304                 VDBG("...wait %d %s nprobes=%u, nclaims=%u\n",
305                                 timeout_ms, intf, nprobes, nclaims);
306                 switch (poll(fds, 1, timeout_ms)) {
307
308                 // timeout
309                 case 0:
310                         VDBG("state = %d\n", state);
311                         switch (state) {
312                         case PROBE:
313                                 // timeouts in the PROBE state mean no conflicting ARP packets
314                                 // have been received, so we can progress through the states
315                                 if (nprobes < PROBE_NUM) {
316                                         nprobes++;
317                                         VDBG("probe/%u %s@%s\n",
318                                                         nprobes, intf, inet_ntoa(ip));
319                                         arp(fd, &saddr, ARPOP_REQUEST,
320                                                         &eth_addr, null_ip,
321                                                         &null_addr, ip);
322                                         timeout_ms = PROBE_MIN * 1000;
323                                         timeout_ms += ms_rdelay(PROBE_MAX - PROBE_MIN);
324                                 }
325                                 else {
326                                         // Switch to announce state.
327                                         state = ANNOUNCE;
328                                         nclaims = 0;
329                                         VDBG("announce/%u %s@%s\n",
330                                                         nclaims, intf, inet_ntoa(ip));
331                                         arp(fd, &saddr, ARPOP_REQUEST,
332                                                         &eth_addr, ip,
333                                                         &eth_addr, ip);
334                                         timeout_ms = ANNOUNCE_INTERVAL * 1000;
335                                 }
336                                 break;
337                         case RATE_LIMIT_PROBE:
338                                 // timeouts in the RATE_LIMIT_PROBE state mean no conflicting ARP packets
339                                 // have been received, so we can move immediately to the announce state
340                                 state = ANNOUNCE;
341                                 nclaims = 0;
342                                 VDBG("announce/%u %s@%s\n",
343                                                 nclaims, intf, inet_ntoa(ip));
344                                 arp(fd, &saddr, ARPOP_REQUEST,
345                                                 &eth_addr, ip,
346                                                 &eth_addr, ip);
347                                 timeout_ms = ANNOUNCE_INTERVAL * 1000;
348                                 break;
349                         case ANNOUNCE:
350                                 // timeouts in the ANNOUNCE state mean no conflicting ARP packets
351                                 // have been received, so we can progress through the states
352                                 if (nclaims < ANNOUNCE_NUM) {
353                                         nclaims++;
354                                         VDBG("announce/%u %s@%s\n",
355                                                         nclaims, intf, inet_ntoa(ip));
356                                         arp(fd, &saddr, ARPOP_REQUEST,
357                                                         &eth_addr, ip,
358                                                         &eth_addr, ip);
359                                         timeout_ms = ANNOUNCE_INTERVAL * 1000;
360                                 }
361                                 else {
362                                         // Switch to monitor state.
363                                         state = MONITOR;
364                                         // link is ok to use earlier
365                                         // FIXME update filters
366                                         script_av[1] = (char*)"config";
367                                         run(script_av, intf, &ip);
368                                         ready = 1;
369                                         conflicts = 0;
370                                         timeout_ms = -1; // Never timeout in the monitor state.
371
372                                         // NOTE: all other exit paths
373                                         // should deconfig ...
374                                         if (QUIT)
375                                                 return EXIT_SUCCESS;
376                                 }
377                                 break;
378                         case DEFEND:
379                                 // We won!  No ARP replies, so just go back to monitor.
380                                 state = MONITOR;
381                                 timeout_ms = -1;
382                                 conflicts = 0;
383                                 break;
384                         default:
385                                 // Invalid, should never happen.  Restart the whole protocol.
386                                 state = PROBE;
387                                 pick(&ip);
388                                 timeout_ms = 0;
389                                 nprobes = 0;
390                                 nclaims = 0;
391                                 break;
392                         } // switch (state)
393                         break; // case 0 (timeout)
394                 // packets arriving
395                 case 1:
396                         // We need to adjust the timeout in case we didn't receive
397                         // a conflicting packet.
398                         if (timeout_ms > 0) {
399                                 unsigned diff = deadline_us - MONOTONIC_US();
400                                 if ((int)(diff) < 0) {
401                                         // Current time is greater than the expected timeout time.
402                                         // Should never happen.
403                                         VDBG("missed an expected timeout\n");
404                                         timeout_ms = 0;
405                                 } else {
406                                         VDBG("adjusting timeout\n");
407                                         timeout_ms = diff / 1000;
408                                         if (!timeout_ms) timeout_ms = 1;
409                                 }
410                         }
411
412                         if ((fds[0].revents & POLLIN) == 0) {
413                                 if (fds[0].revents & POLLERR) {
414                                         // FIXME: links routinely go down;
415                                         // this shouldn't necessarily exit.
416                                         bb_error_msg("%s: poll error", intf);
417                                         if (ready) {
418                                                 script_av[1] = (char*)"deconfig";
419                                                 run(script_av, intf, &ip);
420                                         }
421                                         return EXIT_FAILURE;
422                                 }
423                                 continue;
424                         }
425
426                         // read ARP packet
427                         if (recv(fd, &p, sizeof(p), 0) < 0) {
428                                 why = "recv";
429                                 goto bad;
430                         }
431                         if (p.hdr.ether_type != htons(ETHERTYPE_ARP))
432                                 continue;
433
434 #ifdef DEBUG
435                         {
436                                 struct ether_addr * sha = (struct ether_addr *) p.arp.arp_sha;
437                                 struct ether_addr * tha = (struct ether_addr *) p.arp.arp_tha;
438                                 struct in_addr * spa = (struct in_addr *) p.arp.arp_spa;
439                                 struct in_addr * tpa = (struct in_addr *) p.arp.arp_tpa;
440                                 VDBG("%s recv arp type=%d, op=%d,\n",
441                                         intf, ntohs(p.hdr.ether_type),
442                                         ntohs(p.arp.arp_op));
443                                 VDBG("\tsource=%s %s\n",
444                                         ether_ntoa(sha),
445                                         inet_ntoa(*spa));
446                                 VDBG("\ttarget=%s %s\n",
447                                         ether_ntoa(tha),
448                                         inet_ntoa(*tpa));
449                         }
450 #endif
451                         if (p.arp.arp_op != htons(ARPOP_REQUEST)
452                                         && p.arp.arp_op != htons(ARPOP_REPLY))
453                                 continue;
454
455                         if (memcmp(p.arp.arp_spa, &ip.s_addr, sizeof(struct in_addr)) == 0 &&
456                                 memcmp(&eth_addr, &p.arp.arp_sha, ETH_ALEN) != 0) {
457                                 source_ip_conflict = 1;
458                         }
459                         if (memcmp(p.arp.arp_tpa, &ip.s_addr, sizeof(struct in_addr)) == 0 &&
460                                 p.arp.arp_op == htons(ARPOP_REQUEST) &&
461                                 memcmp(&eth_addr, &p.arp.arp_tha, ETH_ALEN) != 0) {
462                                 target_ip_conflict = 1;
463                         }
464
465                         VDBG("state = %d, source ip conflict = %d, target ip conflict = %d\n",
466                                 state, source_ip_conflict, target_ip_conflict);
467                         switch (state) {
468                         case PROBE:
469                         case ANNOUNCE:
470                                 // When probing or announcing, check for source IP conflicts
471                                 // and other hosts doing ARP probes (target IP conflicts).
472                                 if (source_ip_conflict || target_ip_conflict) {
473                                         conflicts++;
474                                         if (conflicts >= MAX_CONFLICTS) {
475                                                 VDBG("%s ratelimit\n", intf);
476                                                 timeout_ms = RATE_LIMIT_INTERVAL * 1000;
477                                                 state = RATE_LIMIT_PROBE;
478                                         }
479
480                                         // restart the whole protocol
481                                         pick(&ip);
482                                         timeout_ms = 0;
483                                         nprobes = 0;
484                                         nclaims = 0;
485                                 }
486                                 break;
487                         case MONITOR:
488                                 // If a conflict, we try to defend with a single ARP probe.
489                                 if (source_ip_conflict) {
490                                         VDBG("monitor conflict -- defending\n");
491                                         state = DEFEND;
492                                         timeout_ms = DEFEND_INTERVAL * 1000;
493                                         arp(fd, &saddr,
494                                                         ARPOP_REQUEST,
495                                                         &eth_addr, ip,
496                                                         &eth_addr, ip);
497                                 }
498                                 break;
499                         case DEFEND:
500                                 // Well, we tried.  Start over (on conflict).
501                                 if (source_ip_conflict) {
502                                         state = PROBE;
503                                         VDBG("defend conflict -- starting over\n");
504                                         ready = 0;
505                                         script_av[1] = (char*)"deconfig";
506                                         run(script_av, intf, &ip);
507
508                                         // restart the whole protocol
509                                         pick(&ip);
510                                         timeout_ms = 0;
511                                         nprobes = 0;
512                                         nclaims = 0;
513                                 }
514                                 break;
515                         default:
516                                 // Invalid, should never happen.  Restart the whole protocol.
517                                 VDBG("invalid state -- starting over\n");
518                                 state = PROBE;
519                                 pick(&ip);
520                                 timeout_ms = 0;
521                                 nprobes = 0;
522                                 nclaims = 0;
523                                 break;
524                         } // switch state
525
526                         break; // case 1 (packets arriving)
527                 default:
528                         why = "poll";
529                         goto bad;
530                 } // switch poll
531         }
532  bad:
533         bb_perror_msg("%s, %s", intf, why);
534         return EXIT_FAILURE;
535 }