Try to reply with node address only when decrementing the TTL.
[oweals/tinc.git] / src / route.c
1 /*
2     route.c -- routing
3     Copyright (C) 2000-2005 Ivo Timmermans,
4                   2000-2014 Guus Sliepen <guus@tinc-vpn.org>
5
6     This program is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     This program is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License along
17     with this program; if not, write to the Free Software Foundation, Inc.,
18     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 */
20
21 #include "system.h"
22
23 #include "avl_tree.h"
24 #include "connection.h"
25 #include "ethernet.h"
26 #include "ipv4.h"
27 #include "ipv6.h"
28 #include "logger.h"
29 #include "net.h"
30 #include "protocol.h"
31 #include "route.h"
32 #include "subnet.h"
33 #include "utils.h"
34
35 rmode_t routing_mode = RMODE_ROUTER;
36 fmode_t forwarding_mode = FMODE_INTERNAL;
37 bmode_t broadcast_mode = BMODE_MST;
38 bool decrement_ttl = false;
39 bool directonly = false;
40 bool priorityinheritance = false;
41 int macexpire = 600;
42 bool overwrite_mac = false;
43 mac_t mymac = {{0xFE, 0xFD, 0, 0, 0, 0}};
44
45 /* Sizes of various headers */
46
47 static const size_t ether_size = sizeof(struct ether_header);
48 static const size_t arp_size = sizeof(struct ether_arp);
49 static const size_t ip_size = sizeof(struct ip);
50 static const size_t icmp_size = sizeof(struct icmp) - sizeof(struct ip);
51 static const size_t ip6_size = sizeof(struct ip6_hdr);
52 static const size_t icmp6_size = sizeof(struct icmp6_hdr);
53 static const size_t ns_size = sizeof(struct nd_neighbor_solicit);
54 static const size_t opt_size = sizeof(struct nd_opt_hdr);
55
/* Larger of two values. Each argument may be evaluated twice, so do not
   pass expressions with side effects. */
#if !defined(MAX)
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
59
60 /* RFC 1071 */
61
/*
 * Compute, or continue, the Internet checksum (RFC 1071) over a buffer.
 *
 * data    - first byte to sum; no particular alignment is required.
 * len     - number of bytes to sum.
 * prevsum - value returned by a previous call when checksumming data that
 *           is split over several buffers, or 0xFFFF (~0) to start afresh.
 *
 * Returns the one's complement of the folded 16-bit sum. The words are
 * summed in memory order, so the result can be stored into a header as is.
 */
static uint16_t inet_checksum(void *data, int len, uint16_t prevsum) {
	const uint8_t *p = data;
	uint32_t checksum = prevsum ^ 0xFFFF;   /* undo the final complement of the previous call */

	while(len >= 2) {
		uint16_t word;

		/* memcpy instead of a uint16_t* dereference: the buffer may be
		   unaligned and is usually an array of bytes */
		memcpy(&word, p, sizeof word);
		checksum += word;
		p += 2;
		len -= 2;
	}

	if(len > 0) {
		/* An odd trailing byte is padded with a zero byte *after* it,
		   which yields the right word on both byte orders */
		uint16_t word = 0;

		memcpy(&word, p, 1);
		checksum += word;
	}

	/* Fold the carries back in (end-around carry) */
	while(checksum >> 16)
		checksum = (checksum & 0xFFFF) + (checksum >> 16);

	return (uint16_t)~checksum;
}
79
80 static bool ratelimit(int frequency) {
81         static time_t lasttime = 0;
82         static int count = 0;
83         
84         if(lasttime == now) {
85                 if(count >= frequency)
86                         return true;
87         } else {
88                 lasttime = now;
89                 count = 0;
90         }
91
92         count++;
93         return false;
94 }
95
96 static bool checklength(node_t *source, vpn_packet_t *packet, length_t length) {
97         if(packet->len < length) {
98                 ifdebug(TRAFFIC) logger(LOG_WARNING, "Got too short packet from %s (%s)", source->name, source->hostname);
99                 return false;
100         } else
101                 return true;
102 }
103
104 static void clamp_mss(const node_t *source, const node_t *via, vpn_packet_t *packet) {
105         if(!source || !via || !(via->options & OPTION_CLAMP_MSS))
106                 return;
107
108         uint16_t mtu = source->mtu;
109         if(via != myself && via->mtu < mtu)
110                 mtu = via->mtu;
111
112         /* Find TCP header */
113         int start = ether_size;
114         uint16_t type = packet->data[12] << 8 | packet->data[13];
115
116         if(type == ETH_P_8021Q) {
117                 start += 4;
118                 type = packet->data[16] << 8 | packet->data[17];
119         }
120
121         if(type == ETH_P_IP && packet->data[start + 9] == 6)
122                 start += (packet->data[start] & 0xf) * 4;
123         else if(type == ETH_P_IPV6 && packet->data[start + 6] == 6)
124                 start += 40;
125         else
126                 return;
127
128         if(packet->len <= start + 20)
129                 return;
130
131         /* Use data offset field to calculate length of options field */
132         int len = ((packet->data[start + 12] >> 4) - 5) * 4;
133
134         if(packet->len < start + 20 + len)
135                 return;
136
137         /* Search for MSS option header */
138         for(int i = 0; i < len;) {
139                 if(packet->data[start + 20 + i] == 0)
140                         break;
141
142                 if(packet->data[start + 20 + i] == 1) {
143                         i++;
144                         continue;
145                 }
146
147                 if(i > len - 2 || i > len - packet->data[start + 21 + i])
148                         break;
149
150                 if(packet->data[start + 20 + i] != 2) {
151                         if(packet->data[start + 21 + i] < 2)
152                                 break;
153                         i += packet->data[start + 21 + i];
154                         continue;
155                 }
156
157                 if(packet->data[start + 21] != 4)
158                         break;
159
160                 /* Found it */
161                 uint16_t oldmss = packet->data[start + 22 + i] << 8 | packet->data[start + 23 + i];
162                 uint16_t newmss = mtu - start - 20;
163                 uint16_t csum = packet->data[start + 16] << 8 | packet->data[start + 17];
164
165                 if(oldmss <= newmss)
166                         break;
167                 
168                 ifdebug(TRAFFIC) logger(LOG_INFO, "Clamping MSS of packet from %s to %s to %d", source->name, via->name, newmss);
169
170                 /* Update the MSS value and the checksum */
171                 packet->data[start + 22 + i] = newmss >> 8;
172                 packet->data[start + 23 + i] = newmss & 0xff;
173                 csum ^= 0xffff;
174                 csum -= oldmss;
175                 csum += newmss;
176                 csum ^= 0xffff;
177                 packet->data[start + 16] = csum >> 8;
178                 packet->data[start + 17] = csum & 0xff;
179                 break;
180         }
181 }
182
183 static void swap_mac_addresses(vpn_packet_t *packet) {
184         mac_t tmp;
185         memcpy(&tmp, &packet->data[0], sizeof tmp);
186         memcpy(&packet->data[0], &packet->data[6], sizeof tmp);
187         memcpy(&packet->data[6], &tmp, sizeof tmp);
188 }
189         
190 static void learn_mac(mac_t *address) {
191         subnet_t *subnet;
192         avl_node_t *node;
193         connection_t *c;
194
195         subnet = lookup_subnet_mac(myself, address);
196
197         /* If we don't know this MAC address yet, store it */
198
199         if(!subnet) {
200                 ifdebug(TRAFFIC) logger(LOG_INFO, "Learned new MAC address %x:%x:%x:%x:%x:%x",
201                                    address->x[0], address->x[1], address->x[2], address->x[3],
202                                    address->x[4], address->x[5]);
203
204                 subnet = new_subnet();
205                 subnet->type = SUBNET_MAC;
206                 subnet->expires = now + macexpire;
207                 subnet->net.mac.address = *address;
208                 subnet->weight = 10;
209                 subnet_add(myself, subnet);
210                 subnet_update(myself, subnet, true);
211
212                 /* And tell all other tinc daemons it's our MAC */
213
214                 for(node = connection_tree->head; node; node = node->next) {
215                         c = node->data;
216                         if(c->status.active)
217                                 send_add_subnet(c, subnet);
218                 }
219         }
220
221         if(subnet->expires)
222                 subnet->expires = now + macexpire;
223 }
224
225 void age_subnets(void) {
226         subnet_t *s;
227         connection_t *c;
228         avl_node_t *node, *next, *node2;
229
230         for(node = myself->subnet_tree->head; node; node = next) {
231                 next = node->next;
232                 s = node->data;
233                 if(s->expires && s->expires <= now) {
234                         ifdebug(TRAFFIC) {
235                                 char netstr[MAXNETSTR];
236                                 if(net2str(netstr, sizeof netstr, s))
237                                         logger(LOG_INFO, "Subnet %s expired", netstr);
238                         }
239
240                         for(node2 = connection_tree->head; node2; node2 = node2->next) {
241                                 c = node2->data;
242                                 if(c->status.active)
243                                         send_del_subnet(c, s);
244                         }
245
246                         subnet_update(myself, s, false);
247                         subnet_del(myself, s);
248                 }
249         }
250 }
251
252 /* RFC 792 */
253
254 static void route_ipv4_unreachable(node_t *source, vpn_packet_t *packet, length_t ether_size, uint8_t type, uint8_t code) {
255         struct ip ip = {0};
256         struct icmp icmp = {0};
257         
258         struct in_addr ip_src;
259         struct in_addr ip_dst;
260         uint32_t oldlen;
261
262         if(ratelimit(3))
263                 return;
264         
265         /* Swap Ethernet source and destination addresses */
266
267         swap_mac_addresses(packet);
268
269         /* Copy headers from packet into properly aligned structs on the stack */
270
271         memcpy(&ip, packet->data + ether_size, ip_size);
272
273         /* Remember original source and destination */
274         
275         ip_src = ip.ip_src;
276         ip_dst = ip.ip_dst;
277
278         /* Try to reply with an IP address assigned to the local machine */
279
280         if (type == ICMP_TIME_EXCEEDED && code == ICMP_EXC_TTL) {
281                 int sockfd = socket(AF_INET, SOCK_DGRAM, 0);
282                 if (sockfd != -1) {
283                         struct sockaddr_in addr;
284                         memset(&addr, 0, sizeof(addr));
285                         addr.sin_family = AF_INET;
286                         addr.sin_addr = ip.ip_src;
287                         if (!connect(sockfd, (const struct sockaddr*) &addr, sizeof(addr))) {
288                                 memset(&addr, 0, sizeof(addr));
289                                 addr.sin_family = AF_INET;
290                                 socklen_t addrlen = sizeof(addr);
291                                 if (!getsockname(sockfd, (struct sockaddr*) &addr, &addrlen) && addrlen <= sizeof(addr)) {
292                                         ip_dst = addr.sin_addr;
293                                 }
294                         }
295                         close(sockfd);
296                 }
297         }
298
299         oldlen = packet->len - ether_size;
300
301         if(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
302                 icmp.icmp_nextmtu = htons(packet->len - ether_size);
303
304         if(oldlen >= IP_MSS - ip_size - icmp_size)
305                 oldlen = IP_MSS - ip_size - icmp_size;
306         
307         /* Copy first part of original contents to ICMP message */
308         
309         memmove(packet->data + ether_size + ip_size + icmp_size, packet->data + ether_size, oldlen);
310
311         /* Fill in IPv4 header */
312         
313         ip.ip_v = 4;
314         ip.ip_hl = ip_size / 4;
315         ip.ip_tos = 0;
316         ip.ip_len = htons(ip_size + icmp_size + oldlen);
317         ip.ip_id = 0;
318         ip.ip_off = 0;
319         ip.ip_ttl = 255;
320         ip.ip_p = IPPROTO_ICMP;
321         ip.ip_sum = 0;
322         ip.ip_src = ip_dst;
323         ip.ip_dst = ip_src;
324
325         ip.ip_sum = inet_checksum(&ip, ip_size, ~0);
326         
327         /* Fill in ICMP header */
328         
329         icmp.icmp_type = type;
330         icmp.icmp_code = code;
331         icmp.icmp_cksum = 0;
332         
333         icmp.icmp_cksum = inet_checksum(&icmp, icmp_size, ~0);
334         icmp.icmp_cksum = inet_checksum(packet->data + ether_size + ip_size + icmp_size, oldlen, icmp.icmp_cksum);
335
336         /* Copy structs on stack back to packet */
337
338         memcpy(packet->data + ether_size, &ip, ip_size);
339         memcpy(packet->data + ether_size + ip_size, &icmp, icmp_size);
340         
341         packet->len = ether_size + ip_size + icmp_size + oldlen;
342
343         send_packet(source, packet);
344 }
345
346 /* RFC 791 */
347
348 static void fragment_ipv4_packet(node_t *dest, vpn_packet_t *packet, length_t ether_size) {
349         struct ip ip;
350         vpn_packet_t fragment;
351         int len, maxlen, todo;
352         uint8_t *offset;
353         uint16_t ip_off, origf;
354         
355         memcpy(&ip, packet->data + ether_size, ip_size);
356         fragment.priority = packet->priority;
357
358         if(ip.ip_hl != ip_size / 4)
359                 return;
360         
361         todo = ntohs(ip.ip_len) - ip_size;
362
363         if(ether_size + ip_size + todo != packet->len) {
364                 ifdebug(TRAFFIC) logger(LOG_WARNING, "Length of packet (%d) doesn't match length in IPv4 header (%d)", packet->len, (int)(ether_size + ip_size + todo));
365                 return;
366         }
367
368         ifdebug(TRAFFIC) logger(LOG_INFO, "Fragmenting packet of %d bytes to %s (%s)", packet->len, dest->name, dest->hostname);
369
370         offset = packet->data + ether_size + ip_size;
371         maxlen = (dest->mtu - ether_size - ip_size) & ~0x7;
372         ip_off = ntohs(ip.ip_off);
373         origf = ip_off & ~IP_OFFMASK;
374         ip_off &= IP_OFFMASK;
375         
376         while(todo) {
377                 len = todo > maxlen ? maxlen : todo;
378                 memcpy(fragment.data + ether_size + ip_size, offset, len);
379                 todo -= len;
380                 offset += len;
381
382                 ip.ip_len = htons(ip_size + len);
383                 ip.ip_off = htons(ip_off | origf | (todo ? IP_MF : 0));
384                 ip.ip_sum = 0;
385                 ip.ip_sum = inet_checksum(&ip, ip_size, ~0);
386                 memcpy(fragment.data, packet->data, ether_size);
387                 memcpy(fragment.data + ether_size, &ip, ip_size);
388                 fragment.len = ether_size + ip_size + len;
389
390                 send_packet(dest, &fragment);
391
392                 ip_off += len / 8;
393         }       
394 }
395
396 static void route_ipv4_unicast(node_t *source, vpn_packet_t *packet) {
397         subnet_t *subnet;
398         node_t *via;
399         ipv4_t dest;
400
401         memcpy(&dest, &packet->data[30], sizeof dest);
402         subnet = lookup_subnet_ipv4(&dest);
403
404         if(!subnet) {
405                 ifdebug(TRAFFIC) logger(LOG_WARNING, "Cannot route packet from %s (%s): unknown IPv4 destination address %d.%d.%d.%d",
406                                 source->name, source->hostname,
407                                 dest.x[0],
408                                 dest.x[1],
409                                 dest.x[2],
410                                 dest.x[3]);
411
412                 route_ipv4_unreachable(source, packet, ether_size, ICMP_DEST_UNREACH, ICMP_NET_UNKNOWN);
413                 return;
414         }
415         
416         if(subnet->owner == source) {
417                 ifdebug(TRAFFIC) logger(LOG_WARNING, "Packet looping back to %s (%s)!", source->name, source->hostname);
418                 return;
419         }
420
421         if(!subnet->owner->status.reachable)
422                 return route_ipv4_unreachable(source, packet, ether_size, ICMP_DEST_UNREACH, ICMP_NET_UNREACH);
423
424         if(forwarding_mode == FMODE_OFF && source != myself && subnet->owner != myself)
425                 return route_ipv4_unreachable(source, packet, ether_size, ICMP_DEST_UNREACH, ICMP_NET_ANO);
426
427         if(priorityinheritance)
428                 packet->priority = packet->data[15];
429
430         via = (subnet->owner->via == myself) ? subnet->owner->nexthop : subnet->owner->via;
431
432         if(via == source) {
433                 ifdebug(TRAFFIC) logger(LOG_ERR, "Routing loop for packet from %s (%s)!", source->name, source->hostname);
434                 return;
435         }
436         
437         if(directonly && subnet->owner != via)
438                 return route_ipv4_unreachable(source, packet, ether_size, ICMP_DEST_UNREACH, ICMP_NET_ANO);
439
440         if(via && packet->len > MAX(via->mtu, 590) && via != myself) {
441                 ifdebug(TRAFFIC) logger(LOG_INFO, "Packet for %s (%s) length %d larger than MTU %d", subnet->owner->name, subnet->owner->hostname, packet->len, via->mtu);
442                 if(packet->data[20] & 0x40) {
443                         packet->len = MAX(via->mtu, 590);
444                         route_ipv4_unreachable(source, packet, ether_size, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED);
445                 } else {
446                         fragment_ipv4_packet(via, packet, ether_size);
447                 }
448
449                 return;
450         }
451
452         clamp_mss(source, via, packet);
453  
454         send_packet(subnet->owner, packet);
455 }
456
457 static void route_ipv4(node_t *source, vpn_packet_t *packet) {
458         if(!checklength(source, packet, ether_size + ip_size))
459                 return;
460
461         if(broadcast_mode && (((packet->data[30] & 0xf0) == 0xe0) || (
462                         packet->data[30] == 255 &&
463                         packet->data[31] == 255 &&
464                         packet->data[32] == 255 &&
465                         packet->data[33] == 255)))
466                 broadcast_packet(source, packet);
467         else
468                 route_ipv4_unicast(source, packet);
469 }
470
471 /* RFC 2463 */
472
473 static void route_ipv6_unreachable(node_t *source, vpn_packet_t *packet, length_t ether_size, uint8_t type, uint8_t code) {
474         struct ip6_hdr ip6;
475         struct icmp6_hdr icmp6 = {0};
476         uint16_t checksum;      
477
478         struct {
479                 struct in6_addr ip6_src;        /* source address */
480                 struct in6_addr ip6_dst;        /* destination address */
481                 uint32_t length;
482                 uint32_t next;
483         } pseudo;
484
485         if(ratelimit(3))
486                 return;
487         
488         /* Swap Ethernet source and destination addresses */
489
490         swap_mac_addresses(packet);
491
492         /* Copy headers from packet to structs on the stack */
493
494         memcpy(&ip6, packet->data + ether_size, ip6_size);
495
496         /* Remember original source and destination */
497         
498         pseudo.ip6_src = ip6.ip6_dst;
499         pseudo.ip6_dst = ip6.ip6_src;
500
501         /* Try to reply with an IP address assigned to the local machine */
502
503         if (type == ICMP6_TIME_EXCEEDED && code == ICMP6_TIME_EXCEED_TRANSIT) {
504                 int sockfd = socket(AF_INET6, SOCK_DGRAM, 0);
505                 if (sockfd != -1) {
506                         struct sockaddr_in6 addr;
507                         memset(&addr, 0, sizeof(addr));
508                         addr.sin6_family = AF_INET6;
509                         addr.sin6_addr = ip6.ip6_src;
510                         if (!connect(sockfd, (const struct sockaddr*) &addr, sizeof(addr))) {
511                                 memset(&addr, 0, sizeof(addr));
512                                 addr.sin6_family = AF_INET6;
513                                 socklen_t addrlen = sizeof(addr);
514                                 if (!getsockname(sockfd, (struct sockaddr*) &addr, &addrlen) && addrlen <= sizeof(addr)) {
515                                         pseudo.ip6_src = addr.sin6_addr;
516                                 }
517                         }
518                         close(sockfd);
519                 }
520         }
521
522         pseudo.length = packet->len - ether_size;
523
524         if(type == ICMP6_PACKET_TOO_BIG)
525                 icmp6.icmp6_mtu = htonl(pseudo.length);
526         
527         if(pseudo.length >= IP_MSS - ip6_size - icmp6_size)
528                 pseudo.length = IP_MSS - ip6_size - icmp6_size;
529         
530         /* Copy first part of original contents to ICMP message */
531         
532         memmove(packet->data + ether_size + ip6_size + icmp6_size, packet->data + ether_size, pseudo.length);
533
534         /* Fill in IPv6 header */
535         
536         ip6.ip6_flow = htonl(0x60000000UL);
537         ip6.ip6_plen = htons(icmp6_size + pseudo.length);
538         ip6.ip6_nxt = IPPROTO_ICMPV6;
539         ip6.ip6_hlim = 255;
540         ip6.ip6_src = pseudo.ip6_src;
541         ip6.ip6_dst = pseudo.ip6_dst;
542
543         /* Fill in ICMP header */
544         
545         icmp6.icmp6_type = type;
546         icmp6.icmp6_code = code;
547         icmp6.icmp6_cksum = 0;
548
549         /* Create pseudo header */
550                 
551         pseudo.length = htonl(icmp6_size + pseudo.length);
552         pseudo.next = htonl(IPPROTO_ICMPV6);
553
554         /* Generate checksum */
555         
556         checksum = inet_checksum(&pseudo, sizeof(pseudo), ~0);
557         checksum = inet_checksum(&icmp6, icmp6_size, checksum);
558         checksum = inet_checksum(packet->data + ether_size + ip6_size + icmp6_size, ntohl(pseudo.length) - icmp6_size, checksum);
559
560         icmp6.icmp6_cksum = checksum;
561
562         /* Copy structs on stack back to packet */
563
564         memcpy(packet->data + ether_size, &ip6, ip6_size);
565         memcpy(packet->data + ether_size + ip6_size, &icmp6, icmp6_size);
566         
567         packet->len = ether_size + ip6_size + ntohl(pseudo.length);
568         
569         send_packet(source, packet);
570 }
571
572 static void route_ipv6_unicast(node_t *source, vpn_packet_t *packet) {
573         subnet_t *subnet;
574         node_t *via;
575         ipv6_t dest;
576
577         memcpy(&dest, &packet->data[38], sizeof dest);
578         subnet = lookup_subnet_ipv6(&dest);
579
580         if(!subnet) {
581                 ifdebug(TRAFFIC) logger(LOG_WARNING, "Cannot route packet from %s (%s): unknown IPv6 destination address %hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx",
582                                 source->name, source->hostname,
583                                 ntohs(dest.x[0]),
584                                 ntohs(dest.x[1]),
585                                 ntohs(dest.x[2]),
586                                 ntohs(dest.x[3]),
587                                 ntohs(dest.x[4]),
588                                 ntohs(dest.x[5]),
589                                 ntohs(dest.x[6]),
590                                 ntohs(dest.x[7]));
591
592                 route_ipv6_unreachable(source, packet, ether_size, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR);
593                 return;
594         }
595
596         if(subnet->owner == source) {
597                 ifdebug(TRAFFIC) logger(LOG_WARNING, "Packet looping back to %s (%s)!", source->name, source->hostname);
598                 return;
599         }
600
601         if(!subnet->owner->status.reachable)
602                 return route_ipv6_unreachable(source, packet, ether_size, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOROUTE);
603
604         if(forwarding_mode == FMODE_OFF && source != myself && subnet->owner != myself)
605                 return route_ipv6_unreachable(source, packet, ether_size, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADMIN);
606
607         via = (subnet->owner->via == myself) ? subnet->owner->nexthop : subnet->owner->via;
608         
609         if(via == source) {
610                 ifdebug(TRAFFIC) logger(LOG_ERR, "Routing loop for packet from %s (%s)!", source->name, source->hostname);
611                 return;
612         }
613         
614         if(directonly && subnet->owner != via)
615                 return route_ipv6_unreachable(source, packet, ether_size, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADMIN);
616
617         if(via && packet->len > MAX(via->mtu, 1294) && via != myself) {
618                 ifdebug(TRAFFIC) logger(LOG_INFO, "Packet for %s (%s) length %d larger than MTU %d", subnet->owner->name, subnet->owner->hostname, packet->len, via->mtu);
619                 packet->len = MAX(via->mtu, 1294);
620                 route_ipv6_unreachable(source, packet, ether_size, ICMP6_PACKET_TOO_BIG, 0);
621                 return;
622         }
623
624         clamp_mss(source, via, packet);
625  
626         send_packet(subnet->owner, packet);
627 }
628
629 /* RFC 2461 */
630
631 static void route_neighborsol(node_t *source, vpn_packet_t *packet) {
632         struct ip6_hdr ip6;
633         struct nd_neighbor_solicit ns;
634         struct nd_opt_hdr opt;
635         subnet_t *subnet;
636         uint16_t checksum;
637         bool has_opt;
638
639         struct {
640                 struct in6_addr ip6_src;        /* source address */
641                 struct in6_addr ip6_dst;        /* destination address */
642                 uint32_t length;
643                 uint32_t next;
644         } pseudo;
645
646         if(!checklength(source, packet, ether_size + ip6_size + ns_size))
647                 return;
648         
649         has_opt = packet->len >= ether_size + ip6_size + ns_size + opt_size + ETH_ALEN;
650         
651         if(source != myself) {
652                 ifdebug(TRAFFIC) logger(LOG_WARNING, "Got neighbor solicitation request from %s (%s) while in router mode!", source->name, source->hostname);
653                 return;
654         }
655
656         /* Copy headers from packet to structs on the stack */
657
658         memcpy(&ip6, packet->data + ether_size, ip6_size);
659         memcpy(&ns, packet->data + ether_size + ip6_size, ns_size);
660         if(has_opt)
661                 memcpy(&opt, packet->data + ether_size + ip6_size + ns_size, opt_size);
662
663         /* First, snatch the source address from the neighbor solicitation packet */
664
665         if(overwrite_mac)
666                 memcpy(mymac.x, packet->data + ETH_ALEN, ETH_ALEN);
667
668         /* Check if this is a valid neighbor solicitation request */
669
670         if(ns.nd_ns_hdr.icmp6_type != ND_NEIGHBOR_SOLICIT ||
671            (has_opt && opt.nd_opt_type != ND_OPT_SOURCE_LINKADDR)) {
672                 ifdebug(TRAFFIC) logger(LOG_WARNING, "Cannot route packet: received unknown type neighbor solicitation request");
673                 return;
674         }
675
676         /* Create pseudo header */
677
678         pseudo.ip6_src = ip6.ip6_src;
679         pseudo.ip6_dst = ip6.ip6_dst;
680         if(has_opt)
681                 pseudo.length = htonl(ns_size + opt_size + ETH_ALEN);
682         else
683                 pseudo.length = htonl(ns_size);
684         pseudo.next = htonl(IPPROTO_ICMPV6);
685
686         /* Generate checksum */
687
688         checksum = inet_checksum(&pseudo, sizeof(pseudo), ~0);
689         checksum = inet_checksum(&ns, ns_size, checksum);
690         if(has_opt) {
691                 checksum = inet_checksum(&opt, opt_size, checksum);
692                 checksum = inet_checksum(packet->data + ether_size + ip6_size + ns_size + opt_size, ETH_ALEN, checksum);
693         }
694
695         if(checksum) {
696                 ifdebug(TRAFFIC) logger(LOG_WARNING, "Cannot route packet: checksum error for neighbor solicitation request");
697                 return;
698         }
699
700         /* Check if the IPv6 address exists on the VPN */
701
702         subnet = lookup_subnet_ipv6((ipv6_t *) &ns.nd_ns_target);
703
704         if(!subnet) {
705                 ifdebug(TRAFFIC) logger(LOG_WARNING, "Cannot route packet: neighbor solicitation request for unknown address %hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx",
706                                    ntohs(((uint16_t *) &ns.nd_ns_target)[0]),
707                                    ntohs(((uint16_t *) &ns.nd_ns_target)[1]),
708                                    ntohs(((uint16_t *) &ns.nd_ns_target)[2]),
709                                    ntohs(((uint16_t *) &ns.nd_ns_target)[3]),
710                                    ntohs(((uint16_t *) &ns.nd_ns_target)[4]),
711                                    ntohs(((uint16_t *) &ns.nd_ns_target)[5]),
712                                    ntohs(((uint16_t *) &ns.nd_ns_target)[6]),
713                                    ntohs(((uint16_t *) &ns.nd_ns_target)[7]));
714
715                 return;
716         }
717
718         /* Check if it is for our own subnet */
719
720         if(subnet->owner == myself)
721                 return;                                 /* silently ignore */
722
723         /* Create neighbor advertation reply */
724
725         memcpy(packet->data, packet->data + ETH_ALEN, ETH_ALEN);        /* copy destination address */
726         packet->data[ETH_ALEN * 2 - 1] ^= 0xFF; /* mangle source address so it looks like it's not from us */
727
728         ip6.ip6_dst = ip6.ip6_src;                      /* swap destination and source protocoll address */
729         ip6.ip6_src = ns.nd_ns_target;
730
731         if(has_opt)
732                 memcpy(packet->data + ether_size + ip6_size + ns_size + opt_size, packet->data + ETH_ALEN, ETH_ALEN);   /* add fake source hard addr */
733
734         ns.nd_ns_cksum = 0;
735         ns.nd_ns_type = ND_NEIGHBOR_ADVERT;
736         ns.nd_ns_reserved = htonl(0x40000000UL);        /* Set solicited flag */
737         opt.nd_opt_type = ND_OPT_TARGET_LINKADDR;
738
739         /* Create pseudo header */
740
741         pseudo.ip6_src = ip6.ip6_src;
742         pseudo.ip6_dst = ip6.ip6_dst;
743         if(has_opt)
744                 pseudo.length = htonl(ns_size + opt_size + ETH_ALEN);
745         else
746                 pseudo.length = htonl(ns_size);
747         pseudo.next = htonl(IPPROTO_ICMPV6);
748
749         /* Generate checksum */
750
751         checksum = inet_checksum(&pseudo, sizeof(pseudo), ~0);
752         checksum = inet_checksum(&ns, ns_size, checksum);
753         if(has_opt) {
754                 checksum = inet_checksum(&opt, opt_size, checksum);
755                 checksum = inet_checksum(packet->data + ether_size + ip6_size + ns_size + opt_size, ETH_ALEN, checksum);
756         }
757
758         ns.nd_ns_hdr.icmp6_cksum = checksum;
759
760         /* Copy structs on stack back to packet */
761
762         memcpy(packet->data + ether_size, &ip6, ip6_size);
763         memcpy(packet->data + ether_size + ip6_size, &ns, ns_size);
764         if(has_opt)
765                 memcpy(packet->data + ether_size + ip6_size + ns_size, &opt, opt_size);
766
767         send_packet(source, packet);
768 }
769
770 static void route_ipv6(node_t *source, vpn_packet_t *packet) {
771         if(!checklength(source, packet, ether_size + ip6_size))
772                 return;
773
774         if(packet->data[20] == IPPROTO_ICMPV6 && checklength(source, packet, ether_size + ip6_size + icmp6_size) && packet->data[54] == ND_NEIGHBOR_SOLICIT) {
775                 route_neighborsol(source, packet);
776                 return;
777         }
778
779         if(broadcast_mode && packet->data[38] == 255)
780                 broadcast_packet(source, packet);
781         else
782                 route_ipv6_unicast(source, packet);
783 }
784
785 /* RFC 826 */
786
787 static void route_arp(node_t *source, vpn_packet_t *packet) {
788         struct ether_arp arp;
789         subnet_t *subnet;
790         struct in_addr addr;
791
792         if(!checklength(source, packet, ether_size + arp_size))
793                 return;
794
795         if(source != myself) {
796                 ifdebug(TRAFFIC) logger(LOG_WARNING, "Got ARP request from %s (%s) while in router mode!", source->name, source->hostname);
797                 return;
798         }
799
800         /* First, snatch the source address from the ARP packet */
801
802         if(overwrite_mac)
803                 memcpy(mymac.x, packet->data + ETH_ALEN, ETH_ALEN);
804
805         /* Copy headers from packet to structs on the stack */
806
807         memcpy(&arp, packet->data + ether_size, arp_size);
808
809         /* Check if this is a valid ARP request */
810
811         if(ntohs(arp.arp_hrd) != ARPHRD_ETHER || ntohs(arp.arp_pro) != ETH_P_IP ||
812            arp.arp_hln != ETH_ALEN || arp.arp_pln != sizeof(addr) || ntohs(arp.arp_op) != ARPOP_REQUEST) {
813                 ifdebug(TRAFFIC) logger(LOG_WARNING, "Cannot route packet: received unknown type ARP request");
814                 return;
815         }
816
817         /* Check if the IPv4 address exists on the VPN */
818
819         subnet = lookup_subnet_ipv4((ipv4_t *) &arp.arp_tpa);
820
821         if(!subnet) {
822                 ifdebug(TRAFFIC) logger(LOG_WARNING, "Cannot route packet: ARP request for unknown address %d.%d.%d.%d",
823                                    arp.arp_tpa[0], arp.arp_tpa[1], arp.arp_tpa[2],
824                                    arp.arp_tpa[3]);
825                 return;
826         }
827
828         /* Check if it is for our own subnet */
829
830         if(subnet->owner == myself)
831                 return;                                 /* silently ignore */
832
833         memcpy(packet->data, packet->data + ETH_ALEN, ETH_ALEN);        /* copy destination address */
834         packet->data[ETH_ALEN * 2 - 1] ^= 0xFF; /* mangle source address so it looks like it's not from us */
835
836         memcpy(&addr, arp.arp_tpa, sizeof(addr));       /* save protocol addr */
837         memcpy(arp.arp_tpa, arp.arp_spa, sizeof(addr)); /* swap destination and source protocol address */
838         memcpy(arp.arp_spa, &addr, sizeof(addr));       /* ... */
839
840         memcpy(arp.arp_tha, arp.arp_sha, ETH_ALEN);     /* set target hard/proto addr */
841         memcpy(arp.arp_sha, packet->data + ETH_ALEN, ETH_ALEN); /* add fake source hard addr */
842         arp.arp_op = htons(ARPOP_REPLY);
843
844         /* Copy structs on stack back to packet */
845
846         memcpy(packet->data + ether_size, &arp, arp_size);
847
848         send_packet(source, packet);
849 }
850
851 static void route_mac(node_t *source, vpn_packet_t *packet) {
852         subnet_t *subnet;
853         mac_t dest;
854
855         /* Learn source address */
856
857         if(source == myself) {
858                 mac_t src;
859                 memcpy(&src, &packet->data[6], sizeof src);
860                 learn_mac(&src);
861         }
862
863         /* Lookup destination address */
864
865         memcpy(&dest, &packet->data[0], sizeof dest);
866         subnet = lookup_subnet_mac(NULL, &dest);
867
868         if(!subnet) {
869                 broadcast_packet(source, packet);
870                 return;
871         }
872
873         if(subnet->owner == source) {
874                 ifdebug(TRAFFIC) logger(LOG_WARNING, "Packet looping back to %s (%s)!", source->name, source->hostname);
875                 return;
876         }
877
878         if(forwarding_mode == FMODE_OFF && source != myself && subnet->owner != myself)
879                 return;
880
881         uint16_t type = packet->data[12] << 8 | packet->data[13];
882
883         if(priorityinheritance && type == ETH_P_IP && packet->len >= ether_size + ip_size)
884                 packet->priority = packet->data[15];
885
886         // Handle packets larger than PMTU
887
888         node_t *via = (subnet->owner->via == myself) ? subnet->owner->nexthop : subnet->owner->via;
889
890         if(directonly && subnet->owner != via)
891                 return;
892         
893         if(via && packet->len > via->mtu && via != myself) {
894                 ifdebug(TRAFFIC) logger(LOG_INFO, "Packet for %s (%s) length %d larger than MTU %d", subnet->owner->name, subnet->owner->hostname, packet->len, via->mtu);
895                 length_t ethlen = 14;
896
897                 if(type == ETH_P_8021Q) {
898                         type = packet->data[16] << 8 | packet->data[17];
899                         ethlen += 4;
900                 }
901
902                 if(type == ETH_P_IP && packet->len > 576 + ethlen) {
903                         if(packet->data[6 + ethlen] & 0x40) {
904                                 packet->len = via->mtu;
905                                 route_ipv4_unreachable(source, packet, ethlen, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED);
906                         } else {
907                                 fragment_ipv4_packet(via, packet, ethlen);
908                         }
909                         return;
910                 } else if(type == ETH_P_IPV6 && packet->len > 1280 + ethlen) {
911                         packet->len = via->mtu;
912                         route_ipv6_unreachable(source, packet, ethlen, ICMP6_PACKET_TOO_BIG, 0);
913                         return;
914                 }
915         }
916
917         clamp_mss(source, via, packet);
918  
919         send_packet(subnet->owner, packet);
920 }
921
922 static bool do_decrement_ttl(node_t *source, vpn_packet_t *packet) {
923         uint16_t type = packet->data[12] << 8 | packet->data[13];
924         length_t ethlen = ether_size;
925
926         if(type == ETH_P_8021Q) {
927                 type = packet->data[16] << 8 | packet->data[17];
928                 ethlen += 4;
929         }
930
931         switch (type) {
932                 case ETH_P_IP:
933                         if(!checklength(source, packet, ethlen + ip_size))
934                                 return false;
935
936                         if(packet->data[ethlen + 8] <= 1) {
937                                 if(packet->data[ethlen + 11] != IPPROTO_ICMP || packet->data[ethlen + 32] != ICMP_TIME_EXCEEDED)
938                                         route_ipv4_unreachable(source, packet, ethlen, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL);
939                                 return false;
940                         }
941
942                         uint16_t old = packet->data[ethlen + 8] << 8 | packet->data[ethlen + 9];
943                         packet->data[ethlen + 8]--;
944                         uint16_t new = packet->data[ethlen + 8] << 8 | packet->data[ethlen + 9];
945
946                         uint32_t checksum = packet->data[ethlen + 10] << 8 | packet->data[ethlen + 11];
947                         checksum += old + (~new & 0xFFFF);
948                         while(checksum >> 16)
949                                 checksum = (checksum & 0xFFFF) + (checksum >> 16);
950                         packet->data[ethlen + 10] = checksum >> 8;
951                         packet->data[ethlen + 11] = checksum & 0xff;
952
953                         return true;
954
955                 case ETH_P_IPV6:
956                         if(!checklength(source, packet, ethlen + ip6_size))
957                                 return false;
958
959                         if(packet->data[ethlen + 7] <= 1) {
960                                 if(packet->data[ethlen + 6] != IPPROTO_ICMPV6 || packet->data[ethlen + 40] != ICMP6_TIME_EXCEEDED)
961                                         route_ipv6_unreachable(source, packet, ethlen, ICMP6_TIME_EXCEEDED, ICMP6_TIME_EXCEED_TRANSIT);
962                                 return false;
963                         }
964
965                         packet->data[ethlen + 7]--;
966
967                         return true;
968
969                 default:
970                         return true;
971         }
972 }
973
974 void route(node_t *source, vpn_packet_t *packet) {
975         if(forwarding_mode == FMODE_KERNEL && source != myself) {
976                 send_packet(myself, packet);
977                 return;
978         }
979
980         if(!checklength(source, packet, ether_size))
981                 return;
982
983         if(decrement_ttl && source != myself)
984                 if(!do_decrement_ttl(source, packet))
985                         return;
986
987         switch (routing_mode) {
988                 case RMODE_ROUTER:
989                         {
990                                 uint16_t type = packet->data[12] << 8 | packet->data[13];
991
992                                 switch (type) {
993                                         case ETH_P_ARP:
994                                                 route_arp(source, packet);
995                                                 break;
996
997                                         case ETH_P_IP:
998                                                 route_ipv4(source, packet);
999                                                 break;
1000
1001                                         case ETH_P_IPV6:
1002                                                 route_ipv6(source, packet);
1003                                                 break;
1004
1005                                         default:
1006                                                 ifdebug(TRAFFIC) logger(LOG_WARNING, "Cannot route packet from %s (%s): unknown type %hx", source->name, source->hostname, type);
1007                                                 break;
1008                                 }
1009                         }
1010                         break;
1011
1012                 case RMODE_SWITCH:
1013                         route_mac(source, packet);
1014                         break;
1015
1016                 case RMODE_HUB:
1017                         broadcast_packet(source, packet);
1018                         break;
1019         }
1020 }