Further improve bandwidth estimation for type 2 MTU probe replies.
[oweals/tinc.git] / src / net_packet.c
1 /*
2     net_packet.c -- Handles in- and outgoing VPN packets
3     Copyright (C) 1998-2005 Ivo Timmermans,
4                   2000-2013 Guus Sliepen <guus@tinc-vpn.org>
5                   2010      Timothy Redaelli <timothy@redaelli.eu>
6                   2010      Brandon Black <blblack@gmail.com>
7
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17
18     You should have received a copy of the GNU General Public License along
19     with this program; if not, write to the Free Software Foundation, Inc.,
20     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #include "system.h"
24
25 #ifdef HAVE_ZLIB
26 #include <zlib.h>
27 #endif
28
29 #ifdef HAVE_LZO
30 #include LZO1X_H
31 #endif
32
33 #include "cipher.h"
34 #include "conf.h"
35 #include "connection.h"
36 #include "crypto.h"
37 #include "digest.h"
38 #include "device.h"
39 #include "ethernet.h"
40 #include "graph.h"
41 #include "logger.h"
42 #include "net.h"
43 #include "netutl.h"
44 #include "protocol.h"
45 #include "process.h"
46 #include "route.h"
47 #include "utils.h"
48 #include "xalloc.h"
49
50 int keylifetime = 0;
51 #ifdef HAVE_LZO
52 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
53 #endif
54
55 static void send_udppacket(node_t *, vpn_packet_t *);
56
57 unsigned replaywin = 16;
58 bool localdiscovery = false;
59 sockaddr_t localdiscovery_address;
60
61 #define MAX_SEQNO 1073741824
62
63 /* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
64    mtuprobes ==    31: sleep pinginterval seconds
65    mtuprobes ==    32: send 1 burst, sleep pingtimeout second
66    mtuprobes ==    33: no response from other side, restart PMTU discovery process
67
68    Probes are sent in batches of at least three, with random sizes between the
69    lower and upper boundaries for the MTU thus far discovered.
70
71    After the initial discovery, a fourth packet is added to each batch with a
72    size larger than the currently known PMTU, to test if the PMTU has increased.
73
74    In case local discovery is enabled, another packet is added to each batch,
75    which will be broadcast to the local network.
76
77 */
78
79 static void send_mtu_probe_handler(void *data) {
80         node_t *n = data;
81         int timeout = 1;
82
83         n->mtuprobes++;
84
85         if(!n->status.reachable || !n->status.validkey) {
86                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
87                 n->mtuprobes = 0;
88                 return;
89         }
90
91         if(n->mtuprobes > 32) {
92                 if(!n->minmtu) {
93                         n->mtuprobes = 31;
94                         timeout = pinginterval;
95                         goto end;
96                 }
97
98                 logger(DEBUG_TRAFFIC, LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
99                 n->status.udp_confirmed = false;
100                 n->mtuprobes = 1;
101                 n->minmtu = 0;
102                 n->maxmtu = MTU;
103         }
104
105         if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
106                 logger(DEBUG_TRAFFIC, LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
107                 n->mtuprobes = 31;
108         }
109
110         if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
111                 if(n->minmtu > n->maxmtu)
112                         n->minmtu = n->maxmtu;
113                 else
114                         n->maxmtu = n->minmtu;
115                 n->mtu = n->minmtu;
116                 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
117                 n->mtuprobes = 31;
118         }
119
120         if(n->mtuprobes == 31) {
121                 timeout = pinginterval;
122                 goto end;
123         } else if(n->mtuprobes == 32) {
124                 timeout = pingtimeout;
125         }
126
127         for(int i = 0; i < 4 + localdiscovery; i++) {
128                 int len;
129
130                 if(i == 0) {
131                         if(n->mtuprobes < 30 || n->maxmtu + 8 >= MTU)
132                                 continue;
133                         len = n->maxmtu + 8;
134                 } else if(n->maxmtu <= n->minmtu) {
135                         len = n->maxmtu;
136                 } else {
137                         len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
138                 }
139
140                 if(len < 64)
141                         len = 64;
142
143                 vpn_packet_t packet;
144                 memset(packet.data, 0, 14);
145                 randomize(packet.data + 14, len - 14);
146                 packet.len = len;
147                 if(i >= 4 && n->mtuprobes <= 10)
148                         packet.priority = -1;
149                 else
150                         packet.priority = 0;
151
152                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
153
154                 send_udppacket(n, &packet);
155         }
156
157         n->probe_counter = 0;
158         gettimeofday(&n->probe_time, NULL);
159
160         /* Calculate the packet loss of incoming traffic by comparing the rate of
161            packets received to the rate with which the sequence number has increased.
162          */
163
164         if(n->received > n->prev_received)
165                 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
166         else
167                 n->packetloss = n->received_seqno <= n->prev_received_seqno;
168
169         n->prev_received_seqno = n->received_seqno;
170         n->prev_received = n->received;
171
172 end:
173         timeout_set(&n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
174 }
175
176 void send_mtu_probe(node_t *n) {
177         timeout_add(&n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
178         send_mtu_probe_handler(n);
179 }
180
181 static void mtu_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
182         if(!packet->data[0]) {
183                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got MTU probe request %d from %s (%s)", packet->len, n->name, n->hostname);
184
185                 /* It's a probe request, send back a reply */
186
187                 /* Type 2 probe replies were introduced in protocol 17.3 */
188                 if ((n->options >> 24) == 3) {
189                         uint8_t* data = packet->data;
190                         *data++ = 2;
191                         uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
192                         struct timeval now;
193                         gettimeofday(&now, NULL);
194                         uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
195                         uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
196                         packet->len = data - packet->data;
197                 } else {
198                         /* Legacy protocol: n won't understand type 2 probe replies. */
199                         packet->data[0] = 1;
200                 }
201
202                 /* Temporarily set udp_confirmed, so that the reply is sent
203                    back exactly the way it came in. */
204
205                 bool udp_confirmed = n->status.udp_confirmed;
206                 n->status.udp_confirmed = true;
207                 send_udppacket(n, packet);
208                 n->status.udp_confirmed = udp_confirmed;
209         } else {
210                 length_t probelen = len;
211                 if (packet->data[0] == 2) {
212                         if (len < 3)
213                                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) MTU probe reply from %s (%s)", n->name, n->hostname);
214                         else {
215                                 uint16_t probelen16; memcpy(&probelen16, packet->data + 1, 2); probelen = ntohs(probelen16);
216                         }
217                 }
218                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d MTU probe reply %d from %s (%s)", packet->data[0], probelen, n->name, n->hostname);
219
220                 /* It's a valid reply: now we know bidirectional communication
221                    is possible using the address and socket that the reply
222                    packet used. */
223
224                 n->status.udp_confirmed = true;
225
226                 /* If we haven't established the PMTU yet, restart the discovery process. */
227
228                 if(n->mtuprobes > 30) {
229                         if (probelen == n->maxmtu + 8) {
230                                 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
231                                 n->maxmtu = MTU;
232                                 n->mtuprobes = 10;
233                                 return;
234                         }
235
236                         if(n->minmtu)
237                                 n->mtuprobes = 30;
238                         else
239                                 n->mtuprobes = 1;
240                 }
241
242                 /* If applicable, raise the minimum supported MTU */
243
244                 if(probelen > n->maxmtu)
245                         probelen = n->maxmtu;
246                 if(n->minmtu < probelen)
247                         n->minmtu = probelen;
248
249                 /* Calculate RTT and bandwidth.
250                    The RTT is the time between the MTU probe burst was sent and the first
251                    reply is received. The bandwidth is measured using the time between the
252                    arrival of the first and third probe reply (or type 2 probe requests).
253                  */
254
255                 struct timeval now, diff;
256                 gettimeofday(&now, NULL);
257                 timersub(&now, &n->probe_time, &diff);
258
259                 struct timeval probe_timestamp = now;
260                 if (packet->data[0] == 2 && packet->len >= 11) {
261                         uint32_t sec; memcpy(&sec, packet->data + 3, 4);
262                         uint32_t usec; memcpy(&usec, packet->data + 7, 4);
263                         probe_timestamp.tv_sec = ntohl(sec);
264                         probe_timestamp.tv_usec = ntohl(usec);
265                 }
266                 
267                 n->probe_counter++;
268
269                 if(n->probe_counter == 1) {
270                         n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
271                         n->probe_time = probe_timestamp;
272                 } else if(n->probe_counter == 3) {
273                         struct timeval probe_timestamp_diff;
274                         timersub(&probe_timestamp, &n->probe_time, &probe_timestamp_diff);
275                         n->bandwidth = 2.0 * probelen / (probe_timestamp_diff.tv_sec + probe_timestamp_diff.tv_usec * 1e-6);
276                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
277                 }
278         }
279 }
280
281 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
282         if(level == 0) {
283                 memcpy(dest, source, len);
284                 return len;
285         } else if(level == 10) {
286 #ifdef HAVE_LZO
287                 lzo_uint lzolen = MAXSIZE;
288                 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
289                 return lzolen;
290 #else
291                 return -1;
292 #endif
293         } else if(level < 10) {
294 #ifdef HAVE_ZLIB
295                 unsigned long destlen = MAXSIZE;
296                 if(compress2(dest, &destlen, source, len, level) == Z_OK)
297                         return destlen;
298                 else
299 #endif
300                         return -1;
301         } else {
302 #ifdef HAVE_LZO
303                 lzo_uint lzolen = MAXSIZE;
304                 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
305                 return lzolen;
306 #else
307                 return -1;
308 #endif
309         }
310
311         return -1;
312 }
313
314 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
315         if(level == 0) {
316                 memcpy(dest, source, len);
317                 return len;
318         } else if(level > 9) {
319 #ifdef HAVE_LZO
320                 lzo_uint lzolen = MAXSIZE;
321                 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
322                         return lzolen;
323                 else
324 #endif
325                         return -1;
326         }
327 #ifdef HAVE_ZLIB
328         else {
329                 unsigned long destlen = MAXSIZE;
330                 if(uncompress(dest, &destlen, source, len) == Z_OK)
331                         return destlen;
332                 else
333                         return -1;
334         }
335 #endif
336
337         return -1;
338 }
339
340 /* VPN packet I/O */
341
342 static void receive_packet(node_t *n, vpn_packet_t *packet) {
343         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
344                            packet->len, n->name, n->hostname);
345
346         n->in_packets++;
347         n->in_bytes += packet->len;
348
349         route(n, packet);
350 }
351
352 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
353         if(n->status.sptps)
354                 return sptps_verify_datagram(&n->sptps, (char *)&inpkt->seqno, inpkt->len);
355
356         if(!digest_active(n->indigest) || inpkt->len < sizeof inpkt->seqno + digest_length(n->indigest))
357                 return false;
358
359         return digest_verify(n->indigest, &inpkt->seqno, inpkt->len - digest_length(n->indigest), (const char *)&inpkt->seqno + inpkt->len - digest_length(n->indigest));
360 }
361
362 static void receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
363         vpn_packet_t pkt1, pkt2;
364         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
365         int nextpkt = 0;
366         vpn_packet_t *outpkt = pkt[0];
367         size_t outlen;
368
369         if(n->status.sptps) {
370                 if(!n->sptps.state) {
371                         if(!n->status.waitingforkey) {
372                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
373                                 send_req_key(n);
374                         } else {
375                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
376                         }
377                         return;
378                 }
379                 sptps_receive_data(&n->sptps, (char *)&inpkt->seqno, inpkt->len);
380                 return;
381         }
382
383         if(!cipher_active(n->incipher)) {
384                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
385                 return;
386         }
387
388         /* Check packet length */
389
390         if(inpkt->len < sizeof inpkt->seqno + digest_length(n->indigest)) {
391                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
392                                         n->name, n->hostname);
393                 return;
394         }
395
396         /* Check the message authentication code */
397
398         if(digest_active(n->indigest)) {
399                 inpkt->len -= digest_length(n->indigest);
400                 if(!digest_verify(n->indigest, &inpkt->seqno, inpkt->len, (const char *)&inpkt->seqno + inpkt->len)) {
401                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
402                         return;
403                 }
404         }
405         /* Decrypt the packet */
406
407         if(cipher_active(n->incipher)) {
408                 outpkt = pkt[nextpkt++];
409                 outlen = MAXSIZE;
410
411                 if(!cipher_decrypt(n->incipher, &inpkt->seqno, inpkt->len, &outpkt->seqno, &outlen, true)) {
412                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
413                         return;
414                 }
415
416                 outpkt->len = outlen;
417                 inpkt = outpkt;
418         }
419
420         /* Check the sequence number */
421
422         inpkt->len -= sizeof inpkt->seqno;
423         inpkt->seqno = ntohl(inpkt->seqno);
424
425         if(replaywin) {
426                 if(inpkt->seqno != n->received_seqno + 1) {
427                         if(inpkt->seqno >= n->received_seqno + replaywin * 8) {
428                                 if(n->farfuture++ < replaywin >> 2) {
429                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
430                                                 n->name, n->hostname, inpkt->seqno - n->received_seqno - 1, n->farfuture);
431                                         return;
432                                 }
433                                 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
434                                                 inpkt->seqno - n->received_seqno - 1, n->name, n->hostname);
435                                 memset(n->late, 0, replaywin);
436                         } else if (inpkt->seqno <= n->received_seqno) {
437                                 if((n->received_seqno >= replaywin * 8 && inpkt->seqno <= n->received_seqno - replaywin * 8) || !(n->late[(inpkt->seqno / 8) % replaywin] & (1 << inpkt->seqno % 8))) {
438                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
439                                                 n->name, n->hostname, inpkt->seqno, n->received_seqno);
440                                         return;
441                                 }
442                         } else {
443                                 for(int i = n->received_seqno + 1; i < inpkt->seqno; i++)
444                                         n->late[(i / 8) % replaywin] |= 1 << i % 8;
445                         }
446                 }
447
448                 n->farfuture = 0;
449                 n->late[(inpkt->seqno / 8) % replaywin] &= ~(1 << inpkt->seqno % 8);
450         }
451
452         if(inpkt->seqno > n->received_seqno)
453                 n->received_seqno = inpkt->seqno;
454
455         n->received++;
456
457         if(n->received_seqno > MAX_SEQNO)
458                 regenerate_key();
459
460         /* Decompress the packet */
461
462         length_t origlen = inpkt->len;
463
464         if(n->incompression) {
465                 outpkt = pkt[nextpkt++];
466
467                 if((outpkt->len = uncompress_packet(outpkt->data, inpkt->data, inpkt->len, n->incompression)) < 0) {
468                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
469                                                  n->name, n->hostname);
470                         return;
471                 }
472
473                 inpkt = outpkt;
474
475                 origlen -= MTU/64 + 20;
476         }
477
478         inpkt->priority = 0;
479
480         if(!inpkt->data[12] && !inpkt->data[13])
481                 mtu_probe_h(n, inpkt, origlen);
482         else
483                 receive_packet(n, inpkt);
484 }
485
486 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
487         vpn_packet_t outpkt;
488
489         if(len > sizeof outpkt.data)
490                 return;
491
492         outpkt.len = len;
493         if(c->options & OPTION_TCPONLY)
494                 outpkt.priority = 0;
495         else
496                 outpkt.priority = -1;
497         memcpy(outpkt.data, buffer, len);
498
499         receive_packet(c->node, &outpkt);
500 }
501
502 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
503         if(!n->status.validkey) {
504                 logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
505                 if(!n->status.waitingforkey)
506                         send_req_key(n);
507                 else if(n->last_req_key + 10 < now.tv_sec) {
508                         logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
509                         sptps_stop(&n->sptps);
510                         n->status.waitingforkey = false;
511                         send_req_key(n);
512                 }
513                 return;
514         }
515
516         uint8_t type = 0;
517         int offset = 0;
518
519         if(!(origpkt->data[12] | origpkt->data[13])) {
520                 sptps_send_record(&n->sptps, PKT_PROBE, (char *)origpkt->data, origpkt->len);
521                 return;
522         }
523
524         if(routing_mode == RMODE_ROUTER)
525                 offset = 14;
526         else
527                 type = PKT_MAC;
528
529         if(origpkt->len < offset)
530                 return;
531
532         vpn_packet_t outpkt;
533
534         if(n->outcompression) {
535                 int len = compress_packet(outpkt.data + offset, origpkt->data + offset, origpkt->len - offset, n->outcompression);
536                 if(len < 0) {
537                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
538                 } else if(len < origpkt->len - offset) {
539                         outpkt.len = len + offset;
540                         origpkt = &outpkt;
541                         type |= PKT_COMPRESSED;
542                 }
543         }
544
545         sptps_send_record(&n->sptps, type, (char *)origpkt->data + offset, origpkt->len - offset);
546         return;
547 }
548
549 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
550         /* Latest guess */
551         *sa = &n->address;
552         *sock = n->sock;
553
554         /* If the UDP address is confirmed, use it. */
555         if(n->status.udp_confirmed)
556                 return;
557
558         /* Send every third packet to n->address; that could be set
559            to the node's reflexive UDP address discovered during key
560            exchange. */
561
562         static int x = 0;
563         if(++x >= 3) {
564                 x = 0;
565                 return;
566         }
567
568         /* Otherwise, address are found in edges to this node.
569            So we pick a random edge and a random socket. */
570
571         int i = 0;
572         int j = rand() % n->edge_tree->count;
573         edge_t *candidate = NULL;
574
575         for splay_each(edge_t, e, n->edge_tree) {
576                 if(i++ == j) {
577                         candidate = e->reverse;
578                         break;
579                 }
580         }
581
582         if(candidate) {
583                 *sa = &candidate->address;
584                 *sock = rand() % listen_sockets;
585         }
586
587         /* Make sure we have a suitable socket for the chosen address */
588         if(listen_socket[*sock].sa.sa.sa_family != (*sa)->sa.sa_family) {
589                 for(int i = 0; i < listen_sockets; i++) {
590                         if(listen_socket[i].sa.sa.sa_family == (*sa)->sa.sa_family) {
591                                 *sock = i;
592                                 break;
593                         }
594                 }
595         }
596 }
597
598 static void choose_broadcast_address(const node_t *n, const sockaddr_t **sa, int *sock) {
599         static sockaddr_t broadcast_ipv4 = {
600                 .in = {
601                         .sin_family = AF_INET,
602                         .sin_addr.s_addr = -1,
603                 }
604         };
605
606         static sockaddr_t broadcast_ipv6 = {
607                 .in6 = {
608                         .sin6_family = AF_INET6,
609                         .sin6_addr.s6_addr[0x0] = 0xff,
610                         .sin6_addr.s6_addr[0x1] = 0x02,
611                         .sin6_addr.s6_addr[0xf] = 0x01,
612                 }
613         };
614
615         *sock = rand() % listen_sockets;
616
617         if(listen_socket[*sock].sa.sa.sa_family == AF_INET6) {
618                 if(localdiscovery_address.sa.sa_family == AF_INET6) {
619                         localdiscovery_address.in6.sin6_port = n->prevedge->address.in.sin_port;
620                         *sa = &localdiscovery_address;
621                 } else {
622                         broadcast_ipv6.in6.sin6_port = n->prevedge->address.in.sin_port;
623                         broadcast_ipv6.in6.sin6_scope_id = listen_socket[*sock].sa.in6.sin6_scope_id;
624                         *sa = &broadcast_ipv6;
625                 }
626         } else {
627                 if(localdiscovery_address.sa.sa_family == AF_INET) {
628                         localdiscovery_address.in.sin_port = n->prevedge->address.in.sin_port;
629                         *sa = &localdiscovery_address;
630                 } else {
631                         broadcast_ipv4.in.sin_port = n->prevedge->address.in.sin_port;
632                         *sa = &broadcast_ipv4;
633                 }
634         }
635 }
636
637 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
638         vpn_packet_t pkt1, pkt2;
639         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
640         vpn_packet_t *inpkt = origpkt;
641         int nextpkt = 0;
642         vpn_packet_t *outpkt;
643         int origlen = origpkt->len;
644         size_t outlen;
645 #if defined(SOL_IP) && defined(IP_TOS)
646         static int priority = 0;
647 #endif
648         int origpriority = origpkt->priority;
649
650         if(!n->status.reachable) {
651                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
652                 return;
653         }
654
655         if(n->status.sptps)
656                 return send_sptps_packet(n, origpkt);
657
658         /* Make sure we have a valid key */
659
660         if(!n->status.validkey) {
661                 logger(DEBUG_TRAFFIC, LOG_INFO,
662                                    "No valid key known yet for %s (%s), forwarding via TCP",
663                                    n->name, n->hostname);
664
665                 if(n->last_req_key + 10 <= now.tv_sec) {
666                         send_req_key(n);
667                         n->last_req_key = now.tv_sec;
668                 }
669
670                 send_tcppacket(n->nexthop->connection, origpkt);
671
672                 return;
673         }
674
675         if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (inpkt->data[12] | inpkt->data[13])) {
676                 logger(DEBUG_TRAFFIC, LOG_INFO,
677                                 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
678                                 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
679
680                 if(n != n->nexthop)
681                         send_packet(n->nexthop, origpkt);
682                 else
683                         send_tcppacket(n->nexthop->connection, origpkt);
684
685                 return;
686         }
687
688         /* Compress the packet */
689
690         if(n->outcompression) {
691                 outpkt = pkt[nextpkt++];
692
693                 if((outpkt->len = compress_packet(outpkt->data, inpkt->data, inpkt->len, n->outcompression)) < 0) {
694                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
695                                    n->name, n->hostname);
696                         return;
697                 }
698
699                 inpkt = outpkt;
700         }
701
702         /* Add sequence number */
703
704         inpkt->seqno = htonl(++(n->sent_seqno));
705         inpkt->len += sizeof inpkt->seqno;
706
707         /* Encrypt the packet */
708
709         if(cipher_active(n->outcipher)) {
710                 outpkt = pkt[nextpkt++];
711                 outlen = MAXSIZE;
712
713                 if(!cipher_encrypt(n->outcipher, &inpkt->seqno, inpkt->len, &outpkt->seqno, &outlen, true)) {
714                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
715                         goto end;
716                 }
717
718                 outpkt->len = outlen;
719                 inpkt = outpkt;
720         }
721
722         /* Add the message authentication code */
723
724         if(digest_active(n->outdigest)) {
725                 if(!digest_create(n->outdigest, &inpkt->seqno, inpkt->len, (char *)&inpkt->seqno + inpkt->len)) {
726                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
727                         goto end;
728                 }
729
730                 inpkt->len += digest_length(n->outdigest);
731         }
732
733         /* Send the packet */
734
735         const sockaddr_t *sa;
736         int sock;
737
738         /* Overloaded use of priority field: -1 means local broadcast */
739
740         if(origpriority == -1 && n->prevedge)
741                 choose_broadcast_address(n, &sa, &sock);
742         else
743                 choose_udp_address(n, &sa, &sock);
744
745 #if defined(SOL_IP) && defined(IP_TOS)
746         if(priorityinheritance && origpriority != priority
747            && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
748                 priority = origpriority;
749                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
750                 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
751                         logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", strerror(errno));
752         }
753 #endif
754
755         if(sendto(listen_socket[sock].udp.fd, (char *) &inpkt->seqno, inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
756                 if(sockmsgsize(sockerrno)) {
757                         if(n->maxmtu >= origlen)
758                                 n->maxmtu = origlen - 1;
759                         if(n->mtu >= origlen)
760                                 n->mtu = origlen - 1;
761                 } else
762                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
763         }
764
765 end:
766         origpkt->len = origlen;
767 }
768
769 bool send_sptps_data(void *handle, uint8_t type, const char *data, size_t len) {
770         node_t *to = handle;
771
772         /* Send it via TCP if it is a handshake packet, TCPOnly is in use, or this packet is larger than the MTU. */
773
774         if(type >= SPTPS_HANDSHAKE || ((myself->options | to->options) & OPTION_TCPONLY) || (type != PKT_PROBE && len > to->minmtu)) {
775                 char buf[len * 4 / 3 + 5];
776                 b64encode(data, buf, len);
777                 /* If no valid key is known yet, send the packets using ANS_KEY requests,
778                    to ensure we get to learn the reflexive UDP address. */
779                 if(!to->status.validkey)
780                         return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, myself->name, to->name, buf, myself->incompression);
781                 else
782                         return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, myself->name, to->name, REQ_SPTPS, buf);
783         }
784
785         /* Otherwise, send the packet via UDP */
786
787         const sockaddr_t *sa;
788         int sock;
789
790         choose_udp_address(to, &sa, &sock);
791
792         if(sendto(listen_socket[sock].udp.fd, data, len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
793                 if(sockmsgsize(sockerrno)) {
794                         if(to->maxmtu >= len)
795                                 to->maxmtu = len - 1;
796                         if(to->mtu >= len)
797                                 to->mtu = len - 1;
798                 } else {
799                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", to->name, to->hostname, sockstrerror(sockerrno));
800                         return false;
801                 }
802         }
803
804         return true;
805 }
806
807 bool receive_sptps_record(void *handle, uint8_t type, const char *data, uint16_t len) {
808         node_t *from = handle;
809
810         if(type == SPTPS_HANDSHAKE) {
811                 if(!from->status.validkey) {
812                         from->status.validkey = true;
813                         from->status.waitingforkey = false;
814                         logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
815                 }
816                 return true;
817         }
818
819         if(len > MTU) {
820                 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
821                 return false;
822         }
823
824         vpn_packet_t inpkt;
825
826         if(type == PKT_PROBE) {
827                 inpkt.len = len;
828                 memcpy(inpkt.data, data, len);
829                 mtu_probe_h(from, &inpkt, len);
830                 return true;
831         }
832
833         if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
834                 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
835                 return false;
836         }
837
838         /* Check if we have the headers we need */
839         if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
840                 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
841                 return false;
842         } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
843                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
844         }
845
846         int offset = (type & PKT_MAC) ? 0 : 14;
847         if(type & PKT_COMPRESSED) {
848                 length_t ulen = uncompress_packet(inpkt.data + offset, (const uint8_t *)data, len, from->incompression);
849                 if(ulen < 0) {
850                         return false;
851                 } else {
852                         inpkt.len = ulen + offset;
853                 }
854                 if(inpkt.len > MAXSIZE)
855                         abort();
856         } else {
857                 memcpy(inpkt.data + offset, data, len);
858                 inpkt.len = len + offset;
859         }
860
861         /* Generate the Ethernet packet type if necessary */
862         if(offset) {
863                 switch(inpkt.data[14] >> 4) {
864                         case 4:
865                                 inpkt.data[12] = 0x08;
866                                 inpkt.data[13] = 0x00;
867                                 break;
868                         case 6:
869                                 inpkt.data[12] = 0x86;
870                                 inpkt.data[13] = 0xDD;
871                                 break;
872                         default:
873                                 logger(DEBUG_TRAFFIC, LOG_ERR,
874                                                    "Unknown IP version %d while reading packet from %s (%s)",
875                                                    inpkt.data[14] >> 4, from->name, from->hostname);
876                                 return false;
877                 }
878         }
879
880         receive_packet(from, &inpkt);
881         return true;
882 }
883
884 /*
885   send a packet to the given vpn ip.
886 */
887 void send_packet(node_t *n, vpn_packet_t *packet) {
888         node_t *via;
889
890         if(n == myself) {
891                 if(overwrite_mac)
892                          memcpy(packet->data, mymac.x, ETH_ALEN);
893                 n->out_packets++;
894                 n->out_bytes += packet->len;
895                 devops.write(packet);
896                 return;
897         }
898
899         logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
900                            packet->len, n->name, n->hostname);
901
902         if(!n->status.reachable) {
903                 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
904                                    n->name, n->hostname);
905                 return;
906         }
907
908         n->out_packets++;
909         n->out_bytes += packet->len;
910
911         if(n->status.sptps) {
912                 send_sptps_packet(n, packet);
913                 return;
914         }
915
916         via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
917
918         if(via != n)
919                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)",
920                            n->name, via->name, n->via->hostname);
921
922         if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
923                 if(!send_tcppacket(via->connection, packet))
924                         terminate_connection(via->connection, true);
925         } else
926                 send_udppacket(via, packet);
927 }
928
929 /* Broadcast a packet using the minimum spanning tree */
930
931 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
932         // Always give ourself a copy of the packet.
933         if(from != myself)
934                 send_packet(myself, packet);
935
936         // In TunnelServer mode, do not forward broadcast packets.
937         // The MST might not be valid and create loops.
938         if(tunnelserver || broadcast_mode == BMODE_NONE)
939                 return;
940
941         logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
942                            packet->len, from->name, from->hostname);
943
944         switch(broadcast_mode) {
945                 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
946                 // This guarantees all nodes receive the broadcast packet, and
947                 // usually distributes the sending of broadcast packets over all nodes.
948                 case BMODE_MST:
949                         for list_each(connection_t, c, connection_list)
950                                 if(c->status.active && c->status.mst && c != from->nexthop->connection)
951                                         send_packet(c->node, packet);
952                         break;
953
954                 // In direct mode, we send copies to each node we know of.
955                 // However, this only reaches nodes that can be reached in a single hop.
956                 // We don't have enough information to forward broadcast packets in this case.
957                 case BMODE_DIRECT:
958                         if(from != myself)
959                                 break;
960
961                         for splay_each(node_t, n, node_tree)
962                                 if(n->status.reachable && ((n->via == myself && n->nexthop == n) || n->via == n))
963                                         send_packet(n, packet);
964                         break;
965
966                 default:
967                         break;
968         }
969 }
970
971 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
972         node_t *n = NULL;
973         bool hard = false;
974         static time_t last_hard_try = 0;
975
976         for splay_each(edge_t, e, edge_weight_tree) {
977                 if(!e->to->status.reachable || e->to == myself)
978                         continue;
979
980                 if(sockaddrcmp_noport(from, &e->address)) {
981                         if(last_hard_try == now.tv_sec)
982                                 continue;
983                         hard = true;
984                 }
985
986                 if(!try_mac(e->to, pkt))
987                         continue;
988
989                 n = e->to;
990                 break;
991         }
992
993         if(hard)
994                 last_hard_try = now.tv_sec;
995
996         last_hard_try = now.tv_sec;
997         return n;
998 }
999
1000 void handle_incoming_vpn_data(void *data, int flags) {
1001         listen_socket_t *ls = data;
1002         vpn_packet_t pkt;
1003         char *hostname;
1004         sockaddr_t from = {{0}};
1005         socklen_t fromlen = sizeof from;
1006         node_t *n;
1007         int len;
1008
1009         len = recvfrom(ls->udp.fd, (char *) &pkt.seqno, MAXSIZE, 0, &from.sa, &fromlen);
1010
1011         if(len <= 0 || len > MAXSIZE) {
1012                 if(!sockwouldblock(sockerrno))
1013                         logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1014                 return;
1015         }
1016
1017         pkt.len = len;
1018
1019         sockaddrunmap(&from); /* Some braindead IPv6 implementations do stupid things. */
1020
1021         n = lookup_node_udp(&from);
1022
1023         if(!n) {
1024                 n = try_harder(&from, &pkt);
1025                 if(n)
1026                         update_node_udp(n, &from);
1027                 else if(debug_level >= DEBUG_PROTOCOL) {
1028                         hostname = sockaddr2hostname(&from);
1029                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1030                         free(hostname);
1031                         return;
1032                 }
1033                 else
1034                         return;
1035         }
1036
1037         n->sock = ls - listen_socket;
1038
1039         receive_udppacket(n, &pkt);
1040 }
1041
1042 void handle_device_data(void *data, int flags) {
1043         vpn_packet_t packet;
1044
1045         packet.priority = 0;
1046
1047         if(devops.read(&packet)) {
1048                 myself->in_packets++;
1049                 myself->in_bytes += packet.len;
1050                 route(myself, &packet);
1051         }
1052 }