ntpd: improve treatment of DNS resolution failures
authorDenys Vlasenko <vda.linux@googlemail.com>
Tue, 31 Oct 2017 11:44:37 +0000 (12:44 +0100)
committerDenys Vlasenko <vda.linux@googlemail.com>
Tue, 31 Oct 2017 11:44:37 +0000 (12:44 +0100)
function                                             old     new   delta
ntpd_main                                           1106    1177     +71
resolve_peer_hostname                                122     127      +5
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/0 up/down: 76/0)               Total: 76 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
networking/ntpd.c

index 8f792d16df4c3d86d8b5924ab1fcb1ac109fe88a..e0c9810a1f9522bf6de677cace0d768a7d7a59d6 100644 (file)
 #define RETRY_INTERVAL    32    /* on send/recv error, retry in N secs (need to be power of 2) */
 #define NOREPLY_INTERVAL 512    /* sent, but got no reply: cap next query by this many seconds */
 #define RESPONSE_INTERVAL 16    /* wait for reply up to N secs */
-#define HOSTNAME_INTERVAL  5    /* hostname lookup failed. Wait N secs for next try */
+#define HOSTNAME_INTERVAL  4    /* hostname lookup failed. Wait N * peer->dns_errors secs for next try */
+#define DNS_ERRORS_CAP  0x3f    /* peer->dns_errors is in [0..63] */
 
 /* Step threshold (sec). std ntpd uses 0.128.
  */
@@ -301,6 +302,7 @@ typedef struct {
        uint8_t          lastpkt_status;
        uint8_t          lastpkt_stratum;
        uint8_t          reachable_bits;
+       uint8_t          dns_errors;
        /* when to send new query (if p_fd == -1)
         * or when receive times out (if p_fd >= 0): */
        double           next_action_time;
@@ -802,10 +804,9 @@ resolve_peer_hostname(peer_t *p)
                p->p_dotted = xmalloc_sockaddr2dotted_noport(&lsa->u.sa);
                VERB1 if (strcmp(p->p_hostname, p->p_dotted) != 0)
                        bb_error_msg("'%s' is %s", p->p_hostname, p->p_dotted);
-       } else {
-               /* error message is emitted by host2sockaddr() */
-               set_next(p, HOSTNAME_INTERVAL);
+               p->dns_errors = 0;
        }
+       p->dns_errors = ((p->dns_errors << 1) | 1) & DNS_ERRORS_CAP;
        return lsa;
 }
 
@@ -2358,14 +2359,6 @@ int ntpd_main(int argc UNUSED_PARAM, char **argv)
                int nfds, timeout;
                double nextaction;
 
-               /* Resolve peer names to IPs, if not resolved yet */
-               for (item = G.ntp_peers; item != NULL; item = item->link) {
-                       peer_t *p = (peer_t *) item->data;
-
-                       if (p->next_action_time <= G.cur_time && !p->p_lsa)
-                               resolve_peer_hostname(p);
-               }
-
                /* Nothing between here and poll() blocks for any significant time */
 
                nextaction = G.cur_time + 3600;
@@ -2447,13 +2440,38 @@ int ntpd_main(int argc UNUSED_PARAM, char **argv)
  did_poll:
                gettime1900d(); /* sets G.cur_time */
                if (nfds <= 0) {
-                       if (!bb_got_signal /* poll wasn't interrupted by a signal */
-                        && G.cur_time - G.last_script_run > 11*60
-                       ) {
+                       double ct;
+
+                       if (bb_got_signal)
+                               break; /* poll was interrupted by a signal */
+
+                       if (G.cur_time - G.last_script_run > 11*60) {
                                /* Useful for updating battery-backed RTC and such */
                                run_script("periodic", G.last_update_offset);
                                gettime1900d(); /* sets G.cur_time */
                        }
+
+                       /* Resolve peer names to IPs, if not resolved yet.
+                        * We do it only when poll timed out:
+                        * this way, we almost never overlap DNS resolution with
+                        * "request-reply" packet round trip.
+                        */
+                       ct = G.cur_time;
+                       for (item = G.ntp_peers; item != NULL; item = item->link) {
+                               peer_t *p = (peer_t *) item->data;
+                               if (p->next_action_time <= ct && !p->p_lsa) {
+                                       /* This can take up to ~10 sec per each DNS query */
+                                       resolve_peer_hostname(p);
+                               }
+                       }
+                       gettime1900d(); /* sets G.cur_time (needed for set_next()) */
+                       /* Set next time for those which are still not resolved */
+                       for (item = G.ntp_peers; item != NULL; item = item->link) {
+                               peer_t *p = (peer_t *) item->data;
+                               if (p->next_action_time <= ct && !p->p_lsa) {
+                                       set_next(p, HOSTNAME_INTERVAL * p->dns_errors);
+                               }
+                       }
                        goto check_unsync;
                }