diff options (stat):
 -rw-r--r--  networking/ntpd.c | 46 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 39 insertions(+), 7 deletions(-)

diff --git a/networking/ntpd.c b/networking/ntpd.c
index fd2f24a89..ed83415b4 100644
--- a/networking/ntpd.c
+++ b/networking/ntpd.c
@@ -90,12 +90,24 @@
  * was hibernated, someone set totally wrong date, etc),
  * then the time is stepped, all datapoints are discarded,
  * and we go back to steady state.
+ *
+ * Made some changes to speed up re-syncing after our clock goes bad
+ * (tested with suspending my laptop):
+ * - if largish offset (>= STEP_THRESHOLD * 8 == 1 sec) is seen
+ *   from a peer, schedule next query for this peer soon
+ *   without drastically lowering poll interval for everybody.
+ *   This makes us collect enough data for step much faster:
+ *   e.g. at poll = 10 (1024 secs), step was done within 5 minutes
+ *   after first reply which indicated that our clock is 14 seconds off.
+ * - on step, do not discard d_dispersion data of the existing datapoints,
+ *   do not clear reachable_bits. This prevents discarding first ~8
+ *   datapoints after the step.
  */
 
 #define RETRY_INTERVAL 5 /* on error, retry in N secs */
 #define RESPONSE_INTERVAL 15 /* wait for reply up to N secs */
 #define INITIAL_SAMPLES 4 /* how many samples do we want for init */
 #define BAD_DELAY_GROWTH 4 /* drop packet if its delay grew by more than this */
 
 /* Clock discipline parameters and constants */
 
@@ -109,6 +121,10 @@
 #define FREQ_TOLERANCE 0.000015 /* frequency tolerance (15 PPM) */
 #define BURSTPOLL 0 /* initial poll */
 #define MINPOLL 5 /* minimum poll interval. std ntpd uses 6 (6: 64 sec) */
+/* If we got largish offset from a peer, cap next query interval
+ * for this peer by this many seconds:
+ */
+#define BIGOFF_INTERVAL (1 << 6)
 /* If offset > discipline_jitter * POLLADJ_GATE, and poll interval is >= 2^BIGPOLL,
  * then it is decreased _at once_. (If < 2^BIGPOLL, it will be decreased _eventually_).
  */
@@ -1658,11 +1674,13 @@ recv_and_process_peer_pkt(peer_t *p)
 	ssize_t size;
 	msg_t msg;
 	double T1, T2, T3, T4;
-	double dv;
+	double dv, offset;
 	unsigned interval;
 	datapoint_t *datapoint;
 	peer_t *q;
 
+	offset = 0;
+
 	/* We can recvfrom here and check from.IP, but some multihomed
 	 * ntp servers reply from their *other IP*.
 	 * TODO: maybe we should check at least what we can: from.port == 123?
@@ -1766,13 +1784,13 @@ recv_and_process_peer_pkt(peer_t *p)
 	p->datapoint_idx = p->reachable_bits ? (p->datapoint_idx + 1) % NUM_DATAPOINTS : 0;
 	datapoint = &p->filter_datapoint[p->datapoint_idx];
 	datapoint->d_recv_time = T4;
-	datapoint->d_offset = ((T2 - T1) + (T3 - T4)) / 2;
+	datapoint->d_offset = offset = ((T2 - T1) + (T3 - T4)) / 2;
 	datapoint->d_dispersion = LOG2D(msg.m_precision_exp) + G_precision_sec;
 	if (!p->reachable_bits) {
 		/* 1st datapoint ever - replicate offset in every element */
 		int i;
 		for (i = 0; i < NUM_DATAPOINTS; i++) {
-			p->filter_datapoint[i].d_offset = datapoint->d_offset;
+			p->filter_datapoint[i].d_offset = offset;
 		}
 	}
 
@@ -1780,7 +1798,7 @@ recv_and_process_peer_pkt(peer_t *p)
 	if ((MAX_VERBOSE && G.verbose) || (option_mask32 & OPT_w)) {
 		bb_error_msg("reply from %s: offset:%+f delay:%f status:0x%02x strat:%d refid:0x%08x rootdelay:%f reach:0x%02x",
 				p->p_dotted,
-				datapoint->d_offset,
+				offset,
 				p->lastpkt_delay,
 				p->lastpkt_status,
 				p->lastpkt_stratum,
@@ -1865,6 +1883,20 @@ recv_and_process_peer_pkt(peer_t *p)
 	/* Decide when to send new query for this peer */
  pick_normal_interval:
 	interval = poll_interval(0);
+	if (fabs(offset) >= STEP_THRESHOLD * 8 && interval > BIGOFF_INTERVAL) {
+		/* If we are synced, offsets are less than STEP_THRESHOLD,
+		 * or at the very least not much larger than it.
+		 * Now we see a largish one.
+		 * Either this peer is feeling bad, or packet got corrupted,
+		 * or _our_ clock is wrong now and _all_ peers will show similar
+		 * largish offsets too.
+		 * I observed this with laptop suspend stopping clock.
+		 * In any case, it makes sense to make next request soonish:
+		 * cases 1 and 2: get a better datapoint,
+		 * case 3: allows to resync faster.
+		 */
+		interval = BIGOFF_INTERVAL;
+	}
 
 set_next_and_ret:
 	set_next(p, interval);
