blob: 3e1198758264063020792148351d0968c6e5b6de [file] [log] [blame]
Mike J. Chen6c929512011-08-15 11:59:47 -07001/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * A service that exchanges time synchronization information between
19 * a master that defines a timeline and clients that follow the timeline.
20 */
21
22#define LOG_TAG "common_time"
23#include <utils/Log.h>
24
25#include <arpa/inet.h>
26#include <assert.h>
27#include <fcntl.h>
Mike J. Chen6c929512011-08-15 11:59:47 -070028#include <linux/if_ether.h>
29#include <net/if.h>
30#include <net/if_arp.h>
31#include <netinet/ip.h>
32#include <poll.h>
33#include <stdio.h>
34#include <sys/eventfd.h>
35#include <sys/ioctl.h>
36#include <sys/stat.h>
37#include <sys/types.h>
38#include <sys/socket.h>
39
40#include <common_time/local_clock.h>
41#include <binder/IPCThreadState.h>
42#include <binder/ProcessState.h>
43#include <utils/Timers.h>
44
45#include "common_clock_service.h"
46#include "common_time_config_service.h"
47#include "common_time_server.h"
48#include "common_time_server_packets.h"
49#include "clock_recovery.h"
50#include "common_clock.h"
51
// 0x7FFFFFFF expressed as an int (2^31 - 1).
#define MAX_INT (static_cast<int>(0x7FFFFFFF))
Mike J. Chen6c929512011-08-15 11:59:47 -070053
54namespace android {
55
John Grossman7a947c42012-08-21 16:39:11 -070056const char* CommonTimeServer::kDefaultMasterElectionAddr = "255.255.255.255";
57const uint16_t CommonTimeServer::kDefaultMasterElectionPort = 8886;
58const uint64_t CommonTimeServer::kDefaultSyncGroupID = 1;
Mike J. Chen6c929512011-08-15 11:59:47 -070059const uint8_t CommonTimeServer::kDefaultMasterPriority = 1;
60const uint32_t CommonTimeServer::kDefaultMasterAnnounceIntervalMs = 10000;
61const uint32_t CommonTimeServer::kDefaultSyncRequestIntervalMs = 1000;
62const uint32_t CommonTimeServer::kDefaultPanicThresholdUsec = 50000;
63const bool CommonTimeServer::kDefaultAutoDisable = true;
64const int CommonTimeServer::kSetupRetryTimeoutMs = 30000;
65const int64_t CommonTimeServer::kNoGoodDataPanicThresholdUsec = 600000000ll;
66const uint32_t CommonTimeServer::kRTTDiscardPanicThreshMultiplier = 5;
67
68// timeout value representing an infinite timeout
69const int CommonTimeServer::kInfiniteTimeout = -1;
70
71/*** Initial state constants ***/
72
73// number of WhoIsMaster attempts sent before giving up
74const int CommonTimeServer::kInitial_NumWhoIsMasterRetries = 6;
75
76// timeout used when waiting for a response to a WhoIsMaster request
77const int CommonTimeServer::kInitial_WhoIsMasterTimeoutMs = 500;
78
79/*** Client state constants ***/
80
81// number of sync requests that can fail before a client assumes its master
82// is dead
John Grossmane1d6c082012-04-09 11:26:16 -070083const int CommonTimeServer::kClient_NumSyncRequestRetries = 10;
Mike J. Chen6c929512011-08-15 11:59:47 -070084
85/*** Master state constants ***/
86
87/*** Ronin state constants ***/
88
89// number of WhoIsMaster attempts sent before declaring ourselves master
John Grossmane1d6c082012-04-09 11:26:16 -070090const int CommonTimeServer::kRonin_NumWhoIsMasterRetries = 20;
Mike J. Chen6c929512011-08-15 11:59:47 -070091
92// timeout used when waiting for a response to a WhoIsMaster request
93const int CommonTimeServer::kRonin_WhoIsMasterTimeoutMs = 500;
94
95/*** WaitForElection state constants ***/
96
97// how long do we wait for an announcement from a master before
98// trying another election?
John Grossmane1d6c082012-04-09 11:26:16 -070099const int CommonTimeServer::kWaitForElection_TimeoutMs = 12500;
Mike J. Chen6c929512011-08-15 11:59:47 -0700100
101CommonTimeServer::CommonTimeServer()
102 : Thread(false)
103 , mState(ICommonClock::STATE_INITIAL)
104 , mClockRecovery(&mLocalClock, &mCommonClock)
105 , mSocket(-1)
106 , mLastPacketRxLocalTime(0)
107 , mTimelineID(ICommonClock::kInvalidTimelineID)
108 , mClockSynced(false)
109 , mCommonClockHasClients(false)
John Grossman79489c42012-07-20 10:17:26 -0700110 , mStateChangeLog("Recent State Change Events", 30)
111 , mElectionLog("Recent Master Election Traffic", 30)
112 , mBadPktLog("Recent Bad Packet RX Info", 8)
Mike J. Chen6c929512011-08-15 11:59:47 -0700113 , mInitial_WhoIsMasterRequestTimeouts(0)
114 , mClient_MasterDeviceID(0)
115 , mClient_MasterDevicePriority(0)
116 , mRonin_WhoIsMasterRequestTimeouts(0) {
117 // zero out sync stats
118 resetSyncStats();
119
120 // Setup the master election endpoint to use the default.
121 struct sockaddr_in* meep =
122 reinterpret_cast<struct sockaddr_in*>(&mMasterElectionEP);
123 memset(&mMasterElectionEP, 0, sizeof(mMasterElectionEP));
124 inet_aton(kDefaultMasterElectionAddr, &meep->sin_addr);
125 meep->sin_family = AF_INET;
126 meep->sin_port = htons(kDefaultMasterElectionPort);
127
128 // Zero out the master endpoint.
129 memset(&mMasterEP, 0, sizeof(mMasterEP));
130 mMasterEPValid = false;
131 mBindIfaceValid = false;
132 setForceLowPriority(false);
133
134 // Set all remaining configuration parameters to their defaults.
135 mDeviceID = 0;
136 mSyncGroupID = kDefaultSyncGroupID;
137 mMasterPriority = kDefaultMasterPriority;
138 mMasterAnnounceIntervalMs = kDefaultMasterAnnounceIntervalMs;
139 mSyncRequestIntervalMs = kDefaultSyncRequestIntervalMs;
140 mPanicThresholdUsec = kDefaultPanicThresholdUsec;
141 mAutoDisable = kDefaultAutoDisable;
142
143 // Create the eventfd we will use to signal our thread to wake up when
144 // needed.
145 mWakeupThreadFD = eventfd(0, EFD_NONBLOCK);
146
147 // seed the random number generator (used to generated timeline IDs)
148 srand48(static_cast<unsigned int>(systemTime()));
149}
150
151CommonTimeServer::~CommonTimeServer() {
152 shutdownThread();
153
154 // No need to grab the lock here. We are in the destructor; if the the user
155 // has a thread in any of the APIs while the destructor is being called,
156 // there is a threading problem a the application level we cannot reasonably
157 // do anything about.
158 cleanupSocket_l();
159
160 if (mWakeupThreadFD >= 0) {
161 close(mWakeupThreadFD);
162 mWakeupThreadFD = -1;
163 }
164}
165
166bool CommonTimeServer::startServices() {
167 // start the ICommonClock service
168 mICommonClock = CommonClockService::instantiate(*this);
169 if (mICommonClock == NULL)
170 return false;
171
172 // start the ICommonTimeConfig service
173 mICommonTimeConfig = CommonTimeConfigService::instantiate(*this);
174 if (mICommonTimeConfig == NULL)
175 return false;
176
177 return true;
178}
179
180bool CommonTimeServer::threadLoop() {
181 // Register our service interfaces.
182 if (!startServices())
183 return false;
184
185 // Hold the lock while we are in the main thread loop. It will release the
186 // lock when it blocks, and hold the lock at all other times.
187 mLock.lock();
188 runStateMachine_l();
189 mLock.unlock();
190
191 IPCThreadState::self()->stopProcess();
192 return false;
193}
194
195bool CommonTimeServer::runStateMachine_l() {
196 if (!mLocalClock.initCheck())
197 return false;
198
199 if (!mCommonClock.init(mLocalClock.getLocalFreq()))
200 return false;
201
202 // Enter the initial state.
203 becomeInitial("startup");
204
205 // run the state machine
206 while (!exitPending()) {
207 struct pollfd pfds[2];
John Grossmanc7f57c62012-06-26 12:50:28 -0700208 int rc, timeout;
Mike J. Chen6c929512011-08-15 11:59:47 -0700209 int eventCnt = 0;
210 int64_t wakeupTime;
John Grossmanc7f57c62012-06-26 12:50:28 -0700211 uint32_t t1, t2;
212 bool needHandleTimeout = false;
Mike J. Chen6c929512011-08-15 11:59:47 -0700213
214 // We are always interested in our wakeup FD.
215 pfds[eventCnt].fd = mWakeupThreadFD;
216 pfds[eventCnt].events = POLLIN;
217 pfds[eventCnt].revents = 0;
218 eventCnt++;
219
220 // If we have a valid socket, then we are interested in what it has to
221 // say as well.
222 if (mSocket >= 0) {
223 pfds[eventCnt].fd = mSocket;
224 pfds[eventCnt].events = POLLIN;
225 pfds[eventCnt].revents = 0;
226 eventCnt++;
227 }
228
John Grossmanc7f57c62012-06-26 12:50:28 -0700229 t1 = static_cast<uint32_t>(mCurTimeout.msecTillTimeout());
230 t2 = static_cast<uint32_t>(mClockRecovery.applyRateLimitedSlew());
231 timeout = static_cast<int>(t1 < t2 ? t1 : t2);
232
Mike J. Chen6c929512011-08-15 11:59:47 -0700233 // Note, we were holding mLock when this function was called. We
234 // release it only while we are blocking and hold it at all other times.
235 mLock.unlock();
John Grossmanc7f57c62012-06-26 12:50:28 -0700236 rc = poll(pfds, eventCnt, timeout);
Mike J. Chen6c929512011-08-15 11:59:47 -0700237 wakeupTime = mLocalClock.getLocalTime();
238 mLock.lock();
239
240 // Is it time to shutdown? If so, don't hesitate... just do it.
241 if (exitPending())
242 break;
243
244 // Did the poll fail? This should never happen and is fatal if it does.
245 if (rc < 0) {
246 ALOGE("%s:%d poll failed", __PRETTY_FUNCTION__, __LINE__);
247 return false;
248 }
249
John Grossmanc7f57c62012-06-26 12:50:28 -0700250 if (rc == 0) {
251 needHandleTimeout = !mCurTimeout.msecTillTimeout();
252 if (needHandleTimeout)
253 mCurTimeout.setTimeout(kInfiniteTimeout);
254 }
Mike J. Chen6c929512011-08-15 11:59:47 -0700255
256 // Were we woken up on purpose? If so, clear the eventfd with a read.
257 if (pfds[0].revents)
258 clearPendingWakeupEvents_l();
259
260 // Is out bind address dirty? If so, clean up our socket (if any).
261 // Alternatively, do we have an active socket but should be auto
262 // disabled? If so, release the socket and enter the proper sync state.
263 bool droppedSocket = false;
264 if (mBindIfaceDirty || ((mSocket >= 0) && shouldAutoDisable())) {
265 cleanupSocket_l();
266 mBindIfaceDirty = false;
267 droppedSocket = true;
268 }
269
270 // Do we not have a socket but should have one? If so, try to set one
271 // up.
272 if ((mSocket < 0) && mBindIfaceValid && !shouldAutoDisable()) {
273 if (setupSocket_l()) {
274 // Success! We are now joining a new network (either coming
275 // from no network, or coming from a potentially different
276 // network). Force our priority to be lower so that we defer to
277 // any other masters which may already be on the network we are
278 // joining. Later, when we enter either the client or the
279 // master state, we will clear this flag and go back to our
280 // normal election priority.
281 setForceLowPriority(true);
282 switch (mState) {
283 // If we were in initial (whether we had a immediately
284 // before this network or not) we want to simply reset the
285 // system and start again. Forcing a transition from
286 // INITIAL to INITIAL should do the job.
287 case CommonClockService::STATE_INITIAL:
288 becomeInitial("bound interface");
289 break;
290
291 // If we were in the master state, then either we were the
292 // master in a no-network situation, or we were the master
293 // of a different network and have moved to a new interface.
John Grossmane1d6c082012-04-09 11:26:16 -0700294 // In either case, immediately transition to Ronin at low
295 // priority. If there is no one in the network we just
296 // joined, we will become master soon enough. If there is,
297 // we want to be certain to defer master status to the
298 // existing timeline currently running on the network.
299 //
Mike J. Chen6c929512011-08-15 11:59:47 -0700300 case CommonClockService::STATE_MASTER:
John Grossmane1d6c082012-04-09 11:26:16 -0700301 becomeRonin("leaving networkless mode");
Mike J. Chen6c929512011-08-15 11:59:47 -0700302 break;
303
304 // If we were in any other state (CLIENT, RONIN, or
305 // WAIT_FOR_ELECTION) then we must be moving from one
306 // network to another. We have lost our old master;
307 // transition to RONIN in an attempt to find a new master.
308 // If there are none out there, we will just assume
309 // responsibility for the timeline we used to be a client
310 // of.
311 default:
312 becomeRonin("bound interface");
313 break;
314 }
315 } else {
316 // That's odd... we failed to set up our socket. This could be
317 // due to some transient network change which will work itself
318 // out shortly; schedule a retry attempt in the near future.
319 mCurTimeout.setTimeout(kSetupRetryTimeoutMs);
320 }
321
322 // One way or the other, we don't have any data to process at this
323 // point (since we just tried to bulid a new socket). Loop back
324 // around and wait for the next thing to do.
325 continue;
326 } else if (droppedSocket) {
327 // We just lost our socket, and for whatever reason (either no
328 // config, or auto disable engaged) we are not supposed to rebuild
329 // one at this time. We are not going to rebuild our socket until
330 // something about our config/auto-disabled status changes, so we
331 // are basically in network-less mode. If we are already in either
332 // INITIAL or MASTER, just stay there until something changes. If
333 // we are in any other state (CLIENT, RONIN or WAIT_FOR_ELECTION),
334 // then transition to either INITIAL or MASTER depending on whether
335 // or not our timeline is valid.
John Grossman79489c42012-07-20 10:17:26 -0700336 mStateChangeLog.log(ANDROID_LOG_INFO, LOG_TAG,
337 "Entering networkless mode interface is %s, "
338 "shouldAutoDisable = %s",
339 mBindIfaceValid ? "valid" : "invalid",
340 shouldAutoDisable() ? "true" : "false");
Mike J. Chen6c929512011-08-15 11:59:47 -0700341 if ((mState != ICommonClock::STATE_INITIAL) &&
342 (mState != ICommonClock::STATE_MASTER)) {
343 if (mTimelineID == ICommonClock::kInvalidTimelineID)
344 becomeInitial("network-less mode");
345 else
346 becomeMaster("network-less mode");
347 }
348
349 continue;
350 }
351
John Grossmanc7f57c62012-06-26 12:50:28 -0700352 // Time to handle the timeouts?
353 if (needHandleTimeout) {
Mike J. Chen6c929512011-08-15 11:59:47 -0700354 if (!handleTimeout())
355 ALOGE("handleTimeout failed");
356 continue;
357 }
358
359 // Does our socket have data for us (assuming we still have one, we
360 // may have RXed a packet at the same time as a config change telling us
361 // to shut our socket down)? If so, process its data.
362 if ((mSocket >= 0) && (eventCnt > 1) && (pfds[1].revents)) {
363 mLastPacketRxLocalTime = wakeupTime;
364 if (!handlePacket())
365 ALOGE("handlePacket failed");
366 }
367 }
368
369 cleanupSocket_l();
370 return true;
371}
372
373void CommonTimeServer::clearPendingWakeupEvents_l() {
374 int64_t tmp;
375 read(mWakeupThreadFD, &tmp, sizeof(tmp));
376}
377
378void CommonTimeServer::wakeupThread_l() {
379 int64_t tmp = 1;
380 write(mWakeupThreadFD, &tmp, sizeof(tmp));
381}
382
383void CommonTimeServer::cleanupSocket_l() {
384 if (mSocket >= 0) {
385 close(mSocket);
386 mSocket = -1;
387 }
388}
389
390void CommonTimeServer::shutdownThread() {
391 // Flag the work thread for shutdown.
392 this->requestExit();
393
394 // Signal the thread in case its sleeping.
395 mLock.lock();
396 wakeupThread_l();
397 mLock.unlock();
398
399 // Wait for the thread to exit.
400 this->join();
401}
402
403bool CommonTimeServer::setupSocket_l() {
404 int rc;
405 bool ret_val = false;
406 struct sockaddr_in* ipv4_addr = NULL;
407 char masterElectionEPStr[64];
408 const int one = 1;
409
410 // This should never be needed, but if we happened to have an old socket
411 // lying around, be sure to not leak it before proceeding.
412 cleanupSocket_l();
413
414 // If we don't have a valid endpoint to bind to, then how did we get here in
415 // the first place? Regardless, we know that we are going to fail to bind,
416 // so don't even try.
417 if (!mBindIfaceValid)
418 return false;
419
420 sockaddrToString(mMasterElectionEP, true, masterElectionEPStr,
421 sizeof(masterElectionEPStr));
John Grossman79489c42012-07-20 10:17:26 -0700422 mStateChangeLog.log(ANDROID_LOG_INFO, LOG_TAG,
423 "Building socket :: bind = %s master election = %s",
424 mBindIface.string(), masterElectionEPStr);
Mike J. Chen6c929512011-08-15 11:59:47 -0700425
426 // TODO: add proper support for IPv6. Right now, we block IPv6 addresses at
427 // the configuration interface level.
428 if (AF_INET != mMasterElectionEP.ss_family) {
John Grossman79489c42012-07-20 10:17:26 -0700429 mStateChangeLog.log(ANDROID_LOG_WARN, LOG_TAG,
430 "TODO: add proper IPv6 support");
Mike J. Chen6c929512011-08-15 11:59:47 -0700431 goto bailout;
432 }
433
434 // open a UDP socket for the timeline serivce
435 mSocket = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
436 if (mSocket < 0) {
John Grossman79489c42012-07-20 10:17:26 -0700437 mStateChangeLog.log(ANDROID_LOG_ERROR, LOG_TAG,
438 "Failed to create socket (errno = %d)", errno);
Mike J. Chen6c929512011-08-15 11:59:47 -0700439 goto bailout;
440 }
441
442 // Bind to the selected interface using Linux's spiffy SO_BINDTODEVICE.
443 struct ifreq ifr;
444 memset(&ifr, 0, sizeof(ifr));
445 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", mBindIface.string());
446 ifr.ifr_name[sizeof(ifr.ifr_name) - 1] = 0;
447 rc = setsockopt(mSocket, SOL_SOCKET, SO_BINDTODEVICE,
448 (void *)&ifr, sizeof(ifr));
449 if (rc) {
John Grossman79489c42012-07-20 10:17:26 -0700450 mStateChangeLog.log(ANDROID_LOG_ERROR, LOG_TAG,
451 "Failed to bind socket at to interface %s "
452 "(errno = %d)", ifr.ifr_name, errno);
Mike J. Chen6c929512011-08-15 11:59:47 -0700453 goto bailout;
454 }
455
456 // Bind our socket to INADDR_ANY and the master election port. The
457 // interface binding we made using SO_BINDTODEVICE should limit us to
458 // traffic only on the interface we are interested in. We need to bind to
459 // INADDR_ANY and the specific master election port in order to be able to
460 // receive both unicast traffic and master election multicast traffic with
461 // just a single socket.
462 struct sockaddr_in bindAddr;
463 ipv4_addr = reinterpret_cast<struct sockaddr_in*>(&mMasterElectionEP);
464 memcpy(&bindAddr, ipv4_addr, sizeof(bindAddr));
465 bindAddr.sin_addr.s_addr = INADDR_ANY;
466 rc = bind(mSocket,
467 reinterpret_cast<const sockaddr *>(&bindAddr),
468 sizeof(bindAddr));
469 if (rc) {
John Grossman79489c42012-07-20 10:17:26 -0700470 mStateChangeLog.log(ANDROID_LOG_ERROR, LOG_TAG,
471 "Failed to bind socket to port %hu (errno = %d)",
472 ntohs(bindAddr.sin_port), errno);
Mike J. Chen6c929512011-08-15 11:59:47 -0700473 goto bailout;
474 }
475
476 if (0xE0000000 == (ntohl(ipv4_addr->sin_addr.s_addr) & 0xF0000000)) {
477 // If our master election endpoint is a multicast address, be sure to join
478 // the multicast group.
479 struct ip_mreq mreq;
480 mreq.imr_multiaddr = ipv4_addr->sin_addr;
481 mreq.imr_interface.s_addr = htonl(INADDR_ANY);
482 rc = setsockopt(mSocket, IPPROTO_IP, IP_ADD_MEMBERSHIP,
483 &mreq, sizeof(mreq));
484 if (rc == -1) {
485 ALOGE("Failed to join multicast group at %s. (errno = %d)",
486 masterElectionEPStr, errno);
487 goto bailout;
488 }
489
490 // disable loopback of multicast packets
491 const int zero = 0;
492 rc = setsockopt(mSocket, IPPROTO_IP, IP_MULTICAST_LOOP,
493 &zero, sizeof(zero));
494 if (rc == -1) {
John Grossman79489c42012-07-20 10:17:26 -0700495 mStateChangeLog.log(ANDROID_LOG_ERROR, LOG_TAG,
496 "Failed to disable multicast loopback "
497 "(errno = %d)", errno);
Mike J. Chen6c929512011-08-15 11:59:47 -0700498 goto bailout;
499 }
500 } else
Jason Simmonsdb632602012-07-17 15:48:53 -0700501 if (ntohl(ipv4_addr->sin_addr.s_addr) == 0xFFFFFFFF) {
502 // If the master election address is the broadcast address, then enable
503 // the broadcast socket option
Jason Simmonsdb632602012-07-17 15:48:53 -0700504 rc = setsockopt(mSocket, SOL_SOCKET, SO_BROADCAST, &one, sizeof(one));
505 if (rc == -1) {
John Grossman79489c42012-07-20 10:17:26 -0700506 mStateChangeLog.log(ANDROID_LOG_ERROR, LOG_TAG,
507 "Failed to enable broadcast (errno = %d)",
508 errno);
Jason Simmonsdb632602012-07-17 15:48:53 -0700509 goto bailout;
510 }
511 } else {
Mike J. Chen6c929512011-08-15 11:59:47 -0700512 // If the master election address is neither broadcast, nor multicast,
513 // then we are misconfigured. The config API layer should prevent this
514 // from ever happening.
515 goto bailout;
516 }
517
518 // Set the TTL of sent packets to 1. (Time protocol sync should never leave
519 // the local subnet)
520 rc = setsockopt(mSocket, IPPROTO_IP, IP_TTL, &one, sizeof(one));
521 if (rc == -1) {
John Grossman79489c42012-07-20 10:17:26 -0700522 mStateChangeLog.log(ANDROID_LOG_ERROR, LOG_TAG,
523 "Failed to set TTL to %d (errno = %d)", one, errno);
Mike J. Chen6c929512011-08-15 11:59:47 -0700524 goto bailout;
525 }
526
527 // get the device's unique ID
528 if (!assignDeviceID())
529 goto bailout;
530
531 ret_val = true;
532
533bailout:
534 if (!ret_val)
535 cleanupSocket_l();
536 return ret_val;
537}
538
539// generate a unique device ID that can be used for arbitration
540bool CommonTimeServer::assignDeviceID() {
541 if (!mBindIfaceValid)
542 return false;
543
544 struct ifreq ifr;
545 memset(&ifr, 0, sizeof(ifr));
546 ifr.ifr_addr.sa_family = AF_INET;
547 strlcpy(ifr.ifr_name, mBindIface.string(), IFNAMSIZ);
548
549 int rc = ioctl(mSocket, SIOCGIFHWADDR, &ifr);
550 if (rc) {
551 ALOGE("%s:%d ioctl failed", __PRETTY_FUNCTION__, __LINE__);
552 return false;
553 }
554
555 if (ifr.ifr_addr.sa_family != ARPHRD_ETHER) {
556 ALOGE("%s:%d got non-Ethernet address", __PRETTY_FUNCTION__, __LINE__);
557 return false;
558 }
559
560 mDeviceID = 0;
561 for (int i = 0; i < ETH_ALEN; i++) {
562 mDeviceID = (mDeviceID << 8) | ifr.ifr_hwaddr.sa_data[i];
563 }
564
565 return true;
566}
567
568// generate a new timeline ID
569void CommonTimeServer::assignTimelineID() {
570 do {
571 mTimelineID = (static_cast<uint64_t>(lrand48()) << 32)
572 | static_cast<uint64_t>(lrand48());
573 } while (mTimelineID == ICommonClock::kInvalidTimelineID);
574}
575
576// Select a preference between the device IDs of two potential masters.
577// Returns true if the first ID wins, or false if the second ID wins.
578bool CommonTimeServer::arbitrateMaster(
579 uint64_t deviceID1, uint8_t devicePrio1,
580 uint64_t deviceID2, uint8_t devicePrio2) {
581 return ((devicePrio1 > devicePrio2) ||
582 ((devicePrio1 == devicePrio2) && (deviceID1 > deviceID2)));
583}
584
// Formats src[0 .. src_len) as a hex dump into dst, 16 bytes per row, each row
// introduced by "\n<offset> :" and each byte printed as " xx".
//
// The result is always NUL terminated and is silently truncated when dst is
// too small.  An empty source yields an empty string.  Nothing is written when
// dst is NULL or dst_len is 0.
static void hexDumpToString(const uint8_t* src, size_t src_len,
                            char* dst, size_t dst_len) {
    // Without room for even the terminator there is nothing we may touch.
    if ((NULL == dst) || (0 == dst_len))
        return;

    // Start from an empty string so that an empty source (or an immediate
    // formatting failure) never leaves the caller with uninitialized bytes.
    dst[0] = 0;

    size_t offset = 0;
    size_t i;

    for (i = 0; (i < src_len) && (offset < dst_len); ++i) {
        int res;
        if (0 == (i % 16)) {
            res = snprintf(dst + offset, dst_len - offset, "\n%04zx :", i);
            if (res < 0)
                break;
            // snprintf reports the length it wanted to write, so offset may
            // step past the end of dst; that means we are full.
            offset += res;
            if (offset >= dst_len)
                break;
        }

        res = snprintf(dst + offset, dst_len - offset, " %02x", src[i]);
        if (res < 0)
            break;
        offset += res;
    }

    dst[dst_len - 1] = 0;
}
609
Mike J. Chen6c929512011-08-15 11:59:47 -0700610bool CommonTimeServer::handlePacket() {
611 uint8_t buf[256];
612 struct sockaddr_storage srcAddr;
613 socklen_t srcAddrLen = sizeof(srcAddr);
614
615 ssize_t recvBytes = recvfrom(
616 mSocket, buf, sizeof(buf), 0,
617 reinterpret_cast<const sockaddr *>(&srcAddr), &srcAddrLen);
618
619 if (recvBytes < 0) {
John Grossman79489c42012-07-20 10:17:26 -0700620 mBadPktLog.log(ANDROID_LOG_ERROR, LOG_TAG,
621 "recvfrom failed (res %d, errno %d)",
622 recvBytes, errno);
Mike J. Chen6c929512011-08-15 11:59:47 -0700623 return false;
624 }
625
626 UniversalTimeServicePacket pkt;
John Grossman79489c42012-07-20 10:17:26 -0700627 if (pkt.deserializePacket(buf, recvBytes, mSyncGroupID) < 0) {
628 char hex[256];
629 char srcEPStr[64];
630
631 hexDumpToString(buf, static_cast<size_t>(recvBytes), hex, sizeof(hex));
632 sockaddrToString(srcAddr, true, srcEPStr, sizeof(srcEPStr));
633
634 mBadPktLog.log("Failed to parse %d byte packet from %s.%s",
635 recvBytes, srcEPStr, hex);
Mike J. Chen6c929512011-08-15 11:59:47 -0700636 return false;
John Grossman79489c42012-07-20 10:17:26 -0700637 }
Mike J. Chen6c929512011-08-15 11:59:47 -0700638
639 bool result;
640 switch (pkt.packetType) {
641 case TIME_PACKET_WHO_IS_MASTER_REQUEST:
642 result = handleWhoIsMasterRequest(&pkt.p.who_is_master_request,
643 srcAddr);
644 break;
645
646 case TIME_PACKET_WHO_IS_MASTER_RESPONSE:
647 result = handleWhoIsMasterResponse(&pkt.p.who_is_master_response,
648 srcAddr);
649 break;
650
651 case TIME_PACKET_SYNC_REQUEST:
652 result = handleSyncRequest(&pkt.p.sync_request, srcAddr);
653 break;
654
655 case TIME_PACKET_SYNC_RESPONSE:
656 result = handleSyncResponse(&pkt.p.sync_response, srcAddr);
657 break;
658
659 case TIME_PACKET_MASTER_ANNOUNCEMENT:
660 result = handleMasterAnnouncement(&pkt.p.master_announcement,
661 srcAddr);
662 break;
663
664 default: {
John Grossman79489c42012-07-20 10:17:26 -0700665 char srcEPStr[64];
666 sockaddrToString(srcAddr, true, srcEPStr, sizeof(srcEPStr));
667
668 mBadPktLog.log(ANDROID_LOG_WARN, LOG_TAG,
669 "unknown packet type (%d) from %s",
670 pkt.packetType, srcEPStr);
671
Mike J. Chen6c929512011-08-15 11:59:47 -0700672 result = false;
673 } break;
674 }
675
676 return result;
677}
678
679bool CommonTimeServer::handleTimeout() {
680 // If we have no socket, then this must be a timeout to retry socket setup.
681 if (mSocket < 0)
682 return true;
683
684 switch (mState) {
685 case ICommonClock::STATE_INITIAL:
686 return handleTimeoutInitial();
687 case ICommonClock::STATE_CLIENT:
688 return handleTimeoutClient();
689 case ICommonClock::STATE_MASTER:
690 return handleTimeoutMaster();
691 case ICommonClock::STATE_RONIN:
692 return handleTimeoutRonin();
693 case ICommonClock::STATE_WAIT_FOR_ELECTION:
694 return handleTimeoutWaitForElection();
695 }
696
697 return false;
698}
699
700bool CommonTimeServer::handleTimeoutInitial() {
701 if (++mInitial_WhoIsMasterRequestTimeouts ==
702 kInitial_NumWhoIsMasterRetries) {
703 // none of our attempts to discover a master succeeded, so make
704 // this device the master
705 return becomeMaster("initial timeout");
706 } else {
707 // retry the WhoIsMaster request
708 return sendWhoIsMasterRequest();
709 }
710}
711
712bool CommonTimeServer::handleTimeoutClient() {
713 if (shouldPanicNotGettingGoodData())
714 return becomeInitial("timeout panic, no good data");
715
716 if (mClient_SyncRequestPending) {
717 mClient_SyncRequestPending = false;
718
719 if (++mClient_SyncRequestTimeouts < kClient_NumSyncRequestRetries) {
720 // a sync request has timed out, so retry
721 return sendSyncRequest();
722 } else {
723 // The master has failed to respond to a sync request for too many
724 // times in a row. Assume the master is dead and start electing
725 // a new master.
726 return becomeRonin("master not responding");
727 }
728 } else {
729 // initiate the next sync request
730 return sendSyncRequest();
731 }
732}
733
734bool CommonTimeServer::handleTimeoutMaster() {
735 // send another announcement from the master
736 return sendMasterAnnouncement();
737}
738
739bool CommonTimeServer::handleTimeoutRonin() {
740 if (++mRonin_WhoIsMasterRequestTimeouts == kRonin_NumWhoIsMasterRetries) {
741 // no other master is out there, so we won the election
742 return becomeMaster("no better masters detected");
743 } else {
744 return sendWhoIsMasterRequest();
745 }
746}
747
748bool CommonTimeServer::handleTimeoutWaitForElection() {
749 return becomeRonin("timeout waiting for election conclusion");
750}
751
752bool CommonTimeServer::handleWhoIsMasterRequest(
753 const WhoIsMasterRequestPacket* request,
754 const sockaddr_storage& srcAddr) {
John Grossman7a947c42012-08-21 16:39:11 -0700755 // Skip our own messages which come back via broadcast loopback.
756 if (request->senderDeviceID == mDeviceID)
757 return true;
John Grossman79489c42012-07-20 10:17:26 -0700758
759 char srcEPStr[64];
760 sockaddrToString(srcAddr, true, srcEPStr, sizeof(srcEPStr));
761 mElectionLog.log("RXed WhoIs master request while in state %s. "
762 "src %s reqTID %016llx ourTID %016llx",
763 stateToString(mState), srcEPStr,
764 request->timelineID, mTimelineID);
765
Mike J. Chen6c929512011-08-15 11:59:47 -0700766 if (mState == ICommonClock::STATE_MASTER) {
767 // is this request related to this master's timeline?
768 if (request->timelineID != ICommonClock::kInvalidTimelineID &&
769 request->timelineID != mTimelineID)
770 return true;
771
772 WhoIsMasterResponsePacket pkt;
773 pkt.initHeader(mTimelineID, mSyncGroupID);
774 pkt.deviceID = mDeviceID;
775 pkt.devicePriority = effectivePriority();
776
John Grossman79489c42012-07-20 10:17:26 -0700777 mElectionLog.log("TXing WhoIs master resp to %s while in state %s. "
778 "ourTID %016llx ourGID %016llx ourDID %016llx "
779 "ourPrio %u",
780 srcEPStr, stateToString(mState),
781 mTimelineID, mSyncGroupID,
782 pkt.deviceID, pkt.devicePriority);
783
Mike J. Chen6c929512011-08-15 11:59:47 -0700784 uint8_t buf[256];
785 ssize_t bufSz = pkt.serializePacket(buf, sizeof(buf));
786 if (bufSz < 0)
787 return false;
788
789 ssize_t sendBytes = sendto(
790 mSocket, buf, bufSz, 0,
791 reinterpret_cast<const sockaddr *>(&srcAddr),
792 sizeof(srcAddr));
793 if (sendBytes == -1) {
794 ALOGE("%s:%d sendto failed", __PRETTY_FUNCTION__, __LINE__);
795 return false;
796 }
797 } else if (mState == ICommonClock::STATE_RONIN) {
798 // if we hear a WhoIsMaster request from another device following
799 // the same timeline and that device wins arbitration, then we will stop
800 // trying to elect ourselves master and will instead wait for an
801 // announcement from the election winner
802 if (request->timelineID != mTimelineID)
803 return true;
804
805 if (arbitrateMaster(request->senderDeviceID,
806 request->senderDevicePriority,
807 mDeviceID,
808 effectivePriority()))
809 return becomeWaitForElection("would lose election");
810
811 return true;
812 } else if (mState == ICommonClock::STATE_INITIAL) {
813 // If a group of devices booted simultaneously (e.g. after a power
814 // outage) and all of them are in the initial state and there is no
815 // master, then each device may time out and declare itself master at
816 // the same time. To avoid this, listen for
817 // WhoIsMaster(InvalidTimeline) requests from peers. If we would lose
818 // arbitration against that peer, reset our timeout count so that the
819 // peer has a chance to become master before we time out.
820 if (request->timelineID == ICommonClock::kInvalidTimelineID &&
821 arbitrateMaster(request->senderDeviceID,
822 request->senderDevicePriority,
823 mDeviceID,
824 effectivePriority())) {
825 mInitial_WhoIsMasterRequestTimeouts = 0;
826 }
827 }
828
829 return true;
830}
831
832bool CommonTimeServer::handleWhoIsMasterResponse(
833 const WhoIsMasterResponsePacket* response,
834 const sockaddr_storage& srcAddr) {
John Grossman7a947c42012-08-21 16:39:11 -0700835 // Skip our own messages which come back via broadcast loopback.
836 if (response->deviceID == mDeviceID)
837 return true;
838
John Grossman79489c42012-07-20 10:17:26 -0700839 char srcEPStr[64];
840 sockaddrToString(srcAddr, true, srcEPStr, sizeof(srcEPStr));
841 mElectionLog.log("RXed WhoIs master response while in state %s. "
842 "src %s respTID %016llx respDID %016llx respPrio %u "
843 "ourTID %016llx",
844 stateToString(mState), srcEPStr,
845 response->timelineID,
846 response->deviceID,
847 static_cast<uint32_t>(response->devicePriority),
848 mTimelineID);
849
Mike J. Chen6c929512011-08-15 11:59:47 -0700850 if (mState == ICommonClock::STATE_INITIAL || mState == ICommonClock::STATE_RONIN) {
851 return becomeClient(srcAddr,
852 response->deviceID,
853 response->devicePriority,
854 response->timelineID,
855 "heard whois response");
856 } else if (mState == ICommonClock::STATE_CLIENT) {
857 // if we get multiple responses because there are multiple devices
858 // who believe that they are master, then follow the master that
859 // wins arbitration
860 if (arbitrateMaster(response->deviceID,
861 response->devicePriority,
862 mClient_MasterDeviceID,
863 mClient_MasterDevicePriority)) {
864 return becomeClient(srcAddr,
865 response->deviceID,
866 response->devicePriority,
867 response->timelineID,
868 "heard whois response");
869 }
870 }
871
872 return true;
873}
874
875bool CommonTimeServer::handleSyncRequest(const SyncRequestPacket* request,
876 const sockaddr_storage& srcAddr) {
877 SyncResponsePacket pkt;
878 pkt.initHeader(mTimelineID, mSyncGroupID);
879
880 if ((mState == ICommonClock::STATE_MASTER) &&
881 (mTimelineID == request->timelineID)) {
882 int64_t rxLocalTime = mLastPacketRxLocalTime;
883 int64_t rxCommonTime;
884
885 // If we are master on an actual network and have actual clients, then
886 // we are no longer low priority.
887 setForceLowPriority(false);
888
889 if (OK != mCommonClock.localToCommon(rxLocalTime, &rxCommonTime)) {
890 return false;
891 }
892
893 int64_t txLocalTime = mLocalClock.getLocalTime();;
894 int64_t txCommonTime;
895 if (OK != mCommonClock.localToCommon(txLocalTime, &txCommonTime)) {
896 return false;
897 }
898
899 pkt.nak = 0;
900 pkt.clientTxLocalTime = request->clientTxLocalTime;
901 pkt.masterRxCommonTime = rxCommonTime;
902 pkt.masterTxCommonTime = txCommonTime;
903 } else {
904 pkt.nak = 1;
905 pkt.clientTxLocalTime = 0;
906 pkt.masterRxCommonTime = 0;
907 pkt.masterTxCommonTime = 0;
908 }
909
910 uint8_t buf[256];
911 ssize_t bufSz = pkt.serializePacket(buf, sizeof(buf));
912 if (bufSz < 0)
913 return false;
914
915 ssize_t sendBytes = sendto(
916 mSocket, &buf, bufSz, 0,
917 reinterpret_cast<const sockaddr *>(&srcAddr),
918 sizeof(srcAddr));
919 if (sendBytes == -1) {
920 ALOGE("%s:%d sendto failed", __PRETTY_FUNCTION__, __LINE__);
921 return false;
922 }
923
924 return true;
925}
926
927bool CommonTimeServer::handleSyncResponse(
928 const SyncResponsePacket* response,
929 const sockaddr_storage& srcAddr) {
930 if (mState != ICommonClock::STATE_CLIENT)
931 return true;
932
933 assert(mMasterEPValid);
934 if (!sockaddrMatch(srcAddr, mMasterEP, true)) {
935 char srcEP[64], expectedEP[64];
936 sockaddrToString(srcAddr, true, srcEP, sizeof(srcEP));
937 sockaddrToString(mMasterEP, true, expectedEP, sizeof(expectedEP));
938 ALOGI("Dropping sync response from unexpected address."
939 " Expected %s Got %s", expectedEP, srcEP);
940 return true;
941 }
942
943 if (response->nak) {
944 // if our master is no longer accepting requests, then we need to find
945 // a new master
946 return becomeRonin("master NAK'ed");
947 }
948
949 mClient_SyncRequestPending = 0;
950 mClient_SyncRequestTimeouts = 0;
951 mClient_PacketRTTLog.logRX(response->clientTxLocalTime,
952 mLastPacketRxLocalTime);
953
954 bool result;
955 if (!(mClient_SyncRespsRXedFromCurMaster++)) {
956 // the first request/response exchange between a client and a master
957 // may take unusually long due to ARP, so discard it.
958 result = true;
959 } else {
960 int64_t clientTxLocalTime = response->clientTxLocalTime;
961 int64_t clientRxLocalTime = mLastPacketRxLocalTime;
962 int64_t masterTxCommonTime = response->masterTxCommonTime;
963 int64_t masterRxCommonTime = response->masterRxCommonTime;
964
965 int64_t rtt = (clientRxLocalTime - clientTxLocalTime);
966 int64_t avgLocal = (clientTxLocalTime + clientRxLocalTime) >> 1;
967 int64_t avgCommon = (masterTxCommonTime + masterRxCommonTime) >> 1;
968
969 // if the RTT of the packet is significantly larger than the panic
970 // threshold, we should simply discard it. Its better to do nothing
971 // than to take cues from a packet like that.
972 int rttCommon = mCommonClock.localDurationToCommonDuration(rtt);
973 if (rttCommon > (static_cast<int64_t>(mPanicThresholdUsec) *
974 kRTTDiscardPanicThreshMultiplier)) {
975 ALOGV("Dropping sync response with RTT of %lld uSec", rttCommon);
976 mClient_ExpiredSyncRespsRXedFromCurMaster++;
977 if (shouldPanicNotGettingGoodData())
978 return becomeInitial("RX panic, no good data");
979 } else {
Kent Ryhorchuk11bc45f2012-02-13 16:24:29 -0800980 result = mClockRecovery.pushDisciplineEvent(avgLocal, avgCommon, rttCommon);
Mike J. Chen6c929512011-08-15 11:59:47 -0700981 mClient_LastGoodSyncRX = clientRxLocalTime;
982
983 if (result) {
984 // indicate to listeners that we've synced to the common timeline
985 notifyClockSync();
986 } else {
987 ALOGE("Panic! Observed clock sync error is too high to tolerate,"
988 " resetting state machine and starting over.");
989 notifyClockSyncLoss();
990 return becomeInitial("panic");
991 }
992 }
993 }
994
995 mCurTimeout.setTimeout(mSyncRequestIntervalMs);
996 return result;
997}
998
999bool CommonTimeServer::handleMasterAnnouncement(
1000 const MasterAnnouncementPacket* packet,
1001 const sockaddr_storage& srcAddr) {
1002 uint64_t newDeviceID = packet->deviceID;
1003 uint8_t newDevicePrio = packet->devicePriority;
1004 uint64_t newTimelineID = packet->timelineID;
1005
John Grossman7a947c42012-08-21 16:39:11 -07001006 // Skip our own messages which come back via broadcast loopback.
1007 if (newDeviceID == mDeviceID)
1008 return true;
1009
John Grossman79489c42012-07-20 10:17:26 -07001010 char srcEPStr[64];
1011 sockaddrToString(srcAddr, true, srcEPStr, sizeof(srcEPStr));
1012 mElectionLog.log("RXed master announcement while in state %s. "
1013 "src %s srcDevID %lld srcPrio %u srcTID %016llx",
1014 stateToString(mState), srcEPStr,
1015 newDeviceID, static_cast<uint32_t>(newDevicePrio),
1016 newTimelineID);
1017
Mike J. Chen6c929512011-08-15 11:59:47 -07001018 if (mState == ICommonClock::STATE_INITIAL ||
1019 mState == ICommonClock::STATE_RONIN ||
1020 mState == ICommonClock::STATE_WAIT_FOR_ELECTION) {
1021 // if we aren't currently following a master, then start following
1022 // this new master
1023 return becomeClient(srcAddr,
1024 newDeviceID,
1025 newDevicePrio,
1026 newTimelineID,
1027 "heard master announcement");
1028 } else if (mState == ICommonClock::STATE_CLIENT) {
1029 // if the new master wins arbitration against our current master,
1030 // then become a client of the new master
1031 if (arbitrateMaster(newDeviceID,
1032 newDevicePrio,
1033 mClient_MasterDeviceID,
1034 mClient_MasterDevicePriority))
1035 return becomeClient(srcAddr,
1036 newDeviceID,
1037 newDevicePrio,
1038 newTimelineID,
1039 "heard master announcement");
1040 } else if (mState == ICommonClock::STATE_MASTER) {
1041 // two masters are competing - if the new one wins arbitration, then
1042 // cease acting as master
1043 if (arbitrateMaster(newDeviceID, newDevicePrio,
1044 mDeviceID, effectivePriority()))
1045 return becomeClient(srcAddr, newDeviceID,
1046 newDevicePrio, newTimelineID,
1047 "heard master announcement");
1048 }
1049
1050 return true;
1051}
1052
1053bool CommonTimeServer::sendWhoIsMasterRequest() {
1054 assert(mState == ICommonClock::STATE_INITIAL || mState == ICommonClock::STATE_RONIN);
1055
1056 // If we have no socket, then we must be in the unconfigured initial state.
1057 // Don't report any errors, just don't try to send the initial who-is-master
1058 // query. Eventually, our network will either become configured, or we will
1059 // be forced into network-less master mode by higher level code.
1060 if (mSocket < 0) {
1061 assert(mState == ICommonClock::STATE_INITIAL);
1062 return true;
1063 }
1064
1065 bool ret = false;
1066 WhoIsMasterRequestPacket pkt;
1067 pkt.initHeader(mSyncGroupID);
1068 pkt.senderDeviceID = mDeviceID;
1069 pkt.senderDevicePriority = effectivePriority();
1070
1071 uint8_t buf[256];
1072 ssize_t bufSz = pkt.serializePacket(buf, sizeof(buf));
1073 if (bufSz >= 0) {
John Grossman79489c42012-07-20 10:17:26 -07001074 char dstEPStr[64];
1075 sockaddrToString(mMasterElectionEP, true, dstEPStr, sizeof(dstEPStr));
1076 mElectionLog.log("TXing WhoIs master request to %s while in state %s. "
1077 "ourTID %016llx ourGID %016llx ourDID %016llx "
1078 "ourPrio %u",
1079 dstEPStr, stateToString(mState),
1080 mTimelineID, mSyncGroupID,
1081 pkt.senderDeviceID, pkt.senderDevicePriority);
1082
Mike J. Chen6c929512011-08-15 11:59:47 -07001083 ssize_t sendBytes = sendto(
1084 mSocket, buf, bufSz, 0,
1085 reinterpret_cast<const sockaddr *>(&mMasterElectionEP),
1086 sizeof(mMasterElectionEP));
1087 if (sendBytes < 0)
1088 ALOGE("WhoIsMaster sendto failed (errno %d)", errno);
1089 ret = true;
1090 }
1091
1092 if (mState == ICommonClock::STATE_INITIAL) {
1093 mCurTimeout.setTimeout(kInitial_WhoIsMasterTimeoutMs);
1094 } else {
1095 mCurTimeout.setTimeout(kRonin_WhoIsMasterTimeoutMs);
1096 }
1097
1098 return ret;
1099}
1100
1101bool CommonTimeServer::sendSyncRequest() {
1102 // If we are sending sync requests, then we must be in the client state and
1103 // we must have a socket (when we have no network, we are only supposed to
1104 // be in INITIAL or MASTER)
1105 assert(mState == ICommonClock::STATE_CLIENT);
1106 assert(mSocket >= 0);
1107
1108 bool ret = false;
1109 SyncRequestPacket pkt;
1110 pkt.initHeader(mTimelineID, mSyncGroupID);
1111 pkt.clientTxLocalTime = mLocalClock.getLocalTime();
1112
1113 if (!mClient_FirstSyncTX)
1114 mClient_FirstSyncTX = pkt.clientTxLocalTime;
1115
1116 mClient_PacketRTTLog.logTX(pkt.clientTxLocalTime);
1117
1118 uint8_t buf[256];
1119 ssize_t bufSz = pkt.serializePacket(buf, sizeof(buf));
1120 if (bufSz >= 0) {
1121 ssize_t sendBytes = sendto(
1122 mSocket, buf, bufSz, 0,
1123 reinterpret_cast<const sockaddr *>(&mMasterEP),
1124 sizeof(mMasterEP));
1125 if (sendBytes < 0)
1126 ALOGE("SyncRequest sendto failed (errno %d)", errno);
1127 ret = true;
1128 }
1129
1130 mClient_SyncsSentToCurMaster++;
1131 mCurTimeout.setTimeout(mSyncRequestIntervalMs);
1132 mClient_SyncRequestPending = true;
1133
1134 return ret;
1135}
1136
1137bool CommonTimeServer::sendMasterAnnouncement() {
1138 bool ret = false;
1139 assert(mState == ICommonClock::STATE_MASTER);
1140
1141 // If we are being asked to send a master announcement, but we have no
1142 // socket, we must be in network-less master mode. Don't bother to send the
1143 // announcement, and don't bother to schedule a timeout. When the network
1144 // comes up, the work thread will get poked and start the process of
1145 // figuring out who the current master should be.
1146 if (mSocket < 0) {
1147 mCurTimeout.setTimeout(kInfiniteTimeout);
1148 return true;
1149 }
1150
1151 MasterAnnouncementPacket pkt;
1152 pkt.initHeader(mTimelineID, mSyncGroupID);
1153 pkt.deviceID = mDeviceID;
1154 pkt.devicePriority = effectivePriority();
1155
1156 uint8_t buf[256];
1157 ssize_t bufSz = pkt.serializePacket(buf, sizeof(buf));
1158 if (bufSz >= 0) {
John Grossman79489c42012-07-20 10:17:26 -07001159 char dstEPStr[64];
1160 sockaddrToString(mMasterElectionEP, true, dstEPStr, sizeof(dstEPStr));
1161 mElectionLog.log("TXing Master announcement to %s while in state %s. "
1162 "ourTID %016llx ourGID %016llx ourDID %016llx "
1163 "ourPrio %u",
1164 dstEPStr, stateToString(mState),
1165 mTimelineID, mSyncGroupID,
1166 pkt.deviceID, pkt.devicePriority);
1167
Mike J. Chen6c929512011-08-15 11:59:47 -07001168 ssize_t sendBytes = sendto(
1169 mSocket, buf, bufSz, 0,
1170 reinterpret_cast<const sockaddr *>(&mMasterElectionEP),
1171 sizeof(mMasterElectionEP));
1172 if (sendBytes < 0)
1173 ALOGE("MasterAnnouncement sendto failed (errno %d)", errno);
1174 ret = true;
1175 }
1176
1177 mCurTimeout.setTimeout(mMasterAnnounceIntervalMs);
1178 return ret;
1179}
1180
1181bool CommonTimeServer::becomeClient(const sockaddr_storage& masterEP,
1182 uint64_t masterDeviceID,
1183 uint8_t masterDevicePriority,
1184 uint64_t timelineID,
1185 const char* cause) {
1186 char newEPStr[64], oldEPStr[64];
1187 sockaddrToString(masterEP, true, newEPStr, sizeof(newEPStr));
1188 sockaddrToString(mMasterEP, mMasterEPValid, oldEPStr, sizeof(oldEPStr));
1189
John Grossman79489c42012-07-20 10:17:26 -07001190 mStateChangeLog.log(ANDROID_LOG_INFO, LOG_TAG,
1191 "%s --> CLIENT (%s) :%s"
1192 " OldMaster: %02x-%014llx::%016llx::%s"
1193 " NewMaster: %02x-%014llx::%016llx::%s",
1194 stateToString(mState), cause,
1195 (mTimelineID != timelineID) ? " (new timeline)" : "",
1196 mClient_MasterDevicePriority, mClient_MasterDeviceID,
1197 mTimelineID, oldEPStr,
1198 masterDevicePriority, masterDeviceID,
1199 timelineID, newEPStr);
Mike J. Chen6c929512011-08-15 11:59:47 -07001200
1201 if (mTimelineID != timelineID) {
1202 // start following a new timeline
1203 mTimelineID = timelineID;
1204 mClockRecovery.reset(true, true);
1205 notifyClockSyncLoss();
1206 } else {
1207 // start following a new master on the existing timeline
1208 mClockRecovery.reset(false, true);
1209 }
1210
1211 mMasterEP = masterEP;
1212 mMasterEPValid = true;
John Grossmane1d6c082012-04-09 11:26:16 -07001213
1214 // If we are on a real network as a client of a real master, then we should
1215 // no longer force low priority. If our master disappears, we should have
1216 // the high priority bit set during the election to replace the master
1217 // because this group was a real group and not a singleton created in
1218 // networkless mode.
Mike J. Chen6c929512011-08-15 11:59:47 -07001219 setForceLowPriority(false);
1220
1221 mClient_MasterDeviceID = masterDeviceID;
1222 mClient_MasterDevicePriority = masterDevicePriority;
1223 resetSyncStats();
1224
1225 setState(ICommonClock::STATE_CLIENT);
1226
1227 // add some jitter to when the various clients send their requests
1228 // in order to reduce the likelihood that a group of clients overload
1229 // the master after receiving a master announcement
1230 usleep((lrand48() % 100) * 1000);
1231
1232 return sendSyncRequest();
1233}
1234
1235bool CommonTimeServer::becomeMaster(const char* cause) {
1236 uint64_t oldTimelineID = mTimelineID;
1237 if (mTimelineID == ICommonClock::kInvalidTimelineID) {
1238 // this device has not been following any existing timeline,
1239 // so it will create a new timeline and declare itself master
1240 assert(!mCommonClock.isValid());
1241
1242 // set the common time basis
1243 mCommonClock.setBasis(mLocalClock.getLocalTime(), 0);
1244
1245 // assign an arbitrary timeline iD
1246 assignTimelineID();
1247
1248 // notify listeners that we've created a common timeline
1249 notifyClockSync();
1250 }
1251
John Grossman79489c42012-07-20 10:17:26 -07001252 mStateChangeLog.log(ANDROID_LOG_INFO, LOG_TAG,
1253 "%s --> MASTER (%s) : %s timeline %016llx",
1254 stateToString(mState), cause,
1255 (oldTimelineID == mTimelineID) ? "taking ownership of"
1256 : "creating new",
1257 mTimelineID);
Mike J. Chen6c929512011-08-15 11:59:47 -07001258
1259 memset(&mMasterEP, 0, sizeof(mMasterEP));
1260 mMasterEPValid = false;
Mike J. Chen6c929512011-08-15 11:59:47 -07001261 mClient_MasterDevicePriority = effectivePriority();
1262 mClient_MasterDeviceID = mDeviceID;
1263 mClockRecovery.reset(false, true);
1264 resetSyncStats();
1265
1266 setState(ICommonClock::STATE_MASTER);
1267 return sendMasterAnnouncement();
1268}
1269
1270bool CommonTimeServer::becomeRonin(const char* cause) {
1271 // If we were the client of a given timeline, but had never received even a
1272 // single time sync packet, then we transition back to Initial instead of
1273 // Ronin. If we transition to Ronin and end up becoming the new Master, we
1274 // will be unable to service requests for other clients because we never
1275 // actually knew what time it was. By going to initial, we ensure that
1276 // other clients who know what time it is, but would lose master arbitration
1277 // in the Ronin case, will step up and become the proper new master of the
1278 // old timeline.
1279
1280 char oldEPStr[64];
1281 sockaddrToString(mMasterEP, mMasterEPValid, oldEPStr, sizeof(oldEPStr));
1282 memset(&mMasterEP, 0, sizeof(mMasterEP));
1283 mMasterEPValid = false;
1284
1285 if (mCommonClock.isValid()) {
John Grossman79489c42012-07-20 10:17:26 -07001286 mStateChangeLog.log(ANDROID_LOG_INFO, LOG_TAG,
1287 "%s --> RONIN (%s) : lost track of previously valid timeline "
Mike J. Chen6c929512011-08-15 11:59:47 -07001288 "%02x-%014llx::%016llx::%s (%d TXed %d RXed %d RXExpired)",
1289 stateToString(mState), cause,
1290 mClient_MasterDevicePriority, mClient_MasterDeviceID,
1291 mTimelineID, oldEPStr,
1292 mClient_SyncsSentToCurMaster,
1293 mClient_SyncRespsRXedFromCurMaster,
1294 mClient_ExpiredSyncRespsRXedFromCurMaster);
1295
1296 mRonin_WhoIsMasterRequestTimeouts = 0;
1297 setState(ICommonClock::STATE_RONIN);
1298 return sendWhoIsMasterRequest();
1299 } else {
John Grossman79489c42012-07-20 10:17:26 -07001300 mStateChangeLog.log(ANDROID_LOG_INFO, LOG_TAG,
1301 "%s --> INITIAL (%s) : never synced timeline "
Mike J. Chen6c929512011-08-15 11:59:47 -07001302 "%02x-%014llx::%016llx::%s (%d TXed %d RXed %d RXExpired)",
1303 stateToString(mState), cause,
1304 mClient_MasterDevicePriority, mClient_MasterDeviceID,
1305 mTimelineID, oldEPStr,
1306 mClient_SyncsSentToCurMaster,
1307 mClient_SyncRespsRXedFromCurMaster,
1308 mClient_ExpiredSyncRespsRXedFromCurMaster);
1309
1310 return becomeInitial("ronin, no timeline");
1311 }
1312}
1313
1314bool CommonTimeServer::becomeWaitForElection(const char* cause) {
John Grossman79489c42012-07-20 10:17:26 -07001315 mStateChangeLog.log(ANDROID_LOG_INFO, LOG_TAG,
1316 "%s --> WAIT_FOR_ELECTION (%s) : dropping out of election,"
Mike J. Chen6c929512011-08-15 11:59:47 -07001317 " waiting %d mSec for completion.",
1318 stateToString(mState), cause, kWaitForElection_TimeoutMs);
1319
1320 setState(ICommonClock::STATE_WAIT_FOR_ELECTION);
1321 mCurTimeout.setTimeout(kWaitForElection_TimeoutMs);
1322 return true;
1323}
1324
1325bool CommonTimeServer::becomeInitial(const char* cause) {
John Grossman79489c42012-07-20 10:17:26 -07001326 mStateChangeLog.log(ANDROID_LOG_INFO, LOG_TAG,
1327 "Entering INITIAL (%s), total reset.",
1328 cause);
Mike J. Chen6c929512011-08-15 11:59:47 -07001329
1330 setState(ICommonClock::STATE_INITIAL);
1331
1332 // reset clock recovery
1333 mClockRecovery.reset(true, true);
1334
1335 // reset internal state bookkeeping.
1336 mCurTimeout.setTimeout(kInfiniteTimeout);
1337 memset(&mMasterEP, 0, sizeof(mMasterEP));
1338 mMasterEPValid = false;
1339 mLastPacketRxLocalTime = 0;
1340 mTimelineID = ICommonClock::kInvalidTimelineID;
1341 mClockSynced = false;
1342 mInitial_WhoIsMasterRequestTimeouts = 0;
1343 mClient_MasterDeviceID = 0;
1344 mClient_MasterDevicePriority = 0;
1345 mRonin_WhoIsMasterRequestTimeouts = 0;
1346 resetSyncStats();
1347
1348 // send the first request to discover the master
1349 return sendWhoIsMasterRequest();
1350}
1351
1352void CommonTimeServer::notifyClockSync() {
1353 if (!mClockSynced) {
1354 mClockSynced = true;
1355 mICommonClock->notifyOnTimelineChanged(mTimelineID);
1356 }
1357}
1358
1359void CommonTimeServer::notifyClockSyncLoss() {
1360 if (mClockSynced) {
1361 mClockSynced = false;
1362 mICommonClock->notifyOnTimelineChanged(
1363 ICommonClock::kInvalidTimelineID);
1364 }
1365}
1366
1367void CommonTimeServer::setState(ICommonClock::State s) {
1368 mState = s;
1369}
1370
1371const char* CommonTimeServer::stateToString(ICommonClock::State s) {
1372 switch(s) {
1373 case ICommonClock::STATE_INITIAL:
1374 return "INITIAL";
1375 case ICommonClock::STATE_CLIENT:
1376 return "CLIENT";
1377 case ICommonClock::STATE_MASTER:
1378 return "MASTER";
1379 case ICommonClock::STATE_RONIN:
1380 return "RONIN";
1381 case ICommonClock::STATE_WAIT_FOR_ELECTION:
1382 return "WAIT_FOR_ELECTION";
1383 default:
1384 return "unknown";
1385 }
1386}
1387
1388void CommonTimeServer::sockaddrToString(const sockaddr_storage& addr,
1389 bool addrValid,
1390 char* buf, size_t bufLen) {
1391 if (!bufLen || !buf)
1392 return;
1393
1394 if (addrValid) {
1395 switch (addr.ss_family) {
1396 case AF_INET: {
1397 const struct sockaddr_in* sa =
1398 reinterpret_cast<const struct sockaddr_in*>(&addr);
1399 unsigned long a = ntohl(sa->sin_addr.s_addr);
1400 uint16_t p = ntohs(sa->sin_port);
1401 snprintf(buf, bufLen, "%lu.%lu.%lu.%lu:%hu",
1402 ((a >> 24) & 0xFF), ((a >> 16) & 0xFF),
1403 ((a >> 8) & 0xFF), (a & 0xFF), p);
1404 } break;
1405
1406 case AF_INET6: {
1407 const struct sockaddr_in6* sa =
1408 reinterpret_cast<const struct sockaddr_in6*>(&addr);
1409 const uint8_t* a = sa->sin6_addr.s6_addr;
1410 uint16_t p = ntohs(sa->sin6_port);
1411 snprintf(buf, bufLen,
1412 "%02X%02X:%02X%02X:%02X%02X:%02X%02X:"
1413 "%02X%02X:%02X%02X:%02X%02X:%02X%02X port %hd",
1414 a[0], a[1], a[ 2], a[ 3], a[ 4], a[ 5], a[ 6], a[ 7],
1415 a[8], a[9], a[10], a[11], a[12], a[13], a[14], a[15],
1416 p);
1417 } break;
1418
1419 default:
1420 snprintf(buf, bufLen,
1421 "<unknown sockaddr family %d>", addr.ss_family);
1422 break;
1423 }
1424 } else {
1425 snprintf(buf, bufLen, "<none>");
1426 }
1427
1428 buf[bufLen - 1] = 0;
1429}
1430
1431bool CommonTimeServer::sockaddrMatch(const sockaddr_storage& a1,
1432 const sockaddr_storage& a2,
1433 bool matchAddressOnly) {
1434 if (a1.ss_family != a2.ss_family)
1435 return false;
1436
1437 switch (a1.ss_family) {
1438 case AF_INET: {
1439 const struct sockaddr_in* sa1 =
1440 reinterpret_cast<const struct sockaddr_in*>(&a1);
1441 const struct sockaddr_in* sa2 =
1442 reinterpret_cast<const struct sockaddr_in*>(&a2);
1443
1444 if (sa1->sin_addr.s_addr != sa2->sin_addr.s_addr)
1445 return false;
1446
1447 return (matchAddressOnly || (sa1->sin_port == sa2->sin_port));
1448 } break;
1449
1450 case AF_INET6: {
1451 const struct sockaddr_in6* sa1 =
1452 reinterpret_cast<const struct sockaddr_in6*>(&a1);
1453 const struct sockaddr_in6* sa2 =
1454 reinterpret_cast<const struct sockaddr_in6*>(&a2);
1455
1456 if (memcmp(&sa1->sin6_addr, &sa2->sin6_addr, sizeof(sa2->sin6_addr)))
1457 return false;
1458
1459 return (matchAddressOnly || (sa1->sin6_port == sa2->sin6_port));
1460 } break;
1461
1462 // Huh? We don't deal in non-IPv[46] addresses. Not sure how we got
1463 // here, but we don't know how to comapre these addresses and simply
1464 // default to a no-match decision.
1465 default: return false;
1466 }
1467}
1468
Mike J. Chen6c929512011-08-15 11:59:47 -07001469bool CommonTimeServer::shouldPanicNotGettingGoodData() {
1470 if (mClient_FirstSyncTX) {
1471 int64_t now = mLocalClock.getLocalTime();
1472 int64_t delta = now - (mClient_LastGoodSyncRX
1473 ? mClient_LastGoodSyncRX
1474 : mClient_FirstSyncTX);
1475 int64_t deltaUsec = mCommonClock.localDurationToCommonDuration(delta);
1476
1477 if (deltaUsec >= kNoGoodDataPanicThresholdUsec)
1478 return true;
1479 }
1480
1481 return false;
1482}
1483
1484void CommonTimeServer::PacketRTTLog::logTX(int64_t txTime) {
1485 txTimes[wrPtr] = txTime;
1486 rxTimes[wrPtr] = 0;
1487 wrPtr = (wrPtr + 1) % RTT_LOG_SIZE;
1488 if (!wrPtr)
1489 logFull = true;
1490}
1491
1492void CommonTimeServer::PacketRTTLog::logRX(int64_t txTime, int64_t rxTime) {
1493 if (!logFull && !wrPtr)
1494 return;
1495
1496 uint32_t i = logFull ? wrPtr : 0;
1497 do {
1498 if (txTimes[i] == txTime) {
1499 rxTimes[i] = rxTime;
1500 break;
1501 }
1502 i = (i + 1) % RTT_LOG_SIZE;
1503 } while (i != wrPtr);
1504}
1505
1506} // namespace android