Blame - libs/common_time/clock_recovery.cpp - platform/frameworks/base

blob: 392caa0ca16fc30b10981e8b56ed8ced2d822472 [file] [log] [blame]

Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	1	/*
				2	* Copyright (C) 2011 The Android Open Source Project
				3	*
				4	* Licensed under the Apache License, Version 2.0 (the "License");
				5	* you may not use this file except in compliance with the License.
				6	* You may obtain a copy of the License at
				7	*
				8	* http://www.apache.org/licenses/LICENSE-2.0
				9	*
				10	* Unless required by applicable law or agreed to in writing, software
				11	* distributed under the License is distributed on an "AS IS" BASIS,
				12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	* See the License for the specific language governing permissions and
				14	* limitations under the License.
				15	*/
				16
				17	/*
				18	* A service that exchanges time synchronization information between
				19	* a master that defines a timeline and clients that follow the timeline.
				20	*/
				21
				22	#define __STDC_LIMIT_MACROS
				23	#define LOG_TAG "common_time"
				24	#include <utils/Log.h>
Andreas Gampe	4c57eda	2014-11-10 11:51:01 -0800	[diff] [blame]	25	#include <inttypes.h>
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	26	#include <stdint.h>
				27
				28	#include <common_time/local_clock.h>
				29	#include <assert.h>
				30
				31	#include "clock_recovery.h"
				32	#include "common_clock.h"
				33	#ifdef TIME_SERVICE_DEBUG
				34	#include "diag_thread.h"
				35	#endif
				36
Kent Ryhorchuk	11bc45f	2012-02-13 16:24:29 -0800	[diff] [blame]	37	// Define log macro so we can make LOGV into LOGE when we are exclusively
				38	// debugging this code.
				39	#ifdef TIME_SERVICE_DEBUG
				40	#define LOG_TS ALOGE
				41	#else
				42	#define LOG_TS ALOGV
				43	#endif
				44
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	45	namespace android {
				46
				47	ClockRecoveryLoop::ClockRecoveryLoop(LocalClock* local_clock,
				48	CommonClock* common_clock) {
				49	assert(NULL != local_clock);
				50	assert(NULL != common_clock);
				51
				52	local_clock_ = local_clock;
				53	common_clock_ = common_clock;
				54
				55	local_clock_can_slew_ = local_clock_->initCheck() &&
				56	(local_clock_->setLocalSlew(0) == OK);
John Grossman	c7f57c6	2012-06-26 12:50:28 -0700	[diff] [blame]	57	tgt_correction_ = 0;
				58	cur_correction_ = 0;
				59
				60	// Precompute the max rate at which we are allowed to change the VCXO
				61	// control.
				62	uint64_t N = 0x10000ull * 1000ull;
				63	uint64_t D = local_clock_->getLocalFreq() * kMinFullRangeSlewChange_mSec;
				64	LinearTransform::reduce(&N, &D);
				65	while ((N > INT32_MAX) \|\| (D > UINT32_MAX)) {
				66	N >>= 1;
				67	D >>= 1;
				68	LinearTransform::reduce(&N, &D);
				69	}
				70	time_to_cur_slew_.a_to_b_numer = static_cast<int32_t>(N);
				71	time_to_cur_slew_.a_to_b_denom = static_cast<uint32_t>(D);
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	72
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	73	reset(true, true);
				74
				75	#ifdef TIME_SERVICE_DEBUG
				76	diag_thread_ = new DiagThread(common_clock_, local_clock_);
				77	if (diag_thread_ != NULL) {
				78	status_t res = diag_thread_->startWorkThread();
				79	if (res != OK)
				80	ALOGW("Failed to start A@H clock recovery diagnostic thread.");
				81	} else
				82	ALOGW("Failed to allocate diagnostic thread.");
				83	#endif
				84	}
				85
				86	ClockRecoveryLoop::~ClockRecoveryLoop() {
				87	#ifdef TIME_SERVICE_DEBUG
				88	diag_thread_->stopWorkThread();
				89	#endif
				90	}
				91
Kent Ryhorchuk	11bc45f	2012-02-13 16:24:29 -0800	[diff] [blame]	92	// Constants.
				93	const float ClockRecoveryLoop::dT = 1.0;
				94	const float ClockRecoveryLoop::Kc = 1.0f;
				95	const float ClockRecoveryLoop::Ti = 15.0f;
				96	const float ClockRecoveryLoop::Tf = 0.05;
				97	const float ClockRecoveryLoop::bias_Fc = 0.01;
				98	const float ClockRecoveryLoop::bias_RC = (dT / (2 * 3.14159f * bias_Fc));
				99	const float ClockRecoveryLoop::bias_Alpha = (dT / (bias_RC + dT));
				100	const int64_t ClockRecoveryLoop::panic_thresh_ = 50000;
				101	const int64_t ClockRecoveryLoop::control_thresh_ = 10000;
				102	const float ClockRecoveryLoop::COmin = -100.0f;
				103	const float ClockRecoveryLoop::COmax = 100.0f;
John Grossman	c7f57c6	2012-06-26 12:50:28 -0700	[diff] [blame]	104	const uint32_t ClockRecoveryLoop::kMinFullRangeSlewChange_mSec = 300;
				105	const int ClockRecoveryLoop::kSlewChangeStepPeriod_mSec = 10;
				106
Kent Ryhorchuk	11bc45f	2012-02-13 16:24:29 -0800	[diff] [blame]	107
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	108	void ClockRecoveryLoop::reset(bool position, bool frequency) {
				109	Mutex::Autolock lock(&lock_);
				110	reset_l(position, frequency);
				111	}
				112
				113	uint32_t ClockRecoveryLoop::findMinRTTNdx(DisciplineDataPoint* data,
				114	uint32_t count) {
				115	uint32_t min_rtt = 0;
				116	for (uint32_t i = 1; i < count; ++i)
				117	if (data[min_rtt].rtt > data[i].rtt)
				118	min_rtt = i;
				119
				120	return min_rtt;
				121	}
				122
				123	bool ClockRecoveryLoop::pushDisciplineEvent(int64_t local_time,
				124	int64_t nominal_common_time,
				125	int64_t rtt) {
				126	Mutex::Autolock lock(&lock_);
				127
Kent Ryhorchuk	11bc45f	2012-02-13 16:24:29 -0800	[diff] [blame]	128	int64_t local_common_time = 0;
				129	common_clock_->localToCommon(local_time, &local_common_time);
				130	int64_t raw_delta = nominal_common_time - local_common_time;
				131
				132	#ifdef TIME_SERVICE_DEBUG
				133	ALOGE("local=%lld, common=%lld, delta=%lld, rtt=%lld\n",
				134	local_common_time, nominal_common_time,
				135	raw_delta, rtt);
				136	#endif
				137
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	138	// If we have not defined a basis for common time, then we need to use these
				139	// initial points to do so. In order to avoid significant initial error
				140	// from a particularly bad startup data point, we collect the first N data
				141	// points and choose the best of them before moving on.
				142	if (!common_clock_->isValid()) {
				143	if (startup_filter_wr_ < kStartupFilterSize) {
				144	DisciplineDataPoint& d = startup_filter_data_[startup_filter_wr_];
				145	d.local_time = local_time;
				146	d.nominal_common_time = nominal_common_time;
				147	d.rtt = rtt;
				148	startup_filter_wr_++;
				149	}
				150
				151	if (startup_filter_wr_ == kStartupFilterSize) {
				152	uint32_t min_rtt = findMinRTTNdx(startup_filter_data_,
				153	kStartupFilterSize);
				154
				155	common_clock_->setBasis(
				156	startup_filter_data_[min_rtt].local_time,
				157	startup_filter_data_[min_rtt].nominal_common_time);
				158	}
				159
				160	return true;
				161	}
				162
				163	int64_t observed_common;
				164	int64_t delta;
Kent Ryhorchuk	11bc45f	2012-02-13 16:24:29 -0800	[diff] [blame]	165	float delta_f, dCO;
John Grossman	c7f57c6	2012-06-26 12:50:28 -0700	[diff] [blame]	166	int32_t tgt_correction;
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	167
				168	if (OK != common_clock_->localToCommon(local_time, &observed_common)) {
				169	// Since we just checked to make certain that this conversion was valid,
				170	// and no one else in the system should be messing with it, if this
				171	// conversion is suddenly invalid, it is a good reason to panic.
				172	ALOGE("Failed to convert local time to common time in %s:%d",
				173	__PRETTY_FUNCTION__, __LINE__);
				174	return false;
				175	}
				176
				177	// Implement a filter which should match NTP filtering behavior when a
				178	// client is associated with only one peer of lower stratum. Basically,
				179	// always use the best of the N last data points, where best is defined as
				180	// lowest round trip time. NTP uses an N of 8; we use a value of 6.
				181	//
				182	// TODO(johngro) : experiment with other filter strategies. The goal here
				183	// is to mitigate the effects of high RTT data points which typically have
				184	// large asymmetries in the TX/RX legs. Downside of the existing NTP
				185	// approach (particularly because of the PID controller we are using to
				186	// produce the control signal from the filtered data) are that the rate at
				187	// which discipline events are actually acted upon becomes irregular and can
				188	// become drawn out (the time between actionable event can go way up). If
				189	// the system receives a strong high quality data point, the proportional
				190	// component of the controller can produce a strong correction which is left
				191	// in place for too long causing overshoot. In addition, the integral
				192	// component of the system currently is an approximation based on the
				193	// assumption of a more or less homogeneous sampling of the error. Its
				194	// unclear what the effect of undermining this assumption would be right
				195	// now.
				196
				197	// Two ideas which come to mind immediately would be to...
				198	// 1) Keep a history of more data points (32 or so) and ignore data points
				199	// whose RTT is more than a certain number of standard deviations outside
				200	// of the norm.
				201	// 2) Eliminate the PID controller portion of this system entirely.
				202	// Instead, move to a system which uses a very wide filter (128 data
				203	// points or more) with a sum-of-least-squares line fitting approach to
				204	// tracking the long term drift. This would take the place of the I
				205	// component in the current PID controller. Also use a much more narrow
				206	// outlier-rejector filter (as described in #1) to drive a short term
				207	// correction factor similar to the P component of the PID controller.
				208	assert(filter_wr_ < kFilterSize);
				209	filter_data_[filter_wr_].local_time = local_time;
				210	filter_data_[filter_wr_].observed_common_time = observed_common;
				211	filter_data_[filter_wr_].nominal_common_time = nominal_common_time;
				212	filter_data_[filter_wr_].rtt = rtt;
				213	filter_data_[filter_wr_].point_used = false;
Kent Ryhorchuk	11bc45f	2012-02-13 16:24:29 -0800	[diff] [blame]	214	uint32_t current_point = filter_wr_;
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	215	filter_wr_ = (filter_wr_ + 1) % kFilterSize;
				216	if (!filter_wr_)
				217	filter_full_ = true;
				218
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	219	uint32_t scan_end = filter_full_ ? kFilterSize : filter_wr_;
				220	uint32_t min_rtt = findMinRTTNdx(filter_data_, scan_end);
Kent Ryhorchuk	11bc45f	2012-02-13 16:24:29 -0800	[diff] [blame]	221	// We only use packets with low RTTs for control. If the packet RTT
				222	// is less than the panic threshold, we can probably eat the jitter with the
				223	// control loop. Otherwise, take the packet only if it better than all
				224	// of the packets we have in the history. That way we try to track
				225	// something, even if it is noisy.
				226	if (current_point == min_rtt \|\| rtt < control_thresh_) {
				227	delta_f = delta = nominal_common_time - observed_common;
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	228
John Grossman	79489c4	2012-07-20 10:17:26 -0700	[diff] [blame]	229	last_error_est_valid_ = true;
				230	last_error_est_usec_ = delta;
				231
Kent Ryhorchuk	11bc45f	2012-02-13 16:24:29 -0800	[diff] [blame]	232	// Compute the error then clamp to the panic threshold. If we ever
				233	// exceed this amt of error, its time to panic and reset the system.
				234	// Given that the error in the measurement of the error could be as
				235	// high as the RTT of the data point, we don't actually panic until
				236	// the implied error (delta) is greater than the absolute panic
				237	// threashold plus the RTT. IOW - we don't panic until we are
				238	// absoluely sure that our best case sync is worse than the absolute
				239	// panic threshold.
				240	int64_t effective_panic_thresh = panic_thresh_ + rtt;
				241	if ((delta > effective_panic_thresh) \|\|
				242	(delta < -effective_panic_thresh)) {
				243	// PANIC!!!
				244	reset_l(false, true);
				245	return false;
				246	}
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	247
Kent Ryhorchuk	11bc45f	2012-02-13 16:24:29 -0800	[diff] [blame]	248	} else {
				249	// We do not have a good packet to look at, but we also do not want to
				250	// free-run the clock at some crazy slew rate. So we guess the
				251	// trajectory of the clock based on the last controller output and the
				252	// estimated bias of our clock against the master.
				253	// The net effect of this is that CO == CObias after some extended
				254	// period of no feedback.
				255	delta_f = last_delta_f_ - dT*(CO - CObias);
				256	delta = delta_f;
				257	}
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	258
Kent Ryhorchuk	11bc45f	2012-02-13 16:24:29 -0800	[diff] [blame]	259	// Velocity form PI control equation.
				260	dCO = Kc * (1.0f + dT/Ti) * delta_f - Kc * last_delta_f_;
				261	CO += dCO * Tf; // Filter CO by applying gain <1 here.
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	262
Kent Ryhorchuk	11bc45f	2012-02-13 16:24:29 -0800	[diff] [blame]	263	// Save error terms for later.
				264	last_delta_f_ = delta_f;
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	265
Kent Ryhorchuk	11bc45f	2012-02-13 16:24:29 -0800	[diff] [blame]	266	// Clamp CO to +/- 100ppm.
				267	if (CO < COmin)
				268	CO = COmin;
				269	else if (CO > COmax)
				270	CO = COmax;
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	271
Kent Ryhorchuk	11bc45f	2012-02-13 16:24:29 -0800	[diff] [blame]	272	// Update the controller bias.
				273	CObias = bias_Alpha * CO + (1.0f - bias_Alpha) * lastCObias;
				274	lastCObias = CObias;
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	275
Kent Ryhorchuk	11bc45f	2012-02-13 16:24:29 -0800	[diff] [blame]	276	// Convert PPM to 16-bit int range. Add some guard band (-0.01) so we
				277	// don't get fp weirdness.
John Grossman	c7f57c6	2012-06-26 12:50:28 -0700	[diff] [blame]	278	tgt_correction = CO * 327.66;
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	279
				280	// If there was a change in the amt of correction to use, update the
				281	// system.
John Grossman	c7f57c6	2012-06-26 12:50:28 -0700	[diff] [blame]	282	setTargetCorrection_l(tgt_correction);
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	283
Andreas Gampe	4c57eda	2014-11-10 11:51:01 -0800	[diff] [blame]	284	LOG_TS("clock_loop %" PRId64 " %f %f %f %d\n", raw_delta, delta_f, CO, CObias, tgt_correction);
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	285
				286	#ifdef TIME_SERVICE_DEBUG
				287	diag_thread_->pushDisciplineEvent(
				288	local_time,
				289	observed_common,
				290	nominal_common_time,
John Grossman	c7f57c6	2012-06-26 12:50:28 -0700	[diff] [blame]	291	tgt_correction,
Kent Ryhorchuk	11bc45f	2012-02-13 16:24:29 -0800	[diff] [blame]	292	rtt);
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	293	#endif
				294
				295	return true;
				296	}
				297
				298	int32_t ClockRecoveryLoop::getLastErrorEstimate() {
				299	Mutex::Autolock lock(&lock_);
				300
John Grossman	79489c4	2012-07-20 10:17:26 -0700	[diff] [blame]	301	if (last_error_est_valid_)
				302	return last_error_est_usec_;
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	303	else
				304	return ICommonClock::kErrorEstimateUnknown;
				305	}
				306
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	307	void ClockRecoveryLoop::reset_l(bool position, bool frequency) {
				308	assert(NULL != common_clock_);
				309
				310	if (position) {
				311	common_clock_->resetBasis();
				312	startup_filter_wr_ = 0;
				313	}
				314
				315	if (frequency) {
John Grossman	79489c4	2012-07-20 10:17:26 -0700	[diff] [blame]	316	last_error_est_valid_ = false;
				317	last_error_est_usec_ = 0;
Kent Ryhorchuk	11bc45f	2012-02-13 16:24:29 -0800	[diff] [blame]	318	last_delta_f_ = 0.0;
Kent Ryhorchuk	11bc45f	2012-02-13 16:24:29 -0800	[diff] [blame]	319	CO = 0.0f;
				320	lastCObias = CObias = 0.0f;
John Grossman	c7f57c6	2012-06-26 12:50:28 -0700	[diff] [blame]	321	setTargetCorrection_l(0);
				322	applySlew_l();
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	323	}
				324
				325	filter_wr_ = 0;
				326	filter_full_ = false;
				327	}
				328
John Grossman	c7f57c6	2012-06-26 12:50:28 -0700	[diff] [blame]	329	void ClockRecoveryLoop::setTargetCorrection_l(int32_t tgt) {
				330	// When we make a change to the slew rate, we need to be careful to not
				331	// change it too quickly as it can anger some HDMI sinks out there, notably
				332	// some Sony panels from the 2010-2011 timeframe. From experimenting with
				333	// some of these sinks, it seems like swinging from one end of the range to
				334	// another in less that 190mSec or so can start to cause trouble. Adding in
				335	// a hefty margin, we limit the system to a full range sweep in no less than
				336	// 300mSec.
				337	if (tgt_correction_ != tgt) {
				338	int64_t now = local_clock_->getLocalTime();
John Grossman	c7f57c6	2012-06-26 12:50:28 -0700	[diff] [blame]	339
				340	tgt_correction_ = tgt;
				341
				342	// Set up the transformation to figure out what the slew should be at
				343	// any given point in time in the future.
				344	time_to_cur_slew_.a_zero = now;
				345	time_to_cur_slew_.b_zero = cur_correction_;
				346
				347	// Make sure the sign of the slope is headed in the proper direction.
				348	bool needs_increase = (cur_correction_ < tgt_correction_);
				349	bool is_increasing = (time_to_cur_slew_.a_to_b_numer > 0);
				350	if (( needs_increase && !is_increasing) \|\|
				351	(!needs_increase && is_increasing)) {
				352	time_to_cur_slew_.a_to_b_numer = -time_to_cur_slew_.a_to_b_numer;
				353	}
				354
				355	// Finally, figure out when the change will be finished and start the
				356	// slew operation.
				357	time_to_cur_slew_.doReverseTransform(tgt_correction_,
				358	&slew_change_end_time_);
				359
				360	applySlew_l();
				361	}
				362	}
				363
				364	bool ClockRecoveryLoop::applySlew_l() {
				365	bool ret = true;
				366
				367	// If cur == tgt, there is no ongoing sleq rate change and we are already
				368	// finished.
				369	if (cur_correction_ == tgt_correction_)
				370	goto bailout;
				371
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	372	if (local_clock_can_slew_) {
John Grossman	c7f57c6	2012-06-26 12:50:28 -0700	[diff] [blame]	373	int64_t now = local_clock_->getLocalTime();
				374	int64_t tmp;
				375
				376	if (now >= slew_change_end_time_) {
				377	cur_correction_ = tgt_correction_;
				378	next_slew_change_timeout_.setTimeout(-1);
				379	} else {
				380	time_to_cur_slew_.doForwardTransform(now, &tmp);
				381
				382	if (tmp > INT16_MAX)
				383	cur_correction_ = INT16_MAX;
				384	else if (tmp < INT16_MIN)
				385	cur_correction_ = INT16_MIN;
				386	else
				387	cur_correction_ = static_cast<int16_t>(tmp);
				388
				389	next_slew_change_timeout_.setTimeout(kSlewChangeStepPeriod_mSec);
				390	ret = false;
				391	}
				392
				393	local_clock_->setLocalSlew(cur_correction_);
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	394	} else {
John Grossman	c7f57c6	2012-06-26 12:50:28 -0700	[diff] [blame]	395	// Since we are not actually changing the rate of a HW clock, we don't
				396	// need to worry to much about changing the slew rate so fast that we
				397	// anger any downstream HDMI devices.
				398	cur_correction_ = tgt_correction_;
				399	next_slew_change_timeout_.setTimeout(-1);
				400
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	401	// The SW clock recovery implemented by the common clock class expects
Kent Ryhorchuk	11bc45f	2012-02-13 16:24:29 -0800	[diff] [blame]	402	// values expressed in PPM. CO is in ppm.
				403	common_clock_->setSlew(local_clock_->getLocalTime(), CO);
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	404	}
John Grossman	c7f57c6	2012-06-26 12:50:28 -0700	[diff] [blame]	405
				406	bailout:
				407	return ret;
				408	}
				409
				410	int ClockRecoveryLoop::applyRateLimitedSlew() {
				411	Mutex::Autolock lock(&lock_);
				412
				413	int ret = next_slew_change_timeout_.msecTillTimeout();
				414	if (!ret) {
				415	if (applySlew_l())
				416	next_slew_change_timeout_.setTimeout(-1);
				417	ret = next_slew_change_timeout_.msecTillTimeout();
				418	}
				419
				420	return ret;
Mike J. Chen	6c92951	2011-08-15 11:59:47 -0700	[diff] [blame]	421	}
				422
				423	} // namespace android