/*
   -*- linux-c -*-
   drbd_vli.h
   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#ifndef _DRBD_VLI_H
#define _DRBD_VLI_H

/*
 * At a granularity of 4KiB storage represented per bit,
 * and storage sizes of several TiB,
 * and possibly small-bandwidth replication,
 * the bitmap transfer time can take much too long,
 * if transmitted in plain text.
 *
 * We try to reduce the transferred bitmap information
 * by encoding runlengths of bit polarity.
 *
 * We never actually need to encode a "zero" (runlengths are positive).
 * But then we have to store the value of the first bit.
 * The first bit of information thus shall encode whether the first runlength
 * gives the number of set or unset bits.
 *
 * We assume that large areas are either completely set or unset,
 * which gives good compression with any runlength method,
 * even when encoding the runlength as fixed size 32bit/64bit integers.
 *
 * Still, there may be areas where the polarity flips every few bits,
 * and encoding the runlength sequence of those areas with fixed size
 * integers would be much worse than plaintext.
 *
 * We want to encode small runlength values with minimum code length,
 * while still being able to encode a huge run of all zeros.
 *
 * Thus we need a Variable Length Integer encoding, VLI.
 *
 * For some cases, we produce more code bits than plaintext input.
 * We need to send incompressible chunks as plaintext, skip over them
 * and then see if the next chunk compresses better.
 *
 * We don't care too much about "excellent" compression ratio for large
 * runlengths (all set/all clear): whether we achieve a factor of 100
 * or 1000 is not that much of an issue.
 * We do not want to waste too much on short runlengths in the "noisy"
 * parts of the bitmap, though.
 *
 * There are endless variants of VLI; we experimented with:
 *  * simple byte-based
 *  * various bit-based ones with different code word lengths.
 *
 * To avoid yet another configuration parameter (choice of bitmap compression
 * algorithm) which was difficult to explain and tune, we just chose the one
 * variant that turned out best in all test cases.
 * Based on real world usage patterns, with device sizes ranging from a few GiB
 * to several TiB, file server/mailserver/webserver/mysql/postgres,
 * mostly idle to really busy, the all time winner (though sometimes only
 * marginally better) is:
 */
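
/*
 * Example (illustration only, not normative): the bitmap fragment
 *	1 1 1 1 0 0 1 0 0 0
 * starts with a set bit, so the first code bit says "the first run counts
 * set bits"; the runlengths are then 4 (set), 2 (unset), 1 (set), 3 (unset).
 * The receiver flips polarity after each run, so only the lengths need to
 * be transmitted.
 */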

/*
 * encoding is "visualised" as
 * __little endian__ bitstream, least significant bit first (left most)
 *
 * this particular encoding is chosen so that the prefix code
 * starts as unary encoding the level, then modified so that
 * 10 levels can be described in 8 bit, with minimal overhead
 * for the smaller levels.
 *
 * The number of data bits follows the Fibonacci sequence, with the exception
 * of the last level (+1 data bit, so it makes 64 bit total).  The only code
 * worse than plaintext when encoding bit polarity runlengths is a run of
 * length 1: 1 plain bit => 2 code bits.
 prefix    data bits                                                 max val  Nº data bits
 0 x                                                                     0x2             1
 10 x                                                                    0x4             1
 110 xx                                                                  0x8             2
 1110 xxx                                                               0x10             3
 11110 xxx xx                                                           0x30             5
 111110 xx xxxxxx                                                      0x130             8
 11111100  xxxxxxxx xxxxx                                             0x2130            13
 11111110  xxxxxxxx xxxxxxxx xxxxx                                  0x202130            21
 11111101  xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xx                0x400202130            34
 11111111  xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx               56
 * maximum encodable value: 0x100000400202130 == 2**56 + some */
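
/*
 * Worked example (derived from the table above): the runlength value 5
 * falls into the third level.  Its prefix is "110" (3 bits), followed by
 * 2 data bits holding 5 - 5 = 0, so the code word is "110 00", i.e.
 * 5 code bits for the value 5.  The value 48 uses the fifth level:
 * prefix "11110", then 5 data bits holding 48 - 17 = 31, i.e. "11110 11111".
 */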

/* compression "table":
 transmitted   x                                  0.29
 as plaintext x                                    ........................
              x                                    ........................
              x                                    ........................
              x       0.59                     0.21........................
              x       ........................................................
              x       .. c ...................................................
              x   0.44.. o ...................................................
              x .......... d ...................................................
              x .......... e ...................................................
              X.............   ...................................................
              x.............. b ...................................................
          2.0x...............  i ...................................................
             #X................ t ...................................................
             #................. s ...........................  plain bits  ..........
 -+-----------------------------------------------------------------------
  1            16              32                                        64
 */

/* LEVEL: (total bits, prefix bits, prefix value),
 * sorted ascending by number of total bits.
 * The rest of the code table is calculated at compiletime from this. */

/* fibonacci data 1, 1, ... */
#define VLI_L_1_1() do { \
	LEVEL( 2, 1, 0x00); \
	LEVEL( 3, 2, 0x01); \
	LEVEL( 5, 3, 0x03); \
	LEVEL( 7, 4, 0x07); \
	LEVEL(10, 5, 0x0f); \
	LEVEL(14, 6, 0x1f); \
	LEVEL(21, 8, 0x3f); \
	LEVEL(29, 8, 0x7f); \
	LEVEL(42, 8, 0xbf); \
	LEVEL(64, 8, 0xff); \
} while (0)
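
/*
 * Each LEVEL(t, b, v) contributes 2^(t - b) consecutive values: the first
 * level covers runlengths 1..2, the next 3..4, then 5..8, 9..16, 17..48,
 * 49..304, and so on, matching the "max val" column in the table above.
 */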

/* finds a suitable level to decode the least significant part of in.
 * returns number of bits consumed.
 *
 * BUG() for bad input, as that would mean a buggy code table. */
static inline int vli_decode_bits(u64 *out, const u64 in)
{
	u64 adj = 1;

#define LEVEL(t,b,v)					\
	do {						\
		if ((in & ((1 << b) - 1)) == v) {	\
			*out = ((in & ((~0ULL) >> (64 - t))) >> b) + adj; \
			return t;			\
		}					\
		adj += 1ULL << (t - b);			\
	} while (0)

	VLI_L_1_1();

	/* NOT REACHED, if VLI_LEVELS code table is defined properly */
	BUG();
#undef LEVEL
}
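
/*
 * Decode trace for the example above: if the low bits of "in" are 00011
 * (the code word for 5, read LSB first), the first two levels do not match
 * (prefix 0 resp. 01), the third does (in & 0x7 == 0x3).  At that point
 * adj is 1 + 2 + 2 = 5, the two data bits are 0, so *out becomes 5 and
 * 5 code bits are consumed.
 */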

/* return number of code bits needed,
 * or negative error number */
static inline int __vli_encode_bits(u64 *out, const u64 in)
{
	u64 max = 0;
	u64 adj = 1;

	if (in == 0)
		return -EINVAL;

#define LEVEL(t,b,v) do {			\
		max += 1ULL << (t - b);		\
		if (in <= max) {		\
			if (out)		\
				*out = ((in - adj) << b) | v;	\
			return t;		\
		}				\
		adj = max + 1;			\
	} while (0)

	VLI_L_1_1();

	return -EOVERFLOW;
#undef LEVEL
}

#undef VLI_L_1_1
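
/*
 * Round-trip sketch (illustration only; the helper below is hypothetical
 * and not part of DRBD):
 *
 *	static void vli_roundtrip_example(void)
 *	{
 *		u64 code, decoded;
 *		int bits = __vli_encode_bits(&code, 305);
 *
 *		// bits == 21, code == 0x3f (prefix only, all data bits 0)
 *		vli_decode_bits(&decoded, code);
 *		// decoded == 305 again
 *	}
 */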

/* code from here down is independent of the actually used bit code */

/*
 * Code length is determined by some unique (e.g. unary) prefix.
 * This encodes arbitrary bit length, not whole bytes: we have a bit-stream,
 * not a byte stream.
 */

/* for the bitstream, we need a cursor */
struct bitstream_cursor {
	/* the current byte */
	u8 *b;
	/* the current bit within *b, normalized: 0..7 */
	unsigned int bit;
};

/* initialize cursor to point to first bit of stream */
static inline void bitstream_cursor_reset(struct bitstream_cursor *cur, void *s)
{
	cur->b = s;
	cur->bit = 0;
}

/* advance cursor by that many bits; maximum expected input value: 64,
 * but depending on VLI implementation, it may be more. */
static inline void bitstream_cursor_advance(struct bitstream_cursor *cur, unsigned int bits)
{
	bits += cur->bit;
	cur->b = cur->b + (bits >> 3);
	cur->bit = bits & 7;
}
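
/*
 * Example: with the cursor at byte 2, bit 6, advancing by 5 bits gives
 * bits = 6 + 5 = 11, so the cursor moves to byte 2 + (11 >> 3) = byte 3,
 * bit 11 & 7 = 3.
 */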

/* the bitstream itself knows its length */
struct bitstream {
	struct bitstream_cursor cur;
	unsigned char *buf;
	size_t buf_len;		/* in bytes */

	/* for input stream:
	 * number of trailing 0 bits for padding
	 * total number of valid bits in stream: buf_len * 8 - pad_bits */
	unsigned int pad_bits;
};

static inline void bitstream_init(struct bitstream *bs, void *s, size_t len, unsigned int pad_bits)
{
	bs->buf = s;
	bs->buf_len = len;
	bs->pad_bits = pad_bits;
	bitstream_cursor_reset(&bs->cur, bs->buf);
}

static inline void bitstream_rewind(struct bitstream *bs)
{
	bitstream_cursor_reset(&bs->cur, bs->buf);
	memset(bs->buf, 0, bs->buf_len);
}

/* Put (at most 64) least significant bits of val into bitstream, and advance cursor.
 * Ignores "pad_bits".
 * Returns zero if bits == 0 (nothing to do).
 * Returns number of bits used if successful.
 *
 * If there is not enough room left in bitstream,
 * leaves bitstream unchanged and returns -ENOBUFS.
 */
static inline int bitstream_put_bits(struct bitstream *bs, u64 val, const unsigned int bits)
{
	unsigned char *b = bs->cur.b;
	unsigned int tmp;

	if (bits == 0)
		return 0;

	if ((bs->cur.b + ((bs->cur.bit + bits - 1) >> 3)) - bs->buf >= bs->buf_len)
		return -ENOBUFS;

	/* paranoia: strip off hi bits; they should not be set anyway. */
	if (bits < 64)
		val &= ~0ULL >> (64 - bits);

	*b++ |= (val & 0xff) << bs->cur.bit;

	for (tmp = 8 - bs->cur.bit; tmp < bits; tmp += 8)
		*b++ |= (val >> tmp) & 0xff;

	bitstream_cursor_advance(&bs->cur, bits);
	return bits;
}
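
/*
 * Example: putting 5 bits of val = 0x16 (10110b) with the cursor at bit 6
 * of some byte: bits 0-1 of val (10b) are OR-ed into bits 6-7 of the
 * current byte, the loop then ORs bits 2-4 of val (101b) into bits 0-2 of
 * the following byte, and the cursor ends up at bit 3 of that byte.
 */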

/* Fetch (at most 64) bits from bitstream into *out, and advance cursor.
 *
 * If more than 64 bits are requested, returns -EINVAL and leaves *out unchanged.
 *
 * If there are fewer than the requested number of valid bits left in the
 * bitstream, still fetches all available bits.
 *
 * Returns number of actually fetched bits.
 */
static inline int bitstream_get_bits(struct bitstream *bs, u64 *out, int bits)
{
	u64 val;
	unsigned int n;

	if (bits > 64)
		return -EINVAL;

	if (bs->cur.b + ((bs->cur.bit + bs->pad_bits + bits - 1) >> 3) - bs->buf >= bs->buf_len)
		bits = ((bs->buf_len - (bs->cur.b - bs->buf)) << 3)
			- bs->cur.bit - bs->pad_bits;

	if (bits == 0) {
		*out = 0;
		return 0;
	}

	/* get the high bits */
	val = 0;
	n = (bs->cur.bit + bits + 7) >> 3;
	/* n may be at most 9, if cur.bit + bits > 64 */
	/* which means this copies at most 8 bytes */
	if (n) {
		memcpy(&val, bs->cur.b+1, n - 1);
		val = le64_to_cpu(val) << (8 - bs->cur.bit);
	}

	/* we still need the low bits */
	val |= bs->cur.b[0] >> bs->cur.bit;

	/* and mask out bits we don't want */
	val &= ~0ULL >> (64 - bits);

	bitstream_cursor_advance(&bs->cur, bits);
	*out = val;

	return bits;
}
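
/*
 * Example of the clamping above: with a 3 byte buffer, pad_bits = 2 and
 * the cursor at byte 2, bit 1, a request for 8 bits would run past the
 * valid data, so it is reduced to (3 - 2) * 8 - 1 - 2 = 5 bits.
 */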

/* encodes @in as vli into @bs;
 *
 * return values
 *	> 0: number of bits successfully stored in bitstream
 *	-ENOBUFS	@bs is full
 *	-EINVAL		input zero (invalid)
 *	-EOVERFLOW	input too large for this vli code (invalid)
 */
static inline int vli_encode_bits(struct bitstream *bs, u64 in)
{
	u64 code = 0;
	int bits = __vli_encode_bits(&code, in);

	if (bits <= 0)
		return bits;

	return bitstream_put_bits(bs, code, bits);
}

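/*
 * Rough usage sketch (hypothetical helper, for illustration only; the real
 * producers and consumers of this bitstream live in the DRBD .c files):
 *
 *	// encode a sequence of runlengths into a zeroed byte buffer
 *	static int example_encode_runs(u8 *buf, size_t len,
 *				       const u64 *runs, int n)
 *	{
 *		struct bitstream bs;
 *		int i, bits;
 *
 *		memset(buf, 0, len);
 *		bitstream_init(&bs, buf, len, 0);
 *		for (i = 0; i < n; i++) {
 *			bits = vli_encode_bits(&bs, runs[i]);
 *			if (bits <= 0)
 *				return bits;	// full, or bad input
 *		}
 *		return 0;
 *	}
 *
 * The decode side would typically keep a 64 bit look-ahead fetched with
 * bitstream_get_bits(&bs, &look_ahead, 64), pass it to vli_decode_bits(),
 * then shift out the consumed code bits and refill from the stream.
 */
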
#endif