Blame - doc/draft-ietf-codec-oggopus.xml - platform/external/libopus

blob: 6f32906588b8c9906cb152b0f9cc90a29da4bcc2 [file] [log] [blame]

Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1	<?xml version="1.0" encoding="utf-8"?>
Timothy B. Terriberry	50f214c	2012-11-03 13:01:25 -0700	[diff] [blame]	2	<!DOCTYPE rfc SYSTEM 'rfc2629.dtd' [
Timothy B. Terriberry	dd2520c	2012-11-19 15:01:01 -0800	[diff] [blame]	3	<!ENTITY rfc2119 PUBLIC '' 'https://xml2rfc.tools.ietf.org/tools/xml2rfc/public/rfc/bibxml/reference.RFC.2119.xml'>
				4	<!ENTITY rfc3533 PUBLIC '' 'https://xml2rfc.tools.ietf.org/tools/xml2rfc/public/rfc/bibxml/reference.RFC.3533.xml'>
Timothy B. Terriberry	dd2520c	2012-11-19 15:01:01 -0800	[diff] [blame]	5	<!ENTITY rfc3629 PUBLIC '' 'https://xml2rfc.tools.ietf.org/tools/xml2rfc/public/rfc/bibxml/reference.RFC.3629.xml'>
				6	<!ENTITY rfc4732 PUBLIC '' 'https://xml2rfc.tools.ietf.org/tools/xml2rfc/public/rfc/bibxml/reference.RFC.4732.xml'>
Ralph Giles	a1b913f	2013-01-10 17:00:18 -0800	[diff] [blame]	7	<!ENTITY rfc5334 PUBLIC '' 'https://xml2rfc.tools.ietf.org/tools/xml2rfc/public/rfc/bibxml/reference.RFC.5334.xml'>
Timothy B. Terriberry	dd2520c	2012-11-19 15:01:01 -0800	[diff] [blame]	8	<!ENTITY rfc6381 PUBLIC '' 'https://xml2rfc.tools.ietf.org/tools/xml2rfc/public/rfc/bibxml/reference.RFC.6381.xml'>
				9	<!ENTITY rfc6716 PUBLIC '' 'https://xml2rfc.tools.ietf.org/tools/xml2rfc/public/rfc/bibxml/reference.RFC.6716.xml'>
Timothy B. Terriberry	50f214c	2012-11-03 13:01:25 -0700	[diff] [blame]	10	]>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	11	<?rfc toc="yes" symrefs="yes" ?>
				12
Ralph Giles	1474e71	2014-01-17 12:10:50 -0800	[diff] [blame]	13	<rfc ipr="trust200902" category="std" docName="draft-ietf-codec-oggopus-02">
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	14
				15	<front>
				16	<title abbrev="Ogg Opus">Ogg Encapsulation for the Opus Audio Codec</title>
				17	<author initials="T.B." surname="Terriberry" fullname="Timothy B. Terriberry">
				18	<organization>Mozilla Corporation</organization>
				19	<address>
				20	<postal>
				21	<street>650 Castro Street</street>
				22	<city>Mountain View</city>
				23	<region>CA</region>
				24	<code>94041</code>
				25	<country>USA</country>
				26	</postal>
				27	<phone>+1 650 903-0800</phone>
				28	<email>tterribe@xiph.org</email>
				29	</address>
				30	</author>
				31
				32	<author initials="R." surname="Lee" fullname="Ron Lee">
				33	<organization>Voicetronix</organization>
				34	<address>
				35	<postal>
				36	<street>246 Pulteney Street, Level 1</street>
				37	<city>Adelaide</city>
				38	<region>SA</region>
				39	<code>5000</code>
				40	<country>Australia</country>
				41	</postal>
				42	<phone>+61 8 8232 9112</phone>
				43	<email>ron@debian.org</email>
				44	</address>
				45	</author>
				46
Ralph Giles	1935025	2012-07-16 11:41:27 -0400	[diff] [blame]	47	<author initials="R." surname="Giles" fullname="Ralph Giles">
				48	<organization>Mozilla Corporation</organization>
				49	<address>
				50	<postal>
				51	<street>163 West Hastings Street</street>
				52	<city>Vancouver</city>
				53	<region>BC</region>
				54	<code>V6B 1H5</code>
				55	<country>Canada</country>
				56	</postal>
Ralph Giles	0f9c404	2014-01-17 11:15:34 -0800	[diff] [blame]	57	<phone>+1 778 785 1540</phone>
Ralph Giles	1935025	2012-07-16 11:41:27 -0400	[diff] [blame]	58	<email>giles@xiph.org</email>
				59	</address>
				60	</author>
				61
Ralph Giles	1474e71	2014-01-17 12:10:50 -0800	[diff] [blame]	62	<date day="17" month="January" year="2014"/>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	63	<area>RAI</area>
				64	<workgroup>codec</workgroup>
				65
				66	<abstract>
				67	<t>
				68	This document defines the Ogg encapsulation for the Opus interactive speech and
				69	audio codec.
				70	This allows data encoded in the Opus format to be stored in an Ogg logical
				71	bitstream.
Ralph Giles	e515687	2012-07-06 12:17:23 -0700	[diff] [blame]	72	Ogg encapsulation provides Opus with a long-term storage format supporting
				73	all of the essential features, including metadata, fast and accurate seeking,
				74	corruption detection, recapture after errors, low overhead, and the ability to
				75	multiplex Opus with other codecs (including video) with minimal buffering.
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	76	It also provides a live streamable format, capable of delivery over a reliable
				77	stream-oriented transport, without requiring all the data, or even the total
				78	length of the data, up-front, in a form that is identical to the on-disk
				79	storage format.
				80	</t>
				81	</abstract>
				82	</front>
				83
				84	<middle>
				85	<section anchor="intro" title="Introduction">
				86	<t>
				87	The IETF Opus codec is a low-latency audio codec optimized for both voice and
				88	general-purpose audio.
Timothy B. Terriberry	50f214c	2012-11-03 13:01:25 -0700	[diff] [blame]	89	See <xref target="RFC6716"/> for technical details.
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	90	This document defines the encapsulation of Opus in a continuous, logical Ogg
				91	bitstream <xref target="RFC3533"/>.
				92	</t>
				93	<t>
				94	Ogg bitstreams are made up of a series of 'pages', each of which contains data
				95	from one or more 'packets'.
				96	Pages are the fundamental unit of multiplexing in an Ogg stream.
				97	Each page is associated with a particular logical stream and contains a capture
				98	pattern and checksum, flags to mark the beginning and end of the logical
				99	stream, and a 'granule position' that represents an absolute position in the
				100	stream, to aid seeking.
				101	A single page can contain up to 65,025 octets of packet data from up to 255
				102	different packets.
				103	Packets may be split arbitrarily across pages, and continued from one page to
				104	the next (allowing packets much larger than would fit on a single page).
				105	Each page contains 'lacing values' that indicate how the data is partitioned
				106	into packets, allowing a demuxer to recover the packet boundaries without
				107	examining the encoded data.
				108	A packet is said to 'complete' on a page when the page contains the final
				109	lacing value corresponding to that packet.
				110	</t>
				111	<t>
				112	This encapsulation defines the required contents of the packet data, including
				113	the necessary headers, the organization of those packets into a logical
				114	stream, and the interpretation of the codec-specific granule position field.
				115	It does not attempt to describe or specify the existing Ogg container format.
				116	Readers unfamiliar with the basic concepts mentioned above are encouraged to
				117	review the details in <xref target="RFC3533"/>.
				118	</t>
				119
				120	</section>
				121
				122	<section anchor="terminology" title="Terminology">
				123	<t>
				124	The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
				125	"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
				126	interpreted as described in <xref target="RFC2119"/>.
				127	</t>
				128
				129	<t>
				130	Implementations that fail to satisfy one or more "MUST" requirements are
				131	considered non-compliant.
				132	Implementations that satisfy all "MUST" requirements, but fail to satisfy one
				133	or more "SHOULD" requirements are said to be "conditionally compliant".
				134	All other implementations are "unconditionally compliant".
				135	</t>
				136
				137	</section>
				138
				139	<section anchor="packet_organization" title="Packet Organization">
				140	<t>
				141	An Opus stream is organized as follows.
				142	</t>
				143	<t>
				144	There are two mandatory header packets.
				145	The granule position of the pages on which these packets complete MUST be zero.
				146	</t>
				147	<t>
				148	The first packet in the logical Ogg bitstream MUST contain the identification
				149	(ID) header, which uniquely identifies a stream as Opus audio.
				150	The format of this header is defined in <xref target="id_header"/>.
				151	It MUST be placed alone (without any other packet data) on the first page of
Timothy B. Terriberry	b374461	2012-07-16 13:17:27 -0700	[diff] [blame]	152	the logical Ogg bitstream, and MUST complete on that page.
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	153	This page MUST have its 'beginning of stream' flag set.
				154	</t>
				155	<t>
				156	The second packet in the logical Ogg bitstream MUST contain the comment header,
				157	which contains user-supplied metadata.
				158	The format of this header is defined in <xref target="comment_header"/>.
				159	It MAY span one or more pages, beginning on the second page of the logical
				160	stream.
				161	However many pages it spans, the comment header packet MUST finish the page on
				162	which it completes.
				163	</t>
				164	<t>
Ralph Giles	19658bd	2012-07-16 12:34:04 -0400	[diff] [blame]	165	All subsequent pages are audio data pages, and the Ogg packets they contain are
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	166	audio data packets.
				167	Each audio data packet contains one Opus packet for each of N different
				168	streams, where N is typically one for mono or stereo, but may be greater than
				169	one for, e.g., multichannel audio.
				170	The value N is specified in the ID header (see
				171	<xref target="channel_mapping"/>), and is fixed over the entire length of the
				172	logical Ogg bitstream.
				173	</t>
				174	<t>
Timothy B. Terriberry	b374461	2012-07-16 13:17:27 -0700	[diff] [blame]	175	The first N-1 Opus packets, if any, are packed one after another into the Ogg
				176	packet, using the self-delimiting framing from Appendix B of
Timothy B. Terriberry	50f214c	2012-11-03 13:01:25 -0700	[diff] [blame]	177	<xref target="RFC6716"/>.
Ralph Giles	360a411	2012-07-16 13:53:29 -0400	[diff] [blame]	178	The remaining Opus packet is packed at the end of the Ogg packet using the
Timothy B. Terriberry	50f214c	2012-11-03 13:01:25 -0700	[diff] [blame]	179	regular, undelimited framing from Section 3 of <xref target="RFC6716"/>.
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	180	All of the Opus packets in a single Ogg packet MUST be constrained to have the
				181	same duration.
				182	A decoder SHOULD treat any Opus packet whose duration is different from that of
				183	the first Opus packet in an Ogg packet as if it were an Opus packet with an
				184	illegal TOC sequence.
				185	</t>
				186	<t>
Ron	e37262c	2014-01-19 15:20:05 +1030	[diff] [blame]	187	The coding mode (SILK, Hybrid, or CELT), audio bandwidth, channel count,
				188	duration (frame size), and number of frames per packet, are indicated in the
				189	TOC (table of contents) in the first byte of each Opus packet, as described
Ron	deb46d1	2014-01-19 15:52:31 +1030	[diff] [blame]	190	in Section 3.1 of <xref target="RFC6716"/>.
Ron	e37262c	2014-01-19 15:20:05 +1030	[diff] [blame]	191	The combination of mode, audio bandwidth, and frame size, is referred to as
				192	the configuration of an Opus packet.
				193	</t>
				194	<t>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	195	The first audio data page SHOULD NOT have the 'continued packet' flag set
Timothy B. Terriberry	ad333d0	2012-08-24 11:54:38 -0700	[diff] [blame]	196	(which would indicate the first audio data packet is continued from a previous
				197	page).
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	198	Packets MUST be placed into Ogg pages in order until the end of stream.
				199	Audio packets MAY span page boundaries.
				200	A decoder MUST treat a zero-octet audio data packet as if it were an Opus
				201	packet with an illegal TOC sequence.
				202	The last page SHOULD have the 'end of stream' flag set, but implementations
				203	need to be prepared to deal with truncated streams that do not have a page
				204	marked 'end of stream'.
				205	The final packet on the last page SHOULD NOT be a continued packet, i.e., the
				206	final lacing value SHOULD be less than 255.
				207	There MUST NOT be any more pages in an Opus logical bitstream after a page
				208	marked 'end of stream'.
				209	</t>
				210	</section>
				211
				212	<section anchor="granpos" title="Granule Position">
				213	<t>
				214	The granule position of an audio data page encodes the total number of PCM
				215	samples in the stream up to and including the last fully-decodable sample from
				216	the last packet completed on that page.
				217	A page that is entirely spanned by a single packet (that completes on a
				218	subsequent page) has no granule position, and the granule position field MUST
				219	be set to the special value '-1' in two's complement.
				220	</t>
				221
				222	<t>
				223	The granule position of an audio data page is in units of PCM audio samples at
				224	a fixed rate of 48 kHz (per channel; a stereo stream's granule position
				225	does not increment at twice the speed of a mono stream).
				226	It is possible to run an Opus decoder at other sampling rates, but the value
				227	in the granule position field always counts samples assuming a 48 kHz
				228	decoding rate, and the rest of this specification makes the same assumption.
				229	</t>
				230
				231	<t>
				232	The duration of an Opus packet may be any multiple of 2.5 ms, up to a
				233	maximum of 120 ms.
				234	This duration is encoded in the TOC sequence at the beginning of each packet.
				235	The number of samples returned by a decoder corresponds to this duration
				236	exactly, even for the first few packets.
				237	For example, a 20 ms packet fed to a decoder running at 48 kHz will
				238	always return 960 samples.
				239	A demuxer can parse the TOC sequence at the beginning of each Ogg packet to
				240	work backwards or forwards from a packet with a known granule position (i.e.,
				241	the last packet completed on some page) in order to assign granule positions
				242	to every packet, or even every individual sample.
				243	The one exception is the last page in the stream, as described below.
				244	</t>
				245
				246	<t>
				247	All other pages with completed packets after the first MUST have a granule
				248	position equal to the number of samples contained in packets that complete on
				249	that page plus the granule position of the most recent page with completed
				250	packets.
				251	This guarantees that a demuxer can assign individual packets the same granule
				252	position when working forwards as when working backwards.
				253	For this to work, there cannot be any gaps.
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	254	</t>
				255
Ralph Giles	998e9e0	2014-01-14 15:40:16 -0800	[diff] [blame]	256	<section anchor="gap-repair" title="Repairing Gaps in Real-time Streams">
				257	<t>
Ralph Giles	1e0b6fd	2014-01-14 17:23:00 -0800	[diff] [blame]	258	In order to support capturing a real-time stream that has lost or not
				259	transmitted packets, a muxer SHOULD emit packets that explicitly request the
				260	use of Packet Loss Concealment (PLC) in place of the missing packets.
Ralph Giles	998e9e0	2014-01-14 15:40:16 -0800	[diff] [blame]	261	Only gaps that are a multiple of 2.5 ms are repairable, as these are the
Ralph Giles	1e0b6fd	2014-01-14 17:23:00 -0800	[diff] [blame]	262	only durations that can be created by packet loss or discontinuous
				263	transmission.
Ralph Giles	998e9e0	2014-01-14 15:40:16 -0800	[diff] [blame]	264	Muxers need not handle other gap sizes.
				265	Creating the necessary packets involves synthesizing a TOC byte (defined in
Ralph Giles	1e0b6fd	2014-01-14 17:23:00 -0800	[diff] [blame]	266	Section 3.1 of <xref target="RFC6716"/>)—and whatever
				267	additional internal framing is needed—to indicate the packet duration
				268	for each stream.
Ralph Giles	998e9e0	2014-01-14 15:40:16 -0800	[diff] [blame]	269	The actual length of each missing Opus frame inside the packet is zero bytes,
				270	as defined in Section 3.2.1 of <xref target="RFC6716"/>.
				271	</t>
				272
				273	<t>
Ralph Giles	b30b2ba	2014-01-17 15:22:57 -0800	[diff] [blame]	274	Zero-byte frames MAY be packed into packets using any of codes 0, 1,
				275	2, or 3.
				276	When successive frames have the same configuration, the higher code packings
				277	reduce overhead.
				278	Likewise, if the TOC configuration matches, the muxer MAY further combine the
				279	empty frames with previous or subsequent non-zero-length frames (using
				280	code 2 or VBR code 3).
				281	</t>
				282
				283	<t>
Ralph Giles	998e9e0	2014-01-14 15:40:16 -0800	[diff] [blame]	284	<xref target="RFC6716"/> does not impose any requirements on the PLC, but this
				285	section outlines choices that are expected to have a positive influence on
				286	most PLC implementations, including the reference implementation.
Ralph Giles	3ba1bb0	2014-01-17 12:09:45 -0800	[diff] [blame]	287	Synthesized TOC bytes SHOULD maintain the same mode, audio bandwidth,
Ralph Giles	1e0b6fd	2014-01-14 17:23:00 -0800	[diff] [blame]	288	channel count, and frame size as the previous packet (if any).
Ralph Giles	998e9e0	2014-01-14 15:40:16 -0800	[diff] [blame]	289	This is the simplest and usually the most well-tested case for the PLC to
Ralph Giles	1e0b6fd	2014-01-14 17:23:00 -0800	[diff] [blame]	290	handle and it covers all losses that do not include a configuration switch,
				291	as defined in Section 4.5 of <xref target="RFC6716"/>.
Ralph Giles	998e9e0	2014-01-14 15:40:16 -0800	[diff] [blame]	292	</t>
				293
				294	<t>
				295	When a previous packet is available, keeping the audio bandwidth and channel
				296	count the same allows the PLC to provide maximum continuity in the concealment
				297	data it generates.
				298	However, if the size of the gap is not a multiple of the most recent frame
				299	size, then the frame size will have to change for at least some frames.
Ralph Giles	3ba1bb0	2014-01-17 12:09:45 -0800	[diff] [blame]	300	Such changes SHOULD be delayed as long as possible to simplify
				301	things for PLC implementations.
Ralph Giles	1e0b6fd	2014-01-14 17:23:00 -0800	[diff] [blame]	302	</t>
				303
				304	<t>
				305	As an example, a 95 ms gap could be encoded as nineteen 5 ms frames
				306	in two bytes with a single CBR code 3 packet.
				307	If the previous frame size was 20 ms, using four 20 ms frames
Ralph Giles	998e9e0	2014-01-14 15:40:16 -0800	[diff] [blame]	308	followed by three 5 ms frames requires 4 bytes (plus an extra byte
				309	of Ogg lacing overhead), but allows the PLC to use its well-tested steady
				310	state behavior for as long as possible.
				311	The total bitrate of the latter approach, including Ogg overhead, is about
				312	0.4 kbps, so the impact on file size is minimal.
				313	</t>
				314
				315	<t>
				316	Changing modes is discouraged, since this causes some decoder implementations
				317	to reset their PLC state.
Ralph Giles	3ba1bb0	2014-01-17 12:09:45 -0800	[diff] [blame]	318	However, SILK and Hybrid mode frames cannot fill gaps that are not a multiple
				319	of 10 ms.
				320	If switching to CELT mode is needed to match the gap size, a muxer SHOULD do
				321	so at the end of the gap to allow the PLC to function for as long as possible.
				322	</t>
				323
				324	<t>
				325	In the example above, if the previous frame was a 20 ms SILK mode frame,
				326	the better solution is to synthesize a packet describing four 20 ms SILK
				327	frames, followed by a packet with a single 10 ms SILK
Ralph Giles	1e0b6fd	2014-01-14 17:23:00 -0800	[diff] [blame]	328	frame, and finally a packet with a 5 ms CELT frame, to fill the 95 ms
				329	gap.
				330	This also requires four bytes to describe the synthesized packet data (two
Ralph Giles	3ba1bb0	2014-01-17 12:09:45 -0800	[diff] [blame]	331	bytes for a CBR code 3 and one byte each for two code 0 packets) but three
				332	bytes of Ogg lacing overhead are required to mark the packet boundaries.
				333	At 0.6 kbps, this is still a minimal bitrate impact over a naive, low quality
Ralph Giles	1e0b6fd	2014-01-14 17:23:00 -0800	[diff] [blame]	334	solution.
				335	</t>
				336
				337	<t>
Ron	e37262c	2014-01-19 15:20:05 +1030	[diff] [blame]	338	Since medium-band audio is an option only in the SILK mode, wideband frames
Ron	b331167	2014-01-19 16:03:44 +1030	[diff] [blame]	339	SHOULD be generated if switching from that configuration to CELT mode, to
				340	ensure that any PLC implementation which does try to migrate state between
				341	the modes will be able to preserve all of the available audio bandwidth.
Ralph Giles	998e9e0	2014-01-14 15:40:16 -0800	[diff] [blame]	342	</t>
				343
Ralph Giles	998e9e0	2014-01-14 15:40:16 -0800	[diff] [blame]	344	</section>
				345
Timothy B. Terriberry	b374461	2012-07-16 13:17:27 -0700	[diff] [blame]	346	<section anchor="preskip" title="Pre-skip">
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	347	<t>
				348	There is some amount of latency introduced during the decoding process, to
Ron	e37262c	2014-01-19 15:20:05 +1030	[diff] [blame]	349	allow for overlap in the CELT mode, stereo mixing in the SILK mode, and
				350	resampling.
				351	The encoder will also introduce latency (though the exact amount is not
				352	specified).
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	353	Therefore, the first few samples produced by the decoder do not correspond to
				354	real input audio, but are instead composed of padding inserted by the encoder
				355	to compensate for this latency.
				356	These samples need to be stored and decoded, as Opus is an asymptotically
				357	convergent predictive codec, meaning the decoded contents of each frame depend
				358	on the recent history of decoder inputs.
				359	However, a decoder will want to skip these samples after decoding them.
				360	</t>
				361
				362	<t>
				363	A 'pre-skip' field in the ID header (see <xref target="id_header"/>) signals
Timothy B. Terriberry	dd2520c	2012-11-19 15:01:01 -0800	[diff] [blame]	364	the number of samples which SHOULD be skipped (decoded but discarded) at the
Ralph Giles	360a411	2012-07-16 13:53:29 -0400	[diff] [blame]	365	beginning of the stream.
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	366	This provides sufficient history to the decoder so that it has already
				367	converged before the stream's output begins.
				368	It may also be used to perform sample-accurate cropping of existing encoded
				369	streams.
				370	This amount need not be a multiple of 2.5 ms, may be smaller than a single
				371	packet, or may span the contents of several packets.
				372	</t>
Timothy B. Terriberry	b374461	2012-07-16 13:17:27 -0700	[diff] [blame]	373	</section>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	374
Timothy B. Terriberry	b374461	2012-07-16 13:17:27 -0700	[diff] [blame]	375	<section anchor="pcm_sample_position" title="PCM Sample Position">
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	376	<t>
				377	The PCM sample position is determined from the granule position using the
				378	formula
				379	<figure align="center">
				380	<artwork align="center"><![CDATA[
				381	'PCM sample position' = 'granule position' - 'pre-skip' .
				382	]]></artwork>
				383	</figure>
				384	</t>
				385
				386	<t>
				387	For example, if the granule position of the first audio data page is 59,971,
				388	and the pre-skip is 11,971, then the PCM sample position of the last decoded
				389	sample from that page is 48,000.
				390	This can be converted into a playback time using the formula
				391	<figure align="center">
				392	<artwork align="center"><![CDATA[
				393	                  'PCM sample position'
				394	'playback time' = --------------------- .
				395	                         48000.0
				396	]]></artwork>
				397	</figure>
				398	</t>
				399
				400	<t>
				401	The initial PCM sample position before any samples are played is normally '0'.
				402	In this case, the PCM sample position of the first audio sample to be played
				403	starts at '1', because it marks the time on the clock
				404	<spanx style="emph">after</spanx> that sample has been played, and a stream
				405	that is exactly one second long has a final PCM sample position of '48000',
				406	as in the example here.
				407	</t>
				408
				409	<t>
				410	Vorbis streams use a granule position smaller than the number of audio samples
				411	contained in the first audio data page to indicate that some of those samples
Timothy B. Terriberry	b374461	2012-07-16 13:17:27 -0700	[diff] [blame]	412	must be trimmed from the output (see <xref target="vorbis-trim"/>).
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	413	However, to do so, Vorbis requires that the first audio data page contains
				414	exactly two packets, in order to allow the decoder to perform PCM position
				415	adjustments before needing to return any PCM data.
				416	Opus uses the pre-skip mechanism for this purpose instead, since the encoder
				417	may introduce more than a single packet's worth of latency, and since very
Ralph Giles	b0794ba	2012-07-16 17:37:54 -0400	[diff] [blame]	418	large packets in streams with a very large number of channels might not fit
				419	on a single page.
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	420	</t>
Timothy B. Terriberry	b374461	2012-07-16 13:17:27 -0700	[diff] [blame]	421	</section>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	422
Timothy B. Terriberry	50f214c	2012-11-03 13:01:25 -0700	[diff] [blame]	423	<section anchor="end_trimming" title="End Trimming">
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	424	<t>
				425	The page with the 'end of stream' flag set MAY have a granule position that
				426	indicates the page contains less audio data than would normally be returned by
				427	decoding up through the final packet.
				428	This is used to end the stream somewhere other than an even frame boundary.
				429	The granule position of the most recent audio data page with completed packets
				430	is used to make this determination, or '0' is used if there were no previous
				431	audio data pages with a completed packet.
				432	The difference between these granule positions indicates how many samples to
				433	keep after decoding the packets that completed on the final page.
				434	The remaining samples are discarded.
				435	The number of discarded samples SHOULD be no larger than the number decoded
				436	from the last packet.
				437	</t>
Timothy B. Terriberry	b374461	2012-07-16 13:17:27 -0700	[diff] [blame]	438	</section>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	439
Timothy B. Terriberry	b374461	2012-07-16 13:17:27 -0700	[diff] [blame]	440	<section anchor="start_granpos_restrictions"
				441	title="Restrictions on the Initial Granule Position">
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	442	<t>
				443	The granule position of the first audio data page with a completed packet MAY
				444	be larger than the number of samples contained in packets that complete on
				445	that page, however it MUST NOT be smaller, unless that page has the 'end of
				446	stream' flag set.
				447	Allowing a granule position larger than the number of samples allows the
				448	beginning of a stream to be cropped or a live stream to be joined without
				449	rewriting the granule position of all the remaining pages.
				450	This means that the PCM sample position just before the first sample to be
				451	played may be larger than '0'.
				452	Synchronization when multiplexing with other logical streams still uses the PCM
				453	sample position relative to '0' to compute sample times.
				454	This does not affect the behavior of pre-skip: exactly 'pre-skip' samples
				455	should be skipped from the beginning of the decoded output, even if the
				456	initial PCM sample position is greater than zero.
				457	</t>
				458
				459	<t>
				460	On the other hand, a granule position that is smaller than the number of
				461	decoded samples prevents a demuxer from working backwards to assign each
				462	packet or each individual sample a valid granule position, since granule
				463	positions must be non-negative.
				464	A decoder MUST reject as invalid any stream where the granule position is
				465	smaller than the number of samples contained in packets that complete on the
				466	first audio data page with a completed packet, unless that page has the 'end
				467	of stream' flag set.
				468	It MAY defer this action until it decodes the last packet completed on that
				469	page.
Timothy B. Terriberry	dd2520c	2012-11-19 15:01:01 -0800	[diff] [blame]	470	</t>
				471
				472	<t>
				473	If that page has the 'end of stream' flag set, a demuxer MUST reject as invalid
				474	any stream where its granule position is smaller than the 'pre-skip' amount.
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	475	This would indicate that more samples should be skipped from the initial
				476	decoded output than exist in the stream.
Timothy B. Terriberry	dd2520c	2012-11-19 15:01:01 -0800	[diff] [blame]	477	If the granule position is smaller than the number of decoded samples produced
				478	by the packets that complete on that page, then a demuxer MUST use an initial
				479	granule position of '0', and can work forwards from '0' to timestamp
				480	individual packets.
				481	If the granule position is larger than the number of decoded samples available,
				482	then the demuxer MUST still work backwards as described above, even if the
				483	'end of stream' flag is set, to determine the initial granule position, and
				484	thus the initial PCM sample position.
				485	Both of these will be greater than '0' in this case.
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	486	</t>
				487	</section>
				488
Timothy B. Terriberry	b374461	2012-07-16 13:17:27 -0700	[diff] [blame]	489	<section anchor="seeking_and_preroll" title="Seeking and Pre-roll">
				490	<t>
				491	Seeking in Ogg files is best performed using a bisection search for a page
				492	whose granule position corresponds to a PCM position at or before the seek
				493	target.
				494	With appropriately weighted bisection, accurate seeking can be performed with
				495	just three or four bisections even in multi-gigabyte files.
				496	See <xref target="seeking"/> for general implementation guidance.
				497	</t>
				498
				499	<t>
				500	When seeking within an Ogg Opus stream, the decoder SHOULD start decoding (and
				501	discarding the output) at least 3840 samples (80 ms) prior to the
				502	seek target in order to ensure that the output audio is correct by the time it
				503	reaches the seek target.
				504	This 'pre-roll' is separate from, and unrelated to, the 'pre-skip' used at the
				505	beginning of the stream.
				506	If the point 80 ms prior to the seek target comes before the initial PCM
				507	sample position, the decoder SHOULD start decoding from the beginning of the
				508	stream, applying pre-skip as normal, regardless of whether the pre-skip is
Timothy B. Terriberry	dd2520c	2012-11-19 15:01:01 -0800	[diff] [blame]	509	larger or smaller than 80 ms, and then continue to discard the samples
				510	required to reach the seek target (if any).
Timothy B. Terriberry	b374461	2012-07-16 13:17:27 -0700	[diff] [blame]	511	</t>
				512	</section>
				513
				514	</section>
				515
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	516	<section anchor="headers" title="Header Packets">
				517	<t>
Ralph Giles	05bf400	2012-12-19 12:21:06 -0800	[diff] [blame]	518	An Opus stream contains exactly two mandatory header packets:
				519	an identification header and a comment header.
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	520	</t>
				521
				522	<section anchor="id_header" title="Identification Header">
				523
				524	<figure anchor="id_header_packet" title="ID Header Packet" align="center">
				525	<artwork align="center"><![CDATA[
				526	 0                   1                   2                   3
				527	 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
				528	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
				529	|      'O'      |      'p'      |      'u'      |      's'      |
				530	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
				531	|      'H'      |      'e'      |      'a'      |      'd'      |
				532	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
				533	|  Version = 1  | Channel Count |           Pre-skip            |
				534	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
				535	|                     Input Sample Rate (Hz)                    |
				536	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
				537	|   Output Gain (Q7.8 in dB)    | Mapping Family|               |
				538	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+               :
				539	|                                                               |
				540	:               Optional Channel Mapping Table...               :
				541	|                                                               |
				542	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
				543	]]></artwork>
				544	</figure>
				545
				546	<t>
				547	The fields in the identification (ID) header have the following meaning:
				548	<list style="numbers">
				549	<t><spanx style="strong">Magic Signature</spanx>:
				550	<vspace blankLines="1"/>
				551	This is an 8-octet (64-bit) field that allows codec identification and is
				552	human-readable.
				553	It contains, in order, the magic numbers:
				554	<list style="empty">
				555	<t>0x4F 'O'</t>
				556	<t>0x70 'p'</t>
				557	<t>0x75 'u'</t>
				558	<t>0x73 's'</t>
				559	<t>0x48 'H'</t>
				560	<t>0x65 'e'</t>
				561	<t>0x61 'a'</t>
				562	<t>0x64 'd'</t>
				563	</list>
				564	Starting with "Op" helps distinguish it from audio data packets, as this is an
				565	invalid TOC sequence.
				566	<vspace blankLines="1"/>
				567	</t>
				568	<t><spanx style="strong">Version</spanx> (8 bits, unsigned):
				569	<vspace blankLines="1"/>
				570	The version number MUST always be '1' for this version of the encapsulation
				571	specification.
				572	Implementations SHOULD treat streams where the upper four bits of the version
				573	number match that of a recognized specification as backwards-compatible with
				574	that specification.
				575	That is, the version number can be split into "major" and "minor" version
				576	sub-fields, with changes to the "minor" sub-field (in the lower four bits)
				577	signaling compatible changes.
				578	For example, a decoder implementing this specification SHOULD accept any stream
				579	with a version number of '15' or less, and SHOULD assume any stream with a
				580	version number '16' or greater is incompatible.
				581	The initial version '1' was chosen to keep implementations from relying on this
				582	octet as a null terminator for the "OpusHead" string.
				583	<vspace blankLines="1"/>
				584	</t>
				585	<t><spanx style="strong">Output Channel Count</spanx> 'C' (8 bits, unsigned):
				586	<vspace blankLines="1"/>
				587	This is the number of output channels.
				588	This might be different than the number of encoded channels, which can change
				589	on a packet-by-packet basis.
				590	This value MUST NOT be zero.
				591	The maximum allowable value depends on the channel mapping family, and might be
				592	as large as 255.
				593	See <xref target="channel_mapping"/> for details.
				594	<vspace blankLines="1"/>
				595	</t>
				596	<t><spanx style="strong">Pre-skip</spanx> (16 bits, unsigned, little
				597	endian):
				598	<vspace blankLines="1"/>
				599	This is the number of samples (at 48 kHz) to discard from the decoder
				600	output when starting playback, and also the number to subtract from a page's
				601	granule position to calculate its PCM sample position.
Ralph Giles	05bf400	2012-12-19 12:21:06 -0800	[diff] [blame]	602	When cropping the beginning of existing Ogg Opus streams, a pre-skip of at
				603	least 3,840 samples (80 ms) is RECOMMENDED to ensure complete
				604	convergence in the decoder.
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	605	<vspace blankLines="1"/>
				606	</t>
				607	<t><spanx style="strong">Input Sample Rate</spanx> (32 bits, unsigned, little
				608	endian):
				609	<vspace blankLines="1"/>
				610	This field is <spanx style="emph">not</spanx> the sample rate to use for
				611	playback of the encoded data.
				612	<vspace blankLines="1"/>
Ron	e37262c	2014-01-19 15:20:05 +1030	[diff] [blame]	613	Opus can switch between internal audio bandwidths of 4, 6, 8, 12, and
				614	20 kHz.
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	615	Each packet in the stream may have a different audio bandwidth.
				616	Regardless of the audio bandwidth, the reference decoder supports decoding any
				617	stream at a sample rate of 8, 12, 16, 24, or 48 kHz.
				618	The original sample rate of the encoder input is not preserved by the lossy
				619	compression.
				620	<vspace blankLines="1"/>
				621	An Ogg Opus player SHOULD select the playback sample rate according to the
				622	following procedure:
				623	<list style="numbers">
Timothy B. Terriberry	b374461	2012-07-16 13:17:27 -0700	[diff] [blame]	624	<t>If the hardware supports 48 kHz playback, decode at 48 kHz.</t>
				625	<t>Otherwise, if the hardware's highest available sample rate is a supported
				626	rate, decode at this sample rate.</t>
				627	<t>Otherwise, if the hardware's highest available sample rate is less than
				628	48 kHz, decode at the highest supported rate above this and resample.</t>
				629	<t>Otherwise, decode at 48 kHz and resample.</t>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	630	</list>
				631	However, the 'Input Sample Rate' field allows the encoder to pass the sample
				632	rate of the original input stream as metadata.
				633	This may be useful when the user requires the output sample rate to match the
				634	input sample rate.
				635	For example, a non-player decoder writing PCM format samples to disk might
				636	choose to resample the output audio back to the original input sample rate to
				637	reduce surprise to the user, who might reasonably expect to get back a file
				638	with the same sample rate as the one they fed to the encoder.
				639	<vspace blankLines="1"/>
				640	A value of zero indicates 'unspecified'.
				641	Encoders SHOULD write the actual input sample rate or zero, but decoder
				642	implementations which do something with this field SHOULD take care to behave
				643	sanely if given crazy values (e.g., do not actually upsample the output to
				644	10 MHz if requested).
				645	<vspace blankLines="1"/>
				646	</t>
				647	<t><spanx style="strong">Output Gain</spanx> (16 bits, signed, little
				648	endian):
				649	<vspace blankLines="1"/>
				650	This is a gain to be applied by the decoder.
				651	It is 20*log10 of the factor to scale the decoder output by to achieve the
				652	desired playback volume, stored in a 16-bit, signed, two's complement
				653	fixed-point value with 8 fractional bits (i.e., Q7.8).
				654	To apply the gain, a decoder could use
				655	<figure align="center">
				656	<artwork align="center"><![CDATA[
				657	sample *= pow(10, output_gain/(20.0*256)) ,
				658	]]></artwork>
				659	</figure>
				660	where output_gain is the raw 16-bit value from the header.
				661	<vspace blankLines="1"/>
				662	Virtually all players and media frameworks should apply it by default.
				663	If a player chooses to apply any volume adjustment or gain modification, such
Timothy B. Terriberry	a7df963	2012-07-05 14:26:02 -0700	[diff] [blame]	664	as the R128_TRACK_GAIN (see <xref target="comment_header"/>) or a user-facing
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	665	volume knob, the adjustment MUST be applied in addition to this output gain in
				666	order to achieve playback at the desired volume.
				667	<vspace blankLines="1"/>
				668	An encoder SHOULD set this field to zero, and instead apply any gain prior to
				669	encoding, when this is possible and does not conflict with the user's wishes.
				670	The output gain should only be nonzero when the gain is adjusted after
				671	encoding, or when the user wishes to adjust the gain for playback while
				672	preserving the ability to recover the original signal amplitude.
				673	<vspace blankLines="1"/>
				674	Although the output gain has enormous range (+/- 128 dB, enough to amplify
				675	inaudible sounds to the threshold of physical pain), most applications can
				676	only reasonably use a small portion of this range around zero.
				677	The large range serves in part to ensure that gain can always be losslessly
				678	transferred between OpusHead and R128_TRACK_GAIN (see below) without
				679	saturating.
				680	<vspace blankLines="1"/>
				681	</t>
				682	<t><spanx style="strong">Channel Mapping Family</spanx> (8 bits,
				683	unsigned):
				684	<vspace blankLines="1"/>
				685	This octet indicates the order and semantic meaning of the various channels
				686	encoded in each Ogg packet.
				687	<vspace blankLines="1"/>
				688	Each possible value of this octet indicates a mapping family, which defines a
				689	set of allowed channel counts, and the ordered set of channel names for each
				690	allowed channel count.
				691	The details are described in <xref target="channel_mapping"/>.
				692	</t>
Timothy B. Terriberry	b374461	2012-07-16 13:17:27 -0700	[diff] [blame]	693	<t><spanx style="strong">Channel Mapping Table</spanx>:
				694	This table defines the mapping from encoded streams to output channels.
				695	It is omitted when the channel mapping family is 0, but REQUIRED otherwise.
				696	Its contents are specified in <xref target="channel_mapping"/>.
				697	</t>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	698	</list>
				699	</t>
				700
Timothy B. Terriberry	b374461	2012-07-16 13:17:27 -0700	[diff] [blame]	701	<t>
				702	All fields in the ID headers are REQUIRED, except for the channel mapping
				703	table, which is omitted when the channel mapping family is 0.
				704	Implementations SHOULD reject ID headers which do not contain enough data for
				705	these fields, even if they contain a valid Magic Signature.
				706	Future versions of this specification, even backwards-compatible versions,
				707	might include additional fields in the ID header.
				708	If an ID header has a compatible major version, but a larger minor version,
				709	an implementation MUST NOT reject it for containing additional data not
				710	specified here.
				711	However, implementations MAY reject streams in which the ID header does not
				712	complete on the first page.
				713	</t>
				714
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	715	<section anchor="channel_mapping" title="Channel Mapping">
				716	<t>
				717	An Ogg Opus stream allows mapping one number of Opus streams (N) to a possibly
				718	larger number of decoded channels (M+N) to yet another number of output
				719	channels (C), which might be larger or smaller than the number of decoded
				720	channels.
Ralph Giles	9621e71	2012-07-17 17:35:12 -0400	[diff] [blame]	721	The order and meaning of these channels are defined by a channel mapping,
				722	which consists of the 'channel mapping family' octet and, for channel mapping
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	723	families other than family 0, a channel mapping table, as illustrated in
				724	<xref target="channel_mapping_table"/>.
				725	</t>
				726
				727	<figure anchor="channel_mapping_table" title="Channel Mapping Table"
				728	align="center">
				729	<artwork align="center"><![CDATA[
				730	 0                   1                   2                   3
				731	 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
				732	                                                +-+-+-+-+-+-+-+-+
				733	                                                | Stream Count  |
				734	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
				735	| Coupled Count |              Channel Mapping...               :
				736	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
				737	]]></artwork>
				738	</figure>
				739
				740	<t>
				741	The fields in the channel mapping table have the following meaning:
				742	<list style="numbers" counter="8">
				743	<t><spanx style="strong">Stream Count</spanx> 'N' (8 bits, unsigned):
				744	<vspace blankLines="1"/>
				745	This is the total number of streams encoded in each Ogg packet.
				746	This value is required to correctly parse the packed Opus packets inside an
				747	Ogg packet, as described in <xref target="packet_organization"/>.
				748	This value MUST NOT be zero, as without at least one Opus packet with a valid
				749	TOC sequence, a demuxer cannot recover the duration of an Ogg packet.
				750	<vspace blankLines="1"/>
				751	For channel mapping family 0, this value defaults to 1, and is not coded.
				752	<vspace blankLines="1"/>
				753	</t>
				754	<t><spanx style="strong">Coupled Stream Count</spanx> 'M' (8 bits, unsigned):
				755	This is the number of streams whose decoders should be configured to produce
				756	two channels.
				757	This MUST be no larger than the total number of streams, N.
				758	<vspace blankLines="1"/>
				759	Each packet in an Opus stream has an internal channel count of 1 or 2, which
				760	can change from packet to packet.
Ralph Giles	e7aa6cb	2013-01-11 17:11:07 -0800	[diff] [blame]	761	This is selected by the encoder depending on the bitrate and the audio being
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	762	encoded.
				763	The original channel count of the encoder input is not preserved by the lossy
				764	compression.
				765	<vspace blankLines="1"/>
				766	Regardless of the internal channel count, any Opus stream can be decoded as
				767	mono (a single channel) or stereo (two channels) by appropriate initialization
				768	of the decoder.
				769	The 'coupled stream count' field indicates that the first M Opus decoders are
Ron	e37262c	2014-01-19 15:20:05 +1030	[diff] [blame]	770	to be initialized for stereo output, and the remaining N-M decoders are to be
				771	initialized for mono only.
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	772	The total number of decoded channels, (M+N), MUST be no larger than 255, as
				773	there is no way to index more channels than that in the channel mapping.
				774	<vspace blankLines="1"/>
				775	For channel mapping family 0, this value defaults to C-1 (i.e., 0 for mono
				776	and 1 for stereo), and is not coded.
				777	<vspace blankLines="1"/>
				778	</t>
				779	<t><spanx style="strong">Channel Mapping</spanx> (8*C bits):
				780	This contains one octet per output channel, indicating which decoded channel
				781	should be used for each one.
				782	Let 'index' be the value of this octet for a particular output channel.
				783	This value MUST either be smaller than (M+N), or be the special value 255.
				784	If 'index' is less than 2*M, the output MUST be taken from decoding stream
				785	('index'/2) as stereo and selecting the left channel if 'index' is even, and
				786	the right channel if 'index' is odd.
				787	If 'index' is 2*M or larger, the output MUST be taken from decoding stream
				788	('index'-M) as mono.
				789	If 'index' is 255, the corresponding output channel MUST contain pure silence.
				790	<vspace blankLines="1"/>
				791	The number of output channels, C, is not constrained to match the number of
				792	decoded channels (M+N).
				793	A single index value MAY appear multiple times, i.e., the same decoded channel
				794	might be mapped to multiple output channels.
				795	Some decoded channels might not be assigned to any output channel, as well.
				796	<vspace blankLines="1"/>
				797	For channel mapping family 0, the first index defaults to 0, and if C==2,
				798	the second index defaults to 1.
				799	Neither index is coded.
				800	</t>
				801	</list>
				802	</t>
				803
				804	<t>
				805	After producing the output channels, the channel mapping family determines the
				806	semantic meaning of each one.
Ralph Giles	2fd3d0a	2013-01-16 16:30:55 -0800	[diff] [blame]	807	Currently there are three defined mapping families, although more may be added.
				808	</t>
				809
				810	<section anchor="channel_mapping_0" title="Channel Mapping Family 0">
				811	<t>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	812	Allowed numbers of channels: 1 or 2.
Ralph Giles	2fd3d0a	2013-01-16 16:30:55 -0800	[diff] [blame]	813	RTP mapping.
				814	</t>
				815	<t>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	816	<list style="symbols">
				817	<t>1 channel: monophonic (mono).</t>
				818	<t>2 channels: stereo (left, right).</t>
				819	</list>
				820	<spanx style="strong">Special mapping</spanx>: This channel mapping value also
				821	indicates that the contents consist of a single Opus stream that is stereo if
Ralph Giles	e7aa6cb	2013-01-11 17:11:07 -0800	[diff] [blame]	822	and only if C==2, with stream index 0 mapped to output channel 0 (mono, or
				823	left channel) and stream index 1 mapped to output channel 1 (right channel)
				824	if stereo.
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	825	When the 'channel mapping family' octet has this value, the channel mapping
				826	table MUST be omitted from the ID header packet.
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	827	</t>
Ralph Giles	2fd3d0a	2013-01-16 16:30:55 -0800	[diff] [blame]	828	</section>
				829
				830	<section anchor="channel_mapping_1" title="Channel Mapping Family 1">
				831	<t>
Ralph Giles	05bf400	2012-12-19 12:21:06 -0800	[diff] [blame]	832	Allowed numbers of channels: 1...8.
Ralph Giles	2fd3d0a	2013-01-16 16:30:55 -0800	[diff] [blame]	833	Vorbis channel order.
				834	</t>
				835	<t>
Ralph Giles	05bf400	2012-12-19 12:21:06 -0800	[diff] [blame]	836	Each channel is assigned to a speaker location in a conventional surround
Ron	e37262c	2014-01-19 15:20:05 +1030	[diff] [blame]	837	arrangement.
Ralph Giles	05bf400	2012-12-19 12:21:06 -0800	[diff] [blame]	838	Specific locations depend on the number of channels, and are given below
				839	in order of the corresponding channel indices.
				840	<list style="symbols">
				841	<t>1 channel: monophonic (mono).</t>
				842	<t>2 channels: stereo (left, right).</t>
				843	<t>3 channels: linear surround (left, center, right).</t>
				844	<t>4 channels: quadraphonic (front left, front right, rear left, rear right).</t>
Ralph Giles	fc113b7	2013-01-11 17:01:38 -0800	[diff] [blame]	845	<t>5 channels: 5.0 surround (front left, front center, front right, rear left, rear right).</t>
				846	<t>6 channels: 5.1 surround (front left, front center, front right, rear left, rear right, LFE).</t>
				847	<t>7 channels: 6.1 surround (front left, front center, front right, side left, side right, rear center, LFE).</t>
				848	<t>8 channels: 7.1 surround (front left, front center, front right, side left, side right, rear left, rear right, LFE).</t>
Ralph Giles	05bf400	2012-12-19 12:21:06 -0800	[diff] [blame]	849	</list>
Ron	e37262c	2014-01-19 15:20:05 +1030	[diff] [blame]	850	This set of surround options and speaker location orderings is the same
				851	as those used by the Vorbis codec <xref target="vorbis-mapping"/>.
Ralph Giles	05bf400	2012-12-19 12:21:06 -0800	[diff] [blame]	852	The ordering is different from the one used by the
				853	WAVE <xref target="wave-multichannel"/> and
				854	FLAC <xref target="flac"/> formats,
Ralph Giles	1b0552b	2013-05-25 01:43:06 +0800	[diff] [blame]	855	so correct ordering requires permutation of the output channels when encoding
				856	from or decoding to those formats.
Ralph Giles	e7aa6cb	2013-01-11 17:11:07 -0800	[diff] [blame]	857	'LFE' here refers to a Low Frequency Effects channel, often mapped to a subwoofer
				858	with no particular spatial position.
Ralph Giles	05bf400	2012-12-19 12:21:06 -0800	[diff] [blame]	859	Implementations SHOULD identify 'side' or 'rear' speaker locations with
Ralph Giles	e7aa6cb	2013-01-11 17:11:07 -0800	[diff] [blame]	860	'surround' and 'back' as appropriate when interfacing with audio formats
				861	or systems which prefer that terminology.
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	862	</t>
Ralph Giles	2fd3d0a	2013-01-16 16:30:55 -0800	[diff] [blame]	863	</section>
				864
				865	<section anchor="channel_mapping_255"
				866	title="Channel Mapping Family 255">
				867	<t>
				868	Allowed numbers of channels: 1...255.
				869	No defined channel meaning.
				870	</t>
				871	<t>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	872	Channels are unidentified.
				873	General-purpose players SHOULD NOT attempt to play these streams, and offline
				874	decoders MAY deinterleave the output into separate PCM files, one per channel.
				875	Decoders SHOULD NOT produce output for channels mapped to stream index 255
				876	(pure silence) unless they have no other way to indicate the index of
				877	non-silent channels.
				878	</t>
Ralph Giles	2fd3d0a	2013-01-16 16:30:55 -0800	[diff] [blame]	879	</section>
				880
				881	<section anchor="channel_mapping_undefined"
				882	title="Undefined Channel Mappings">
				883	<t>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	884	The remaining channel mapping families (2...254) are reserved.
Ralph Giles	360a411	2012-07-16 13:53:29 -0400	[diff] [blame]	885	A decoder encountering a reserved channel mapping family value SHOULD act as
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	886	though the value is 255.
Ralph Giles	2fd3d0a	2013-01-16 16:30:55 -0800	[diff] [blame]	887	</t>
				888	</section>
				889
				890	<section anchor="downmix" title="Downmixing">
				891	<t>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	892	An Ogg Opus player MUST play any Ogg Opus stream with a channel mapping family
				893	of 0 or 1, even if the number of channels does not match the physically
				894	connected audio hardware.
				895	Players SHOULD perform channel mixing to increase or reduce the number of
				896	channels as needed.
				897	</t>
				898
Ralph Giles	2fd3d0a	2013-01-16 16:30:55 -0800	[diff] [blame]	899	<t>
				900	Implementations MAY use the following matrices to implement downmixing from
				901	multichannel files using <xref target="channel_mapping_1">Channel Mapping
				902	Family 1</xref>, which are known to give acceptable results for stereo.
				903	Matrices for 3 and 4 channels are normalized so each coefficient row sums
				904	to 1 to avoid clipping.
				905	For 5 or more channels they are normalized to 2 as a compromise between
				906	clipping and dynamic range reduction.
				907	</t>
				908	<t>
				909	In these matrices the front left and front right channels are generally
				910	passed through directly.
				911	When a surround channel is split between both the left and right stereo
				912	channels, coefficients are chosen so their squares sum to 1, which
				913	helps preserve the perceived intensity.
				914	Rear channels are mixed more diffusely or attenuated to maintain focus
				915	on the front channels.
				916	</t>
				917
				918	<figure anchor="downmix-matrix-3"
				919	title="Stereo downmix matrix for the linear surround channel mapping"
				920	align="center">
				921	<artwork align="center"><![CDATA[
				922	Left output = ( 0.585786 * left + 0.414214 * center )
				923	Right output = ( 0.414214 * center + 0.585786 * right )
				924	]]></artwork>
				925	<postamble>
				926	Exact coefficient values are 1 and 1/sqrt(2), multiplied by
				927	1/(1 + 1/sqrt(2)) for normalization.
				928	</postamble>
				929	</figure>
				930
				931	<figure anchor="downmix-matrix-4"
				932	title="Stereo downmix matrix for the quadraphonic channel mapping"
				933	align="center">
				934	<artwork align="center"><![CDATA[
				935	/          \   /                                     \ / FL \
				936	| L output |   | 0.422650 0.000000 0.366025 0.211325 | | FR |
				937	| R output | = | 0.000000 0.422650 0.211325 0.366025 | | RL |
				938	\          /   \                                     / \ RR /
				939	]]></artwork>
				940	<postamble>
				941	Exact coefficient values are 1, sqrt(3)/2 and 1/2, multiplied by
				942	1/(1 + sqrt(3)/2 + 1/2) for normalization.
				943	</postamble>
				944	</figure>
				945
				946	<figure anchor="downmix-matrix-5"
				947	title="Stereo downmix matrix for the 5.0 surround mapping"
				948	align="center">
				949	<artwork align="center"><![CDATA[
				950	                                                         / FL \
				951	/   \   /                                              \ | FC |
				952	| L |   | 0.650802 0.460186 0.000000 0.563611 0.325401 | | FR |
				953	| R | = | 0.000000 0.460186 0.650802 0.325401 0.563611 | | RL |
				954	\   /   \                                              / | RR |
				955	                                                         \    /
				956	]]></artwork>
				957	<postamble>
				958	Exact coefficient values are 1, 1/sqrt(2), sqrt(3)/2 and 1/2, multiplied by
				959	2/(1 + 1/sqrt(2) + sqrt(3)/2 + 1/2)
				960	for normalization.
				961	</postamble>
				962	</figure>
				963
				964	<figure anchor="downmix-matrix-6"
				965	title="Stereo downmix matrix for the 5.1 surround mapping"
				966	align="center">
				967	<artwork align="center"><![CDATA[
				968	                                                                /FL \
				969	/ \   /                                                       \ |FC |
				970	|L|   | 0.529067 0.374107 0.000000 0.458186 0.264534 0.374107 | |FR |
				971	|R| = | 0.000000 0.374107 0.529067 0.264534 0.458186 0.374107 | |RL |
				972	\ /   \                                                       / |RR |
				973	                                                                \LFE/
				974	]]></artwork>
				975	<postamble>
				976	Exact coefficient values are 1, 1/sqrt(2), sqrt(3)/2 and 1/2, multiplied by
				977	2/(1 + 1/sqrt(2) + sqrt(3)/2 + 1/2 + 1/sqrt(2))
				978	for normalization.
				979	</postamble>
				980	</figure>
				981
				982	<figure anchor="downmix-matrix-7"
				983	title="Stereo downmix matrix for the 6.1 surround mapping"
				984	align="center">
				985	<artwork align="center"><![CDATA[
				986	/                                                                \
				987	| 0.455310 0.321953 0.000000 0.394310 0.227655 0.278819 0.321953 |
				988	| 0.000000 0.321953 0.455310 0.227655 0.394310 0.278819 0.321953 |
				989	\                                                                /
				990	]]></artwork>
				991	<postamble>
				992	Exact coefficient values are 1, 1/sqrt(2), sqrt(3)/2, 1/2 and
				993	sqrt(3)/2/sqrt(2), multiplied by
				994	2/(1 + 1/sqrt(2) + sqrt(3)/2 + 1/2 +
				995	sqrt(3)/2/sqrt(2) + 1/sqrt(2)) for normalization.
				996	The coefficients are in the same order as in <xref target="channel_mapping_1" />,
				997	and the matrices above.
				998	</postamble>
				999	</figure>
				1000
				1001	<figure anchor="downmix-matrix-8"
				1002	title="Stereo downmix matrix for the 7.1 surround mapping"
				1003	align="center">
				1004	<artwork align="center"><![CDATA[
				1005	/                                                                 \
				1006	| .388631 .274804 .000000 .336565 .194316 .336565 .194316 .274804 |
				1007	| .000000 .274804 .388631 .194316 .336565 .194316 .336565 .274804 |
				1008	\                                                                 /
				1009	]]></artwork>
				1010	<postamble>
				1011	Exact coefficient values are 1, 1/sqrt(2), sqrt(3)/2 and 1/2, multiplied by
				1012	2/(2 + 2/sqrt(2) + sqrt(3)) for normalization.
				1013	The coefficients are in the same order as in <xref target="channel_mapping_1" />,
				1014	and the matrices above.
				1015	</postamble>
				1016	</figure>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1017
				1018	</section>
				1019
Ralph Giles	2fd3d0a	2013-01-16 16:30:55 -0800	[diff] [blame]	1020	</section> <!-- end channel_mapping -->
				1021
				1022	</section> <!-- end id_header -->
				1023
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1024	<section anchor="comment_header" title="Comment Header">
				1025
				1026	<figure anchor="comment_header_packet" title="Comment Header Packet"
				1027	align="center">
				1028	<artwork align="center"><![CDATA[
				1029	 0                   1                   2                   3
				1030	 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
				1031	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
				1032	|      'O'      |      'p'      |      'u'      |      's'      |
				1033	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
				1034	|      'T'      |      'a'      |      'g'      |      's'      |
				1035	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
				1036	|                     Vendor String Length                      |
				1037	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
				1038	|                                                               |
				1039	:                        Vendor String...                       :
				1040	|                                                               |
				1041	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
				1042	|                   User Comment List Length                    |
				1043	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
				1044	|                 User Comment #0 String Length                 |
				1045	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
				1046	|                                                               |
				1047	:                   User Comment #0 String...                   :
				1048	|                                                               |
				1049	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
				1050	|                 User Comment #1 String Length                 |
				1051	+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
				1052	:                                                               :
				1053	]]></artwork>
				1054	</figure>
				1055
				1056	<t>
				1057	The comment header consists of a 64-bit magic signature, followed by data in
				1058	the same format as the <xref target="vorbis-comment"/> header used in Ogg
Ralph Giles	bfcc1dd	2014-01-15 09:02:01 -0800	[diff] [blame]	1059	Vorbis, except (like Ogg Theora and Speex) the final "framing bit" specified
				1060	in the Vorbis spec is not present.
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1061	<list style="numbers">
				1062	<t><spanx style="strong">Magic Signature</spanx>:
				1063	<vspace blankLines="1"/>
				1064	This is an 8-octet (64-bit) field that allows codec identification and is
				1065	human-readable.
				1066	It contains, in order, the magic numbers:
				1067	<list style="empty">
				1068	<t>0x4F 'O'</t>
				1069	<t>0x70 'p'</t>
				1070	<t>0x75 'u'</t>
				1071	<t>0x73 's'</t>
				1072	<t>0x54 'T'</t>
				1073	<t>0x61 'a'</t>
				1074	<t>0x67 'g'</t>
				1075	<t>0x73 's'</t>
				1076	</list>
				1077	Starting with "Op" helps distinguish it from audio data packets, as this is an
				1078	invalid TOC sequence.
				1079	<vspace blankLines="1"/>
				1080	</t>
				1081	<t><spanx style="strong">Vendor String Length</spanx> (32 bits, unsigned,
				1082	little endian):
				1083	<vspace blankLines="1"/>
				1084	This field gives the length of the following vendor string, in octets.
				1085	It MUST NOT indicate that the vendor string is longer than the rest of the
				1086	packet.
				1087	<vspace blankLines="1"/>
				1088	</t>
				1089	<t><spanx style="strong">Vendor String</spanx> (variable length, UTF-8 vector):
				1090	<vspace blankLines="1"/>
				1091	This is a simple human-readable tag for vendor information, encoded as a UTF-8
Timothy B. Terriberry	b374461	2012-07-16 13:17:27 -0700	[diff] [blame]	1092	string <xref target="RFC3629"/>.
Ralph Giles	05bf400	2012-12-19 12:21:06 -0800	[diff] [blame]	1093	No terminating null octet is required.
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1094	<vspace blankLines="1"/>
Ralph Giles	360a411	2012-07-16 13:53:29 -0400	[diff] [blame]	1095	This tag is intended to identify the codec encoder and encapsulation
Timothy B. Terriberry	b374461	2012-07-16 13:17:27 -0700	[diff] [blame]	1096	implementations, for tracing differences in technical behavior.
Ralph Giles	aff527e	2012-07-16 17:36:52 -0400	[diff] [blame]	1097	User-facing encoding applications can use the 'ENCODER' user comment tag
				1098	to identify themselves.
Ralph Giles	360a411	2012-07-16 13:53:29 -0400	[diff] [blame]	1099	<vspace blankLines="1"/>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1100	</t>
				1101	<t><spanx style="strong">User Comment List Length</spanx> (32 bits, unsigned,
				1102	little endian):
				1103	<vspace blankLines="1"/>
				1104	This field indicates the number of user-supplied comments.
				1105	It MAY indicate there are zero user-supplied comments, in which case there are
				1106	no additional fields in the packet.
				1107	It MUST NOT indicate that there are so many comments that the comment string
				1108	lengths would require more data than is available in the rest of the packet.
				1109	<vspace blankLines="1"/>
				1110	</t>
				1111	<t><spanx style="strong">User Comment #i String Length</spanx> (32 bits,
				1112	unsigned, little endian):
				1113	<vspace blankLines="1"/>
				1114	This field gives the length of the following user comment string, in octets.
				1115	There is one for each user comment indicated by the 'user comment list length'
				1116	field.
				1117	It MUST NOT indicate that the string is longer than the rest of the packet.
				1118	<vspace blankLines="1"/>
				1119	</t>
				1120	<t><spanx style="strong">User Comment #i String</spanx> (variable length, UTF-8
				1121	vector):
				1122	<vspace blankLines="1"/>
				1123	This field contains a single user comment string.
				1124	There is one for each user comment indicated by the 'user comment list length'
				1125	field.
				1126	</t>
				1127	</list>
				1128	</t>
				1129
				1130	<t>
Timothy B. Terriberry	b374461	2012-07-16 13:17:27 -0700	[diff] [blame]	1131	The vendor string length and user comment list length are REQUIRED, and
				1132	implementations SHOULD reject comment headers that do not contain enough data
				1133	for these fields, or that do not contain enough data for the corresponding
				1134	vendor string or user comments they describe.
				1135	Making this check before allocating the associated memory to contain the data
				1136	may help prevent a possible Denial-of-Service (DoS) attack from small comment
				1137	headers that claim to contain strings longer than the entire packet or more
				1138	user comments than could possibly fit in the packet.
				1139	</t>
				1140
				1141	<t>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1142	The user comment strings follow the NAME=value format described by
				1143	<xref target="vorbis-comment"/> with the same recommended tag names.
				1144	One new comment tag is introduced for Ogg Opus:
				1145	<figure align="center">
				1146	<artwork align="left"><![CDATA[
				1147	R128_TRACK_GAIN=-573
				1148	]]></artwork>
				1149	</figure>
				1150	representing the volume shift needed to normalize the track's volume.
				1151	The gain is a Q7.8 fixed point number in dB, as in the ID header's 'output
				1152	gain' field.
				1153	This tag is similar to the REPLAYGAIN_TRACK_GAIN tag in
				1154	Vorbis <xref target="replay-gain"/>, except that the normal volume
				1155	reference is the <xref target="EBU-R128"/> standard.
				1156	</t>
				1157	<t>
				1158	An Ogg Opus file MUST NOT have more than one such tag, and if present its
				1159	value MUST be an integer from -32768 to 32767, inclusive, represented in
				1160	ASCII with no whitespace.
				1161	If present, it MUST correctly represent the R128 normalization gain relative
				1162	to the 'output gain' field specified in the ID header.
				1163	If a player chooses to make use of the R128_TRACK_GAIN tag, it MUST be
				1164	applied <spanx style="emph">in addition</spanx> to the 'output gain' value.
				1165	If an encoder wishes to use R128 normalization, and the output gain is not
				1166	otherwise constrained or specified, the encoder SHOULD write the R128 gain
				1167	into the 'output gain' field and store a tag containing "R128_TRACK_GAIN=0".
				1168	That is, it should assume that by default tools will respect the 'output gain'
				1169	field, and not the comment tag.
				1170	If a tool modifies the ID header's 'output gain' field, it MUST also update or
				1171	remove the R128_TRACK_GAIN comment tag.
				1172	</t>
				1173	<t>
				1174	To avoid confusion with multiple normalization schemes, an Opus comment header
				1175	SHOULD NOT contain any of the REPLAYGAIN_TRACK_GAIN, REPLAYGAIN_TRACK_PEAK,
				1176	REPLAYGAIN_ALBUM_GAIN, or REPLAYGAIN_ALBUM_PEAK tags.
				1177	</t>
				1178	<t>
				1179	There is no Opus comment tag corresponding to REPLAYGAIN_ALBUM_GAIN.
				1180	That information should instead be stored in the ID header's 'output gain'
				1181	field.
				1182	</t>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1183	</section>
				1184
				1185	</section>
				1186
Timothy B. Terriberry	b374461	2012-07-16 13:17:27 -0700	[diff] [blame]	1187	<section anchor="packet_size_limits" title="Packet Size Limits">
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1188	<t>
				1189	Technically valid Opus packets can be arbitrarily large due to the padding
				1190	format, although the amount of non-padding data they can contain is bounded.
				1191	These packets might be spread over a similarly enormous number of Ogg pages.
				1192	Encoders SHOULD use no more padding than required to make a variable bitrate
				1193	(VBR) stream constant bitrate (CBR).
				1194	Decoders SHOULD avoid attempting to allocate excessive amounts of memory when
				1195	presented with a very large packet.
				1196	The presence of an extremely large packet in the stream could indicate a
Ralph Giles	360a411	2012-07-16 13:53:29 -0400	[diff] [blame]	1197	memory exhaustion attack or stream corruption.
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1198	Decoders SHOULD reject a packet that is too large to process, and display a
				1199	warning message.
				1200	</t>
				1201	<t>
				1202	In an Ogg Opus stream, the largest possible valid packet that does not use
				1203	padding has a size of (61,298*N - 2) octets, or about 60 kB per
				1204	Opus stream.
				1205	With 255 streams, this is 15,630,988 octets (14.9 MB) and can
				1206	span up to 61,298 Ogg pages, all but one of which will have a granule
				1207	position of -1.
				1208	This is of course a very extreme packet, consisting of 255 streams, each
				1209	containing 120 ms of audio encoded as 2.5 ms frames, each frame
				1210	using the maximum possible number of octets (1275) and stored in the least
				1211	efficient manner allowed (a VBR code 3 Opus packet).
Ralph Giles	7ad2f43	2013-01-10 14:15:41 -0800	[diff] [blame]	1212	Even in such a packet, most of the data will be zeros as 2.5 ms frames
				1213	cannot actually use all 1275 octets.
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1214	The largest packet consisting of entirely useful data is
				1215	(15,326*N - 2) octets, or about 15 kB per stream.
				1216	This corresponds to 120 ms of audio encoded as 10 ms frames in either
Ralph Giles	e26ed59	2014-01-17 14:33:54 -0800	[diff] [blame]	1217	SILK or Hybrid mode, but at a data rate of over 1 Mbps, which makes little
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1218	sense for the quality achieved.
				1219	A more reasonable limit is (7,664*N - 2) octets, or about 7.5 kB
				1220	per stream.
Ralph Giles	e26ed59	2014-01-17 14:33:54 -0800	[diff] [blame]	1221	This corresponds to 120 ms of audio encoded as 20 ms stereo CELT mode
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1222	frames, with a total bitrate just under 511 kbps (not counting the Ogg
				1223	encapsulation overhead).
Timothy B. Terriberry	396c4e5	2012-07-16 13:43:10 -0700	[diff] [blame]	1224	With N=8, the maximum number of channels currently defined by mapping
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1225	family 1, this gives a maximum packet size of 61,310 octets, or just
				1226	under 60 kB.
				1227	This is still quite conservative, as it assumes each output channel is taken
				1228	from one decoded channel of a stereo packet.
				1229	An implementation could reasonably choose any of these numbers for its internal
				1230	limits.
				1231	</t>
				1232	</section>
				1233
Ralph Giles	2ad6eaf	2013-05-24 18:28:58 +0800	[diff] [blame]	1234	<section anchor="encoder" title="Encoder Guidelines">
				1235	<t>
				1236	When encoding Opus files, Ogg encoders should take into account the
				1237	algorithmic delay of the Opus encoder.
Ralph Giles	b243dca	2013-05-25 01:23:41 +0800	[diff] [blame]	1238	</t>
				1239	<figure align="center">
				1240	<preamble>
Ralph Giles	2ad6eaf	2013-05-24 18:28:58 +0800	[diff] [blame]	1241	In encoders derived from the reference implementation, the number of
				1242	samples can be queried with:
Ralph Giles	b243dca	2013-05-25 01:23:41 +0800	[diff] [blame]	1243	</preamble>
				1244	<artwork align="center"><![CDATA[
Ralph Giles	c42c6db	2014-01-17 15:36:03 -0800	[diff] [blame]	1245	opus_encoder_ctl(encoder_state, OPUS_GET_LOOKAHEAD, &delay_samples);
Ralph Giles	b243dca	2013-05-25 01:23:41 +0800	[diff] [blame]	1246	]]></artwork>
				1247	</figure>
				1248	<t>
Ralph Giles	2ad6eaf	2013-05-24 18:28:58 +0800	[diff] [blame]	1249	To achieve good quality in the very first samples of a stream, the Ogg encoder
Ralph Giles	c42c6db	2014-01-17 15:36:03 -0800	[diff] [blame]	1250	MAY use linear predictive coding (LPC) extrapolation
				1251	<xref target="linear-prediction"/> to generate at least 120 extra samples at
				1252	the beginning to avoid the Opus encoder having to encode a discontinuous
				1253	signal.
				1254	For an input file containing 'length' samples, the Ogg encoder SHOULD set the
Ralph Giles	cf33d32	2014-01-17 16:16:46 -0800	[diff] [blame]	1255	pre-skip header value to delay_samples+extra_samples, encode at least
Ralph Giles	c42c6db	2014-01-17 15:36:03 -0800	[diff] [blame]	1256	length+delay_samples+extra_samples samples, and set the granulepos of the last
				1257	page to length+delay_samples+extra_samples.
Ralph Giles	2ad6eaf	2013-05-24 18:28:58 +0800	[diff] [blame]	1258	This ensures that the encoded file has the same duration as the original, with
				1259	no time offset. The best way to pad the end of the stream is to also use LPC
				1260	extrapolation, but zero-padding is also acceptable.
				1261	</t>
				1262
				1263	<section anchor="lpc" title="LPC Extrapolation">
				1264	<t>
				1265	The first step in LPC extrapolation is to compute linear prediction
Ralph Giles	078775b	2014-01-17 15:44:08 -0800	[diff] [blame]	1266	coefficients <xref target="lpc-sample"/>.
Ralph Giles	2ad6eaf	2013-05-24 18:28:58 +0800	[diff] [blame]	1267	When extending the end of the signal, order-N (typically with N ranging from 8
				1268	to 40) LPC analysis is performed on a window near the end of the signal.
				1269	The last N samples are used as memory to an infinite impulse response (IIR)
				1270	filter.
Ralph Giles	7918ac1	2013-05-25 01:16:23 +0800	[diff] [blame]	1271	</t>
				1272	<figure align="center">
				1273	<preamble>
Ralph Giles	2ad6eaf	2013-05-24 18:28:58 +0800	[diff] [blame]	1274	The filter is then applied on a zero input to extrapolate the end of the signal.
				1275	Let a(k) be the kth LPC coefficient and x(n) be the nth sample of the signal.
				1276	Each new sample past the end of the signal is then computed as:
Ralph Giles	7918ac1	2013-05-25 01:16:23 +0800	[diff] [blame]	1277	</preamble>
Ralph Giles	2ad6eaf	2013-05-24 18:28:58 +0800	[diff] [blame]	1278	<artwork align="center"><![CDATA[
				1279	        N
				1280	       ---
				1281	x(n) =  \   a(k)*x(n-k)
				1282	       /
				1283	       ---
				1284	       k=1
				1285	]]></artwork>
Ralph Giles	7918ac1	2013-05-25 01:16:23 +0800	[diff] [blame]	1286	</figure>
				1287	<t>
Ralph Giles	2ad6eaf	2013-05-24 18:28:58 +0800	[diff] [blame]	1288	The process is repeated independently for each channel.
				1289	It is possible to extend the beginning of the signal by applying the same
				1290	process backward in time.
				1291	When extending the beginning of the signal, it is best to apply a "fade in" to
Ralph Giles	bd5cfda	2013-05-25 01:37:46 +0800	[diff] [blame]	1292	the extrapolated signal, e.g. by multiplying it by a half-Hanning window
				1293	<xref target="hanning"/>.
Ralph Giles	2ad6eaf	2013-05-24 18:28:58 +0800	[diff] [blame]	1294	</t>
				1295
				1296	</section>
				1297
				1298	<section anchor="continuous_chaining" title="Continuous Chaining">
				1299	<t>
				1300	In some applications, such as Internet radio, it is desirable to cut a long
Ron	3f3cd99	2014-01-19 13:48:21 +1030	[diff] [blame]	1301	stream into smaller chains, e.g. so the comment header can be updated.
Ralph Giles	2ad6eaf	2013-05-24 18:28:58 +0800	[diff] [blame]	1302	This can be done simply by separating the input streams into segments and
				1303	encoding each segment independently.
				1304	The drawback of this approach is that it creates a small discontinuity
				1305	at the boundary due to the lossy nature of Opus.
				1306	An encoder MAY avoid this discontinuity by using the following procedure:
				1307	<list style="numbers">
				1308	<t>Encode the last frame of the first segment as an independent frame by
Ralph Giles	785a21f	2014-01-17 16:02:52 -0800	[diff] [blame]	1309	turning off all forms of inter-frame prediction.
				1310	De-emphasis is allowed.</t>
Ralph Giles	2ad6eaf	2013-05-24 18:28:58 +0800	[diff] [blame]	1311	<t>Set the granulepos of the last page to a point near the end of the last
				1312	frame.</t>
				1313	<t>Begin the second segment with a copy of the last frame of the first
				1314	segment.</t>
Ralph Giles	cf33d32	2014-01-17 16:16:46 -0800	[diff] [blame]	1315	<t>Set the pre-skip value of the second stream in such a way as to properly
Ralph Giles	2ad6eaf	2013-05-24 18:28:58 +0800	[diff] [blame]	1316	join the two streams.</t>
				1317	<t>Continue the encoding process normally from there, without any reset to
				1318	the encoder.</t>
				1319	</list>
				1320	</t>
Ralph Giles	785a21f	2014-01-17 16:02:52 -0800	[diff] [blame]	1321	<figure align="center">
				1322	<preamble>
				1323	In encoders derived from the reference implementation, inter-frame prediction
				1324	can be turned off by calling:
				1325	</preamble>
				1326	<artwork align="center"><![CDATA[
				1327	opus_encoder_ctl(encoder_state, OPUS_SET_PREDICTION_DISABLED, 1);
				1328	]]></artwork>
				1329	<postamble>
				1330	Prediction should be enabled again before resuming normal encoding, even
				1331	after a reset.
				1332	</postamble>
				1333	</figure>
				1334
Ralph Giles	2ad6eaf	2013-05-24 18:28:58 +0800	[diff] [blame]	1335	</section>
				1336
Ralph Giles	7918ac1	2013-05-25 01:16:23 +0800	[diff] [blame]	1337	</section>
				1338
Ralph Giles	0c1487a	2013-01-10 16:38:31 -0800	[diff] [blame]	1339	<section anchor="implementation" title="Implementation Status">
				1340	<t>
Ralph Giles	dfda81e	2013-05-24 17:44:43 +0800	[diff] [blame]	1341	A brief summary of major implementations of this draft is available
				1342	at <eref target="https://wiki.xiph.org/OggOpusImplementation"/>,
				1343	along with their status.
Ralph Giles	0c1487a	2013-01-10 16:38:31 -0800	[diff] [blame]	1344	</t>
Ralph Giles	0c1487a	2013-01-10 16:38:31 -0800	[diff] [blame]	1345	<t>
Ralph Giles	dfda81e	2013-05-24 17:44:43 +0800	[diff] [blame]	1346	[Note to RFC Editor: please remove this entire section before
				1347	final publication per <xref target="draft-sheffer-running-code"/>.]
Ralph Giles	0c1487a	2013-01-10 16:38:31 -0800	[diff] [blame]	1348	</t>
Ralph Giles	0c1487a	2013-01-10 16:38:31 -0800	[diff] [blame]	1349	</section>
				1350
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1351	<section anchor="security" title="Security Considerations">
				1352	<t>
				1353	Implementations of the Opus codec need to take appropriate security
				1354	considerations into account, as outlined in <xref target="RFC4732"/>.
				1355	This is just as much a problem for the container as it is for the codec itself.
				1356	It is extremely important for the decoder to be robust against malicious
				1357	payloads.
				1358	Malicious payloads must not cause the decoder to overrun its allocated memory
				1359	or to take an excessive amount of resources to decode.
				1360	Although problems in encoders are typically rarer, the same applies to the
				1361	encoder.
				1362	Malicious audio streams must not cause the encoder to misbehave because this
				1363	would allow an attacker to attack transcoding gateways.
				1364	</t>
				1365
				1366	<t>
				1367	Like most other container formats, Ogg Opus files should not be used with
				1368	insecure ciphers or cipher modes that are vulnerable to known-plaintext
				1369	attacks.
				1370	Elements such as the Ogg page capture pattern and the magic signatures in the
				1371	ID header and the comment header all have easily predictable values, in
				1372	addition to various elements of the codec data itself.
				1373	</t>
				1374	</section>
				1375
				1376	<section anchor="content_type" title="Content Type">
				1377	<t>
				1378	An "Ogg Opus file" consists of one or more sequentially multiplexed segments,
				1379	each containing exactly one Ogg Opus stream.
				1380	The RECOMMENDED mime-type for Ogg Opus files is "audio/ogg".
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1381	</t>
				1382
Ralph Giles	0431f93	2013-01-16 14:14:32 -0800	[diff] [blame]	1383	<figure>
				1384	<preamble>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1385	If more specificity is desired, one MAY indicate the presence of Opus streams
				1386	using the codecs parameter defined in <xref target="RFC6381"/>, e.g.,
Ralph Giles	0431f93	2013-01-16 14:14:32 -0800	[diff] [blame]	1387	</preamble>
				1388	<artwork align="center"><![CDATA[
				1389	audio/ogg; codecs=opus
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1390	]]></artwork>
Ralph Giles	0431f93	2013-01-16 14:14:32 -0800	[diff] [blame]	1391	<postamble>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1392	for an Ogg Opus file.
Ralph Giles	0431f93	2013-01-16 14:14:32 -0800	[diff] [blame]	1393	</postamble>
				1394	</figure>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1395
				1396	<t>
				1397	The RECOMMENDED filename extension for Ogg Opus files is '.opus'.
				1398	</t>
				1399
Ralph Giles	a1b913f	2013-01-10 17:00:18 -0800	[diff] [blame]	1400	<t>
				1401	When Opus is concurrently multiplexed with other streams in an Ogg container,
				1402	one SHOULD use one of the "audio/ogg", "video/ogg", or "application/ogg"
				1403	mime-types, as defined in <xref target="RFC5334"/>.
				1404	Such streams are not strictly "Ogg Opus files" as described above,
				1405	since they contain more than a single Opus stream per sequentially
				1406	multiplexed segment, e.g. video or multiple audio tracks.
				1407	In such cases the '.opus' filename extension is NOT RECOMMENDED.
				1408	</t>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1409	</section>
				1410
				1411	<section title="IANA Considerations">
				1412	<t>
				1413	This document has no actions for IANA.
				1414	</t>
				1415	</section>
				1416
				1417	<section anchor="Acknowledgments" title="Acknowledgments">
				1418	<t>
Timothy B. Terriberry	b7985f3	2012-07-17 12:07:36 -0700	[diff] [blame]	1419	Thanks to Greg Maxwell, Christopher "Monty" Montgomery, and Jean-Marc Valin for
				1420	their valuable contributions to this document.
				1421	Additional thanks to Andrew D'Addesio, Greg Maxwell, and Vincent Penquerc'h for
				1422	their feedback based on early implementations.
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1423	</t>
				1424	</section>
				1425
				1426	<section title="Copying Conditions">
				1427	<t>
				1428	The authors agree to grant third parties the irrevocable right to copy, use,
				1429	and distribute the work, with or without modification, in any medium, without
				1430	royalty, provided that, unless separate permission is granted, redistributed
				1431	modified works do not contain misleading author, version, name of work, or
				1432	endorsement information.
				1433	</t>
				1434	</section>
				1435
				1436	</middle>
				1437	<back>
				1438	<references title="Normative References">
Timothy B. Terriberry	50f214c	2012-11-03 13:01:25 -0700	[diff] [blame]	1439	&rfc2119;
				1440	&rfc3533;
Timothy B. Terriberry	50f214c	2012-11-03 13:01:25 -0700	[diff] [blame]	1441	&rfc3629;
Ralph Giles	a1b913f	2013-01-10 17:00:18 -0800	[diff] [blame]	1442	&rfc5334;
Timothy B. Terriberry	50f214c	2012-11-03 13:01:25 -0700	[diff] [blame]	1443	&rfc6381;
				1444	&rfc6716;
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1445
Timothy B. Terriberry	396c4e5	2012-07-16 13:43:10 -0700	[diff] [blame]	1446	<reference anchor="EBU-R128" target="http://tech.ebu.ch/loudness">
Ralph Giles	360a411	2012-07-16 13:53:29 -0400	[diff] [blame]	1447	<front>
Ralph Giles	864196b	2014-01-20 12:32:11 -0800	[diff] [blame^]	1448	<title>Loudness Recommendation EBU R128</title>
				1449	<author>
				1450	<organization>EBU Technical Committee</organization>
				1451	</author>
				1452	<date month="August" year="2011"/>
Timothy B. Terriberry	396c4e5	2012-07-16 13:43:10 -0700	[diff] [blame]	1453	</front>
				1454	</reference>
				1455
				1456	<reference anchor="vorbis-comment"
				1457	target="http://www.xiph.org/vorbis/doc/v-comment.html">
				1458	<front>
				1459	<title>Ogg Vorbis I Format Specification: Comment Field and Header
				1460	Specification</title>
Ralph Giles	360a411	2012-07-16 13:53:29 -0400	[diff] [blame]	1461	<author initials="C." surname="Montgomery"
				1462	fullname="Christopher &quot;Monty&quot; Montgomery"/>
Timothy B. Terriberry	50f214c	2012-11-03 13:01:25 -0700	[diff] [blame]	1463	<date month="July" year="2002"/>
Ralph Giles	360a411	2012-07-16 13:53:29 -0400	[diff] [blame]	1464	</front>
				1465	</reference>
				1466
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1467	</references>
				1468
				1469	<references title="Informative References">
				1470
				1471	<!--?rfc include="http://xml.resource.org/public/rfc/bibxml/reference.RFC.3550.xml"?-->
Timothy B. Terriberry	50f214c	2012-11-03 13:01:25 -0700	[diff] [blame]	1472	&rfc4732;
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1473
Ralph Giles	0c1487a	2013-01-10 16:38:31 -0800	[diff] [blame]	1474	<reference anchor="draft-sheffer-running-code"
Ralph Giles	dfda81e	2013-05-24 17:44:43 +0800	[diff] [blame]	1475	target="https://tools.ietf.org/html/draft-sheffer-running-code-05#section-2">
Ralph Giles	0c1487a	2013-01-10 16:38:31 -0800	[diff] [blame]	1476	<front>
				1477	<title>Improving "Rough Consensus" with Running Code</title>
				1478	<author initials="Y." surname="Sheffer" fullname="Yaron Sheffer"/>
				1479	<author initials="A." surname="Farrel" fullname="Adrian Farrel"/>
Ralph Giles	dfda81e	2013-05-24 17:44:43 +0800	[diff] [blame]	1480	<date month="May" year="2013"/>
Ralph Giles	0c1487a	2013-01-10 16:38:31 -0800	[diff] [blame]	1481	</front>
				1482	</reference>
				1483
Ralph Giles	05bf400	2012-12-19 12:21:06 -0800	[diff] [blame]	1484	<reference anchor="flac"
				1485	target="https://xiph.org/flac/format.html">
				1486	<front>
				1487	<title>FLAC - Free Lossless Audio Codec Format Description</title>
				1488	<author initials="J." surname="Coalson" fullname="Josh Coalson"/>
				1489	<date month="January" year="2008"/>
				1490	</front>
				1491	</reference>
				1492
Ralph Giles	9e85220	2013-05-25 01:20:00 +0800	[diff] [blame]	1493	<reference anchor="hanning"
Ralph Giles	c42c6db	2014-01-17 15:36:03 -0800	[diff] [blame]	1494	target="https://en.wikipedia.org/wiki/Hamming_function#Hann_.28Hanning.29_window">
Ralph Giles	9e85220	2013-05-25 01:20:00 +0800	[diff] [blame]	1495	<front>
Ralph Giles	74f6a01	2014-01-17 15:47:08 -0800	[diff] [blame]	1496	<title>Hann window</title>
Ralph Giles	864196b	2014-01-20 12:32:11 -0800	[diff] [blame^]	1497	<author>
				1498	<organization>Wikipedia</organization>
				1499	</author>
Ralph Giles	9e85220	2013-05-25 01:20:00 +0800	[diff] [blame]	1500	<date month="May" year="2013"/>
				1501	</front>
				1502	</reference>
				1503
Ralph Giles	c42c6db	2014-01-17 15:36:03 -0800	[diff] [blame]	1504	<reference anchor="linear-prediction"
				1505	target="https://en.wikipedia.org/wiki/Linear_predictive_coding">
				1506	<front>
Ralph Giles	74f6a01	2014-01-17 15:47:08 -0800	[diff] [blame]	1507	<title>Linear Predictive Coding</title>
Ralph Giles	864196b	2014-01-20 12:32:11 -0800	[diff] [blame^]	1508	<author>
				1509	<organization>Wikipedia</organization>
				1510	</author>
Ralph Giles	c42c6db	2014-01-17 15:36:03 -0800	[diff] [blame]	1511	<date month="January" year="2014"/>
				1512	</front>
				1513	</reference>
				1514
Ralph Giles	078775b	2014-01-17 15:44:08 -0800	[diff] [blame]	1515	<reference anchor="lpc-sample"
				1516	target="https://svn.xiph.org/trunk/vorbis/lib/lpc.c">
				1517	<front>
				1518	<title>Autocorrelation LPC coeff generation algorithm
				1519	(vorbis source code)</title>
				1520	<author initials="J." surname="Degener" fullname="Jutta Degener"/>
				1521	<author initials="C." surname="Bormann" fullname="Carsten Bormann"/>
				1522	<date month="November" year="1994"/>
				1523	</front>
				1524	</reference>
				1525
				1526
Timothy B. Terriberry	396c4e5	2012-07-16 13:43:10 -0700	[diff] [blame]	1527	<reference anchor="replay-gain"
				1528	target="http://wiki.xiph.org/VorbisComment#Replay_Gain">
				1529	<front>
				1530	<title>VorbisComment: Replay Gain</title>
				1531	<author initials="C." surname="Parker" fullname="Conrad Parker"/>
				1532	<author initials="M." surname="Leese" fullname="Martin Leese"/>
Timothy B. Terriberry	50f214c	2012-11-03 13:01:25 -0700	[diff] [blame]	1533	<date month="June" year="2009"/>
Timothy B. Terriberry	396c4e5	2012-07-16 13:43:10 -0700	[diff] [blame]	1534	</front>
				1535	</reference>
				1536
Timothy B. Terriberry	b374461	2012-07-16 13:17:27 -0700	[diff] [blame]	1537	<reference anchor="seeking"
				1538	target="http://wiki.xiph.org/Seeking">
				1539	<front>
				1540	<title>Granulepos Encoding and How Seeking Really Works</title>
				1541	<author initials="S." surname="Pfeiffer" fullname="Silvia Pfeiffer"/>
				1542	<author initials="C." surname="Parker" fullname="Conrad Parker"/>
				1543	<author initials="G." surname="Maxwell" fullname="Greg Maxwell"/>
Timothy B. Terriberry	50f214c	2012-11-03 13:01:25 -0700	[diff] [blame]	1544	<date month="May" year="2012"/>
Timothy B. Terriberry	b374461	2012-07-16 13:17:27 -0700	[diff] [blame]	1545	</front>
				1546	</reference>
				1547
Ralph Giles	6bdbd26	2013-05-25 01:18:25 +0800	[diff] [blame]	1548	<reference anchor="vorbis-mapping"
				1549	target="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1550	<front>
Ralph Giles	6bdbd26	2013-05-25 01:18:25 +0800	[diff] [blame]	1551	<title>The Vorbis I Specification, Section 4.3.9 Output Channel Order</title>
Timothy B. Terriberry	396c4e5	2012-07-16 13:43:10 -0700	[diff] [blame]	1552	<author initials="C." surname="Montgomery"
				1553	fullname="Christopher &quot;Monty&quot; Montgomery"/>
Ralph Giles	6bdbd26	2013-05-25 01:18:25 +0800	[diff] [blame]	1554	<date month="January" year="2010"/>
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1555	</front>
				1556	</reference>
				1557
Ralph Giles	6bdbd26	2013-05-25 01:18:25 +0800	[diff] [blame]	1558	<reference anchor="vorbis-trim"
				1559	target="http://xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-130000A.2">
				1560	<front>
				1561	<title>The Vorbis I Specification, Appendix A: Embedding Vorbis
				1562	into an Ogg stream</title>
				1563	<author initials="C." surname="Montgomery"
				1564	fullname="Christopher &quot;Monty&quot; Montgomery"/>
				1565	<date month="November" year="2008"/>
				1566	</front>
				1567	</reference>
				1568
Ralph Giles	05bf400	2012-12-19 12:21:06 -0800	[diff] [blame]	1569	<reference anchor="wave-multichannel"
				1570	target="http://msdn.microsoft.com/en-us/windows/hardware/gg463006.aspx">
				1571	<front>
				1572	<title>Multiple Channel Audio Data and WAVE Files</title>
Ralph Giles	864196b	2014-01-20 12:32:11 -0800	[diff] [blame^]	1573	<author>
				1574	<organization>Microsoft Corporation</organization>
				1575	</author>
Ralph Giles	05bf400	2012-12-19 12:21:06 -0800	[diff] [blame]	1576	<date month="March" year="2007"/>
				1577	</front>
				1578	</reference>
				1579
Timothy B. Terriberry	a2b2e0b	2012-07-05 08:34:15 -0700	[diff] [blame]	1580	</references>
				1581
				1582	</back>
				1583	</rfc>